blob: b9a37ab4eae9a42cf8ef411176066eed05cb36d3 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their corresponding
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard0161e632008-08-28 15:36:32 +000097/************************************************************************
98 * *
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
100 * *
101 ************************************************************************/
102
103#define XML_PARSER_BIG_ENTITY 1000
104#define XML_PARSER_LOT_ENTITY 5000
105
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
112#define XML_PARSER_NON_LINEAR 10
113
114/*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800125 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000126{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
Daniel Veillard23f05e02013-02-19 10:21:49 +0800133 if (replacement != 0) {
134 if (replacement < XML_MAX_TEXT_LENGTH)
135 return(0);
136
137 /*
138 * If the volume of entity copy reaches 10 times the
139 * amount of parsed data and over the large text threshold
140 * then that's very likely to be an abuse.
141 */
142 if (ctxt->input != NULL) {
143 consumed = ctxt->input->consumed +
144 (ctxt->input->cur - ctxt->input->base);
145 }
146 consumed += ctxt->sizeentities;
147
148 if (replacement < XML_PARSER_NON_LINEAR * consumed)
149 return(0);
150 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000151 /*
152 * Do the check based on the replacement size of the entity
153 */
154 if (size < XML_PARSER_BIG_ENTITY)
155 return(0);
156
157 /*
158 * A limit on the amount of text data reasonably used
159 */
160 if (ctxt->input != NULL) {
161 consumed = ctxt->input->consumed +
162 (ctxt->input->cur - ctxt->input->base);
163 }
164 consumed += ctxt->sizeentities;
165
166 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
168 return (0);
169 } else if (ent != NULL) {
170 /*
171 * use the number of parsed entities in the replacement
172 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800173 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000174
175 /*
176 * The amount of data parsed counting entities size only once
177 */
178 if (ctxt->input != NULL) {
179 consumed = ctxt->input->consumed +
180 (ctxt->input->cur - ctxt->input->base);
181 }
182 consumed += ctxt->sizeentities;
183
184 /*
185 * Check the density of entities for the amount of data
186 * knowing an entity reference will take at least 3 bytes
187 */
188 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
189 return (0);
190 } else {
191 /*
192 * strange we got no data for checking just return
193 */
194 return (0);
195 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000196 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197 return (1);
198}
199
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we agree to
 * process. It is a safety boundary rather than a limitation of the
 * parser, and it can be disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000209
Daniel Veillard0fb18932003-09-07 09:14:37 +0000210
Daniel Veillard0161e632008-08-28 15:36:32 +0000211
212#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000213#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000214#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000215#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
216
/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW, this is the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
226#define XML_PARSER_CHUNK_SIZE 100
227
/*
 * NULL-terminated list of the processing instruction targets starting
 * with "xml" which are allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet", "xml-model",
    NULL
};
237
Daniel Veillarda07050d2003-10-19 14:46:32 +0000238
Owen Taylor3473f882001-02-23 17:55:21 +0000239/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200240static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000242
Daniel Veillard7d515752003-09-26 19:12:37 +0000243static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000244xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000246 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000247 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000248
Daniel Veillard37334572008-07-31 08:20:02 +0000249static int
250xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000252#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000253static void
254xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000256#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000257
Daniel Veillard7d515752003-09-26 19:12:37 +0000258static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000259xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000261
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000262static int
263xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
264
Daniel Veillarde57ec792003-09-10 10:50:59 +0000265/************************************************************************
266 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800267 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 * *
269 ************************************************************************/
270
271/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000272 * xmlErrAttributeDup:
273 * @ctxt: an XML parser context
274 * @prefix: the attribute prefix
275 * @localname: the attribute localname
276 *
277 * Handle a redefinition of attribute error
278 */
279static void
280xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281 const xmlChar * localname)
282{
Daniel Veillard157fee02003-10-31 10:36:03 +0000283 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284 (ctxt->instate == XML_PARSER_EOF))
285 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000286 if (ctxt != NULL)
287 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200288
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000289 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000290 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200291 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 (const char *) localname, NULL, NULL, 0, 0,
293 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000294 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000295 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200296 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 (const char *) prefix, (const char *) localname,
298 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
299 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000300 if (ctxt != NULL) {
301 ctxt->wellFormed = 0;
302 if (ctxt->recovery == 0)
303 ctxt->disableSAX = 1;
304 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305}
306
307/**
308 * xmlFatalErr:
309 * @ctxt: an XML parser context
310 * @error: the error number
311 * @extra: extra information string
312 *
313 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
314 */
315static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317{
318 const char *errmsg;
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800319 char errstr[129] = "";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320
Daniel Veillard157fee02003-10-31 10:36:03 +0000321 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322 (ctxt->instate == XML_PARSER_EOF))
323 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 switch (error) {
325 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800326 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800329 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800332 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "internal error";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800338 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800341 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800344 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800347 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800350 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800353 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800356 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800359 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800362 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800368 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800374 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "Fragment not allowed";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800432 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800438 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 break;
440 case XML_ERR_CONDSEC_INVALID_KEYWORD:
441 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800442 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000444 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800445 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000447 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800448 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000450 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800451 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000453 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800454 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000456 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800457 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800460 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800463 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800466 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000468 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800469 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800472 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000474 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800475 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800478 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000480 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000492 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800495 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000499 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800500 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000501 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000502#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800504 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000505 }
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800506 if (info == NULL)
507 snprintf(errstr, 128, "%s\n", errmsg);
508 else
509 snprintf(errstr, 128, "%s: %%s\n", errmsg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000510 if (ctxt != NULL)
511 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000512 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800513 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000514 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL) {
516 ctxt->wellFormed = 0;
517 if (ctxt->recovery == 0)
518 ctxt->disableSAX = 1;
519 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000520}
521
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000522/**
523 * xmlFatalErrMsg:
524 * @ctxt: an XML parser context
525 * @error: the error number
526 * @msg: the error message
527 *
528 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
529 */
530static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000531xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
532 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000533{
Daniel Veillard157fee02003-10-31 10:36:03 +0000534 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535 (ctxt->instate == XML_PARSER_EOF))
536 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000537 if (ctxt != NULL)
538 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000539 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200540 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000541 if (ctxt != NULL) {
542 ctxt->wellFormed = 0;
543 if (ctxt->recovery == 0)
544 ctxt->disableSAX = 1;
545 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000546}
547
548/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000549 * xmlWarningMsg:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
553 * @str1: extra data
554 * @str2: extra data
555 *
556 * Handle a warning.
557 */
558static void
559xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560 const char *msg, const xmlChar *str1, const xmlChar *str2)
561{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000562 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000563
Daniel Veillard157fee02003-10-31 10:36:03 +0000564 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565 (ctxt->instate == XML_PARSER_EOF))
566 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000567 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000569 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200570 if (ctxt != NULL) {
571 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000572 (ctxt->sax) ? ctxt->sax->warning : NULL,
573 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000574 ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_WARNING, NULL, 0,
576 (const char *) str1, (const char *) str2, NULL, 0, 0,
577 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200578 } else {
579 __xmlRaiseError(schannel, NULL, NULL,
580 ctxt, NULL, XML_FROM_PARSER, error,
581 XML_ERR_WARNING, NULL, 0,
582 (const char *) str1, (const char *) str2, NULL, 0, 0,
583 msg, (const char *) str1, (const char *) str2);
584 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000585}
586
587/**
588 * xmlValidityError:
589 * @ctxt: an XML parser context
590 * @error: the error number
591 * @msg: the error message
592 * @str1: extra data
593 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000594 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000595 */
596static void
597xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000598 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000599{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000600 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000601
602 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603 (ctxt->instate == XML_PARSER_EOF))
604 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000605 if (ctxt != NULL) {
606 ctxt->errNo = error;
607 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608 schannel = ctxt->sax->serror;
609 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200610 if (ctxt != NULL) {
611 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000612 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000613 ctxt, NULL, XML_FROM_DTD, error,
614 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000615 (const char *) str2, NULL, 0, 0,
616 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000617 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200618 } else {
619 __xmlRaiseError(schannel, NULL, NULL,
620 ctxt, NULL, XML_FROM_DTD, error,
621 XML_ERR_ERROR, NULL, 0, (const char *) str1,
622 (const char *) str2, NULL, 0, 0,
623 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000624 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000625}
626
627/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000628 * xmlFatalErrMsgInt:
629 * @ctxt: an XML parser context
630 * @error: the error number
631 * @msg: the error message
632 * @val: an integer value
633 *
634 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
635 */
636static void
637xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000638 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000639{
Daniel Veillard157fee02003-10-31 10:36:03 +0000640 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641 (ctxt->instate == XML_PARSER_EOF))
642 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000643 if (ctxt != NULL)
644 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000645 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000646 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000648 if (ctxt != NULL) {
649 ctxt->wellFormed = 0;
650 if (ctxt->recovery == 0)
651 ctxt->disableSAX = 1;
652 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000653}
654
655/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000656 * xmlFatalErrMsgStrIntStr:
657 * @ctxt: an XML parser context
658 * @error: the error number
659 * @msg: the error message
660 * @str1: an string info
661 * @val: an integer value
662 * @str2: an string info
663 *
664 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
665 */
666static void
667xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800668 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000669 const xmlChar *str2)
670{
Daniel Veillard157fee02003-10-31 10:36:03 +0000671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL)
675 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000676 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000677 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678 NULL, 0, (const char *) str1, (const char *) str2,
679 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000680 if (ctxt != NULL) {
681 ctxt->wellFormed = 0;
682 if (ctxt->recovery == 0)
683 ctxt->disableSAX = 1;
684 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000685}
686
687/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000688 * xmlFatalErrMsgStr:
689 * @ctxt: an XML parser context
690 * @error: the error number
691 * @msg: the error message
692 * @val: a string value
693 *
694 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
695 */
696static void
697xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000698 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000699{
Daniel Veillard157fee02003-10-31 10:36:03 +0000700 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701 (ctxt->instate == XML_PARSER_EOF))
702 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000703 if (ctxt != NULL)
704 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000705 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000706 XML_FROM_PARSER, error, XML_ERR_FATAL,
707 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
708 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000709 if (ctxt != NULL) {
710 ctxt->wellFormed = 0;
711 if (ctxt->recovery == 0)
712 ctxt->disableSAX = 1;
713 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000714}
715
716/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000717 * xmlErrMsgStr:
718 * @ctxt: an XML parser context
719 * @error: the error number
720 * @msg: the error message
721 * @val: a string value
722 *
723 * Handle a non fatal parser error
724 */
725static void
726xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727 const char *msg, const xmlChar * val)
728{
Daniel Veillard157fee02003-10-31 10:36:03 +0000729 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730 (ctxt->instate == XML_PARSER_EOF))
731 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000732 if (ctxt != NULL)
733 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000734 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000735 XML_FROM_PARSER, error, XML_ERR_ERROR,
736 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
737 val);
738}
739
740/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000741 * xmlNsErr:
742 * @ctxt: an XML parser context
743 * @error: the error number
744 * @msg: the message
745 * @info1: extra information string
746 * @info2: extra information string
747 *
748 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
749 */
750static void
751xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
752 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000753 const xmlChar * info1, const xmlChar * info2,
754 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000755{
Daniel Veillard157fee02003-10-31 10:36:03 +0000756 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757 (ctxt->instate == XML_PARSER_EOF))
758 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000759 if (ctxt != NULL)
760 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000761 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000762 XML_ERR_ERROR, NULL, 0, (const char *) info1,
763 (const char *) info2, (const char *) info3, 0, 0, msg,
764 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000765 if (ctxt != NULL)
766 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000767}
768
Daniel Veillard37334572008-07-31 08:20:02 +0000769/**
770 * xmlNsWarn
771 * @ctxt: an XML parser context
772 * @error: the error number
773 * @msg: the message
774 * @info1: extra information string
775 * @info2: extra information string
776 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800777 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000778 */
779static void
780xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
781 const char *msg,
782 const xmlChar * info1, const xmlChar * info2,
783 const xmlChar * info3)
784{
785 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786 (ctxt->instate == XML_PARSER_EOF))
787 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000788 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789 XML_ERR_WARNING, NULL, 0, (const char *) info1,
790 (const char *) info2, (const char *) info3, 0, 0, msg,
791 info1, info2, info3);
792}
793
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000794/************************************************************************
795 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800796 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000797 * *
798 ************************************************************************/
799
800/**
801 * xmlHasFeature:
802 * @feature: the feature to be examined
803 *
804 * Examines if the library has been compiled with a given feature.
805 *
806 * Returns a non-zero value if the feature exist, otherwise zero.
807 * Returns zero (0) if the feature does not exist or an unknown
808 * unknown feature is requested, non-zero otherwise.
809 */
810int
811xmlHasFeature(xmlFeature feature)
812{
813 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000814 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000815#ifdef LIBXML_THREAD_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000820 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000821#ifdef LIBXML_TREE_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000826 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000827#ifdef LIBXML_OUTPUT_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000832 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833#ifdef LIBXML_PUSH_ENABLED
834 return(1);
835#else
836 return(0);
837#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000838 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000839#ifdef LIBXML_READER_ENABLED
840 return(1);
841#else
842 return(0);
843#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000844 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000845#ifdef LIBXML_PATTERN_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_WRITER_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_SAX1_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_FTP_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_HTTP_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_VALID_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_HTML_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_LEGACY_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_C14N_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_CATALOG_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_XPATH_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_XPTR_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_XINCLUDE_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef LIBXML_ICONV_ENABLED
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_ISO8859X_ENABLED
930 return(1);
931#else
932 return(0);
933#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000934 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000935#ifdef LIBXML_UNICODE_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000940 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000941#ifdef LIBXML_REGEXP_ENABLED
942 return(1);
943#else
944 return(0);
945#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000946 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000947#ifdef LIBXML_AUTOMATA_ENABLED
948 return(1);
949#else
950 return(0);
951#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000952 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000953#ifdef LIBXML_EXPR_ENABLED
954 return(1);
955#else
956 return(0);
957#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000958 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000959#ifdef LIBXML_SCHEMAS_ENABLED
960 return(1);
961#else
962 return(0);
963#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000964 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000965#ifdef LIBXML_SCHEMATRON_ENABLED
966 return(1);
967#else
968 return(0);
969#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000970 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000971#ifdef LIBXML_MODULES_ENABLED
972 return(1);
973#else
974 return(0);
975#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000976 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000977#ifdef LIBXML_DEBUG_ENABLED
978 return(1);
979#else
980 return(0);
981#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000982 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000983#ifdef DEBUG_MEMORY_LOCATION
984 return(1);
985#else
986 return(0);
987#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000988 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000989#ifdef LIBXML_DEBUG_RUNTIME
990 return(1);
991#else
992 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000993#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000994 case XML_WITH_ZLIB:
995#ifdef LIBXML_ZLIB_ENABLED
996 return(1);
997#else
998 return(0);
999#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001000 case XML_WITH_LZMA:
1001#ifdef LIBXML_LZMA_ENABLED
1002 return(1);
1003#else
1004 return(0);
1005#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001006 case XML_WITH_ICU:
1007#ifdef LIBXML_ICU_ENABLED
1008 return(1);
1009#else
1010 return(0);
1011#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012 default:
1013 break;
1014 }
1015 return(0);
1016}
1017
1018/************************************************************************
1019 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001020 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001021 * *
1022 ************************************************************************/
1023
1024/**
1025 * xmlDetectSAX2:
1026 * @ctxt: an XML parser context
1027 *
1028 * Do the SAX2 detection and specific intialization
1029 */
1030static void
1031xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001033#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001034 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035 ((ctxt->sax->startElementNs != NULL) ||
1036 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001037#else
1038 ctxt->sax2 = 1;
1039#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001040
1041 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001044 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1045 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001046 xmlErrMemory(ctxt, NULL);
1047 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001048}
1049
Daniel Veillarde57ec792003-09-10 10:50:59 +00001050typedef struct _xmlDefAttrs xmlDefAttrs;
1051typedef xmlDefAttrs *xmlDefAttrsPtr;
1052struct _xmlDefAttrs {
1053 int nbAttrs; /* number of defaulted attributes on that element */
1054 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001055 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001057
1058/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001059 * xmlAttrNormalizeSpace:
1060 * @src: the source string
1061 * @dst: the target string
1062 *
1063 * Normalize the space in non CDATA attribute values:
1064 * If the attribute type is not CDATA, then the XML processor MUST further
1065 * process the normalized attribute value by discarding any leading and
1066 * trailing space (#x20) characters, and by replacing sequences of space
1067 * (#x20) characters by a single space (#x20) character.
1068 * Note that the size of dst need to be at least src, and if one doesn't need
1069 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1070 * passing src as dst is just fine.
1071 *
1072 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1073 * is needed.
1074 */
1075static xmlChar *
1076xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1077{
1078 if ((src == NULL) || (dst == NULL))
1079 return(NULL);
1080
1081 while (*src == 0x20) src++;
1082 while (*src != 0) {
1083 if (*src == 0x20) {
1084 while (*src == 0x20) src++;
1085 if (*src != 0)
1086 *dst++ = 0x20;
1087 } else {
1088 *dst++ = *src++;
1089 }
1090 }
1091 *dst = 0;
1092 if (dst == src)
1093 return(NULL);
1094 return(dst);
1095}
1096
1097/**
1098 * xmlAttrNormalizeSpace2:
1099 * @src: the source string
1100 *
1101 * Normalize the space in non CDATA attribute values, a slightly more complex
1102 * front end to avoid allocation problems when running on attribute values
1103 * coming from the input.
1104 *
1105 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1106 * is needed.
1107 */
1108static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001109xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001110{
1111 int i;
1112 int remove_head = 0;
1113 int need_realloc = 0;
1114 const xmlChar *cur;
1115
1116 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1117 return(NULL);
1118 i = *len;
1119 if (i <= 0)
1120 return(NULL);
1121
1122 cur = src;
1123 while (*cur == 0x20) {
1124 cur++;
1125 remove_head++;
1126 }
1127 while (*cur != 0) {
1128 if (*cur == 0x20) {
1129 cur++;
1130 if ((*cur == 0x20) || (*cur == 0)) {
1131 need_realloc = 1;
1132 break;
1133 }
1134 } else
1135 cur++;
1136 }
1137 if (need_realloc) {
1138 xmlChar *ret;
1139
1140 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1141 if (ret == NULL) {
1142 xmlErrMemory(ctxt, NULL);
1143 return(NULL);
1144 }
1145 xmlAttrNormalizeSpace(ret, ret);
1146 *len = (int) strlen((const char *)ret);
1147 return(ret);
1148 } else if (remove_head) {
1149 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150 memmove(src, src + remove_head, 1 + *len);
1151 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001152 }
1153 return(NULL);
1154}
1155
1156/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 * xmlAddDefAttrs:
1158 * @ctxt: an XML parser context
1159 * @fullname: the element fullname
1160 * @fullattr: the attribute fullname
1161 * @value: the attribute value
1162 *
1163 * Add a defaulted attribute for an element
1164 */
1165static void
1166xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167 const xmlChar *fullname,
1168 const xmlChar *fullattr,
1169 const xmlChar *value) {
1170 xmlDefAttrsPtr defaults;
1171 int len;
1172 const xmlChar *name;
1173 const xmlChar *prefix;
1174
Daniel Veillard6a31b832008-03-26 14:06:44 +00001175 /*
1176 * Allows to detect attribute redefinitions
1177 */
1178 if (ctxt->attsSpecial != NULL) {
1179 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1180 return;
1181 }
1182
Daniel Veillarde57ec792003-09-10 10:50:59 +00001183 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001184 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001185 if (ctxt->attsDefault == NULL)
1186 goto mem_error;
1187 }
1188
1189 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001190 * split the element name into prefix:localname , the string found
1191 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001192 */
1193 name = xmlSplitQName3(fullname, &len);
1194 if (name == NULL) {
1195 name = xmlDictLookup(ctxt->dict, fullname, -1);
1196 prefix = NULL;
1197 } else {
1198 name = xmlDictLookup(ctxt->dict, name, -1);
1199 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1200 }
1201
1202 /*
1203 * make sure there is some storage
1204 */
1205 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206 if (defaults == NULL) {
1207 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001208 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001209 if (defaults == NULL)
1210 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001211 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001212 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001213 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214 defaults, NULL) < 0) {
1215 xmlFree(defaults);
1216 goto mem_error;
1217 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001218 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001219 xmlDefAttrsPtr temp;
1220
1221 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001222 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001223 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001224 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001225 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001226 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001227 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228 defaults, NULL) < 0) {
1229 xmlFree(defaults);
1230 goto mem_error;
1231 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001232 }
1233
1234 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001235 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001236 * are within the DTD and hen not associated to namespace names.
1237 */
1238 name = xmlSplitQName3(fullattr, &len);
1239 if (name == NULL) {
1240 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1241 prefix = NULL;
1242 } else {
1243 name = xmlDictLookup(ctxt->dict, name, -1);
1244 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1245 }
1246
Daniel Veillardae0765b2008-07-31 19:54:59 +00001247 defaults->values[5 * defaults->nbAttrs] = name;
1248 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001249 /* intern the string and precompute the end */
1250 len = xmlStrlen(value);
1251 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001252 defaults->values[5 * defaults->nbAttrs + 2] = value;
1253 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1254 if (ctxt->external)
1255 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1256 else
1257 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001258 defaults->nbAttrs++;
1259
1260 return;
1261
1262mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001263 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 return;
1265}
1266
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001267/**
1268 * xmlAddSpecialAttr:
1269 * @ctxt: an XML parser context
1270 * @fullname: the element fullname
1271 * @fullattr: the attribute fullname
1272 * @type: the attribute type
1273 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001274 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001275 */
1276static void
1277xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278 const xmlChar *fullname,
1279 const xmlChar *fullattr,
1280 int type)
1281{
1282 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001283 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001284 if (ctxt->attsSpecial == NULL)
1285 goto mem_error;
1286 }
1287
Daniel Veillardac4118d2008-01-11 05:27:32 +00001288 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1289 return;
1290
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001291 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001293 return;
1294
1295mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001296 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001297 return;
1298}
1299
Daniel Veillard4432df22003-09-28 18:58:27 +00001300/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001301 * xmlCleanSpecialAttrCallback:
1302 *
1303 * Removes CDATA attributes from the special attribute table
1304 */
1305static void
1306xmlCleanSpecialAttrCallback(void *payload, void *data,
1307 const xmlChar *fullname, const xmlChar *fullattr,
1308 const xmlChar *unused ATTRIBUTE_UNUSED) {
1309 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1310
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001311 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001312 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1313 }
1314}
1315
1316/**
1317 * xmlCleanSpecialAttr:
1318 * @ctxt: an XML parser context
1319 *
1320 * Trim the list of attributes defined to remove all those of type
1321 * CDATA as they are not special. This call should be done when finishing
1322 * to parse the DTD and before starting to parse the document root.
1323 */
1324static void
1325xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1326{
1327 if (ctxt->attsSpecial == NULL)
1328 return;
1329
1330 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1331
1332 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333 xmlHashFree(ctxt->attsSpecial, NULL);
1334 ctxt->attsSpecial = NULL;
1335 }
1336 return;
1337}
1338
1339/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001340 * xmlCheckLanguageID:
1341 * @lang: pointer to the string value
1342 *
1343 * Checks that the value conforms to the LanguageID production:
1344 *
1345 * NOTE: this is somewhat deprecated, those productions were removed from
1346 * the XML Second edition.
1347 *
1348 * [33] LanguageID ::= Langcode ('-' Subcode)*
1349 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353 * [38] Subcode ::= ([a-z] | [A-Z])+
1354 *
 * The current REC references the successors of RFC 1766, currently RFC 5646
1356 *
1357 * http://www.rfc-editor.org/rfc/rfc5646.txt
1358 * langtag = language
1359 * ["-" script]
1360 * ["-" region]
1361 * *("-" variant)
1362 * *("-" extension)
1363 * ["-" privateuse]
1364 * language = 2*3ALPHA ; shortest ISO 639 code
1365 * ["-" extlang] ; sometimes followed by
1366 * ; extended language subtags
1367 * / 4ALPHA ; or reserved for future use
1368 * / 5*8ALPHA ; or registered language subtag
1369 *
1370 * extlang = 3ALPHA ; selected ISO 639 codes
1371 * *2("-" 3ALPHA) ; permanently reserved
1372 *
1373 * script = 4ALPHA ; ISO 15924 code
1374 *
1375 * region = 2ALPHA ; ISO 3166-1 code
1376 * / 3DIGIT ; UN M.49 code
1377 *
1378 * variant = 5*8alphanum ; registered variants
1379 * / (DIGIT 3alphanum)
1380 *
1381 * extension = singleton 1*("-" (2*8alphanum))
1382 *
1383 * ; Single alphanumerics
1384 * ; "x" reserved for private use
1385 * singleton = DIGIT ; 0 - 9
1386 * / %x41-57 ; A - W
1387 * / %x59-5A ; Y - Z
1388 * / %x61-77 ; a - w
1389 * / %x79-7A ; y - z
1390 *
1391 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1392 * The parser below doesn't try to cope with extension or privateuse
1393 * that could be added but that's not interoperable anyway
1394 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001395 * Returns 1 if correct 0 otherwise
1396 **/
1397int
1398xmlCheckLanguageID(const xmlChar * lang)
1399{
Daniel Veillard60587d62010-11-04 15:16:27 +01001400 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001401
1402 if (cur == NULL)
1403 return (0);
1404 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001405 ((cur[0] == 'I') && (cur[1] == '-')) ||
1406 ((cur[0] == 'x') && (cur[1] == '-')) ||
1407 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001408 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001409 * Still allow IANA code and user code which were coming
1410 * from the previous version of the XML-1.0 specification
1411 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001412 */
1413 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001414 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001415 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1416 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001417 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001418 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001419 nxt = cur;
1420 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1421 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1422 nxt++;
1423 if (nxt - cur >= 4) {
1424 /*
1425 * Reserved
1426 */
1427 if ((nxt - cur > 8) || (nxt[0] != 0))
1428 return(0);
1429 return(1);
1430 }
1431 if (nxt - cur < 2)
1432 return(0);
1433 /* we got an ISO 639 code */
1434 if (nxt[0] == 0)
1435 return(1);
1436 if (nxt[0] != '-')
1437 return(0);
1438
1439 nxt++;
1440 cur = nxt;
1441 /* now we can have extlang or script or region or variant */
1442 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1443 goto region_m49;
1444
1445 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1446 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1447 nxt++;
1448 if (nxt - cur == 4)
1449 goto script;
1450 if (nxt - cur == 2)
1451 goto region;
1452 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1453 goto variant;
1454 if (nxt - cur != 3)
1455 return(0);
1456 /* we parsed an extlang */
1457 if (nxt[0] == 0)
1458 return(1);
1459 if (nxt[0] != '-')
1460 return(0);
1461
1462 nxt++;
1463 cur = nxt;
1464 /* now we can have script or region or variant */
1465 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1466 goto region_m49;
1467
1468 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1469 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1470 nxt++;
1471 if (nxt - cur == 2)
1472 goto region;
1473 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1474 goto variant;
1475 if (nxt - cur != 4)
1476 return(0);
1477 /* we parsed a script */
1478script:
1479 if (nxt[0] == 0)
1480 return(1);
1481 if (nxt[0] != '-')
1482 return(0);
1483
1484 nxt++;
1485 cur = nxt;
1486 /* now we can have region or variant */
1487 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1488 goto region_m49;
1489
1490 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1491 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1492 nxt++;
1493
1494 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1495 goto variant;
1496 if (nxt - cur != 2)
1497 return(0);
1498 /* we parsed a region */
1499region:
1500 if (nxt[0] == 0)
1501 return(1);
1502 if (nxt[0] != '-')
1503 return(0);
1504
1505 nxt++;
1506 cur = nxt;
1507 /* now we can just have a variant */
1508 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1509 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1510 nxt++;
1511
1512 if ((nxt - cur < 5) || (nxt - cur > 8))
1513 return(0);
1514
1515 /* we parsed a variant */
1516variant:
1517 if (nxt[0] == 0)
1518 return(1);
1519 if (nxt[0] != '-')
1520 return(0);
1521 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001522 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001523
1524region_m49:
1525 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1526 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1527 nxt += 3;
1528 goto region;
1529 }
1530 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001531}
1532
Owen Taylor3473f882001-02-23 17:55:21 +00001533/************************************************************************
1534 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001535 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001536 * *
1537 ************************************************************************/
1538
Daniel Veillard8ed10722009-08-20 19:17:36 +02001539static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001541
Daniel Veillard0fb18932003-09-07 09:14:37 +00001542#ifdef SAX2
1543/**
1544 * nsPush:
1545 * @ctxt: an XML parser context
1546 * @prefix: the namespace prefix or NULL
1547 * @URL: the namespace name
1548 *
1549 * Pushes a new parser namespace on top of the ns stack
1550 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001551 * Returns -1 in case of error, -2 if the namespace should be discarded
1552 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001553 */
1554static int
1555nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1556{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001557 if (ctxt->options & XML_PARSE_NSCLEAN) {
1558 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001559 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001560 if (ctxt->nsTab[i] == prefix) {
1561 /* in scope */
1562 if (ctxt->nsTab[i + 1] == URL)
1563 return(-2);
1564 /* out of scope keep it */
1565 break;
1566 }
1567 }
1568 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001569 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1570 ctxt->nsMax = 10;
1571 ctxt->nsNr = 0;
1572 ctxt->nsTab = (const xmlChar **)
1573 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001575 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001576 ctxt->nsMax = 0;
1577 return (-1);
1578 }
1579 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001580 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001581 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001582 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1584 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001585 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001586 ctxt->nsMax /= 2;
1587 return (-1);
1588 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001589 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001590 }
1591 ctxt->nsTab[ctxt->nsNr++] = prefix;
1592 ctxt->nsTab[ctxt->nsNr++] = URL;
1593 return (ctxt->nsNr);
1594}
1595/**
1596 * nsPop:
1597 * @ctxt: an XML parser context
1598 * @nr: the number to pop
1599 *
1600 * Pops the top @nr parser prefix/namespace from the ns stack
1601 *
1602 * Returns the number of namespaces removed
1603 */
1604static int
1605nsPop(xmlParserCtxtPtr ctxt, int nr)
1606{
1607 int i;
1608
1609 if (ctxt->nsTab == NULL) return(0);
1610 if (ctxt->nsNr < nr) {
1611 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1612 nr = ctxt->nsNr;
1613 }
1614 if (ctxt->nsNr <= 0)
1615 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001616
Daniel Veillard0fb18932003-09-07 09:14:37 +00001617 for (i = 0;i < nr;i++) {
1618 ctxt->nsNr--;
1619 ctxt->nsTab[ctxt->nsNr] = NULL;
1620 }
1621 return(nr);
1622}
1623#endif
1624
1625static int
1626xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001628 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001629 int maxatts;
1630
1631 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001632 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001633 atts = (const xmlChar **)
1634 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001635 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001636 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001637 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638 if (attallocs == NULL) goto mem_error;
1639 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001640 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001641 } else if (nr + 5 > ctxt->maxatts) {
1642 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001643 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001645 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001646 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001647 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648 (maxatts / 5) * sizeof(int));
1649 if (attallocs == NULL) goto mem_error;
1650 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001651 ctxt->maxatts = maxatts;
1652 }
1653 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001654mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001655 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001656 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001657}
1658
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001659/**
1660 * inputPush:
1661 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001662 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001663 *
1664 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001665 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001666 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001667 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001668int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1670{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001671 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001672 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001673 if (ctxt->inputNr >= ctxt->inputMax) {
1674 ctxt->inputMax *= 2;
1675 ctxt->inputTab =
1676 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1677 ctxt->inputMax *
1678 sizeof(ctxt->inputTab[0]));
1679 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001680 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001681 xmlFreeInputStream(value);
1682 ctxt->inputMax /= 2;
1683 value = NULL;
1684 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001685 }
1686 }
1687 ctxt->inputTab[ctxt->inputNr] = value;
1688 ctxt->input = value;
1689 return (ctxt->inputNr++);
1690}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001691/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001693 * @ctxt: an XML parser context
1694 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001696 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001697 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001698 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001699xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001700inputPop(xmlParserCtxtPtr ctxt)
1701{
1702 xmlParserInputPtr ret;
1703
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001704 if (ctxt == NULL)
1705 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001706 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001707 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708 ctxt->inputNr--;
1709 if (ctxt->inputNr > 0)
1710 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1711 else
1712 ctxt->input = NULL;
1713 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001714 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001715 return (ret);
1716}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001717/**
1718 * nodePush:
1719 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001720 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001721 *
1722 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001723 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001724 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001725 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001726int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001727nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1728{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001729 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001730 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001731 xmlNodePtr *tmp;
1732
1733 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1734 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001736 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001737 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001738 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001739 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001740 ctxt->nodeTab = tmp;
1741 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001742 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001743 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001745 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001746 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001747 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001748 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001749 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001750 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001751 ctxt->nodeTab[ctxt->nodeNr] = value;
1752 ctxt->node = value;
1753 return (ctxt->nodeNr++);
1754}
Daniel Veillard8915c152008-08-26 13:05:34 +00001755
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756/**
1757 * nodePop:
1758 * @ctxt: an XML parser context
1759 *
1760 * Pops the top element node from the node stack
1761 *
1762 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001763 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001764xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001765nodePop(xmlParserCtxtPtr ctxt)
1766{
1767 xmlNodePtr ret;
1768
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001769 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001770 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001771 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001772 ctxt->nodeNr--;
1773 if (ctxt->nodeNr > 0)
1774 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1775 else
1776 ctxt->node = NULL;
1777 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001778 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001779 return (ret);
1780}
Daniel Veillarda2351322004-06-27 12:08:10 +00001781
1782#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001783/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001784 * nameNsPush:
1785 * @ctxt: an XML parser context
1786 * @value: the element name
1787 * @prefix: the element prefix
1788 * @URI: the element namespace name
1789 *
1790 * Pushes a new element name/prefix/URL on top of the name stack
1791 *
1792 * Returns -1 in case of error, the index in the stack otherwise
1793 */
1794static int
1795nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1797{
1798 if (ctxt->nameNr >= ctxt->nameMax) {
1799 const xmlChar * *tmp;
1800 void **tmp2;
1801 ctxt->nameMax *= 2;
1802 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1803 ctxt->nameMax *
1804 sizeof(ctxt->nameTab[0]));
1805 if (tmp == NULL) {
1806 ctxt->nameMax /= 2;
1807 goto mem_error;
1808 }
1809 ctxt->nameTab = tmp;
1810 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1811 ctxt->nameMax * 3 *
1812 sizeof(ctxt->pushTab[0]));
1813 if (tmp2 == NULL) {
1814 ctxt->nameMax /= 2;
1815 goto mem_error;
1816 }
1817 ctxt->pushTab = tmp2;
1818 }
1819 ctxt->nameTab[ctxt->nameNr] = value;
1820 ctxt->name = value;
1821 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001823 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001824 return (ctxt->nameNr++);
1825mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001826 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001827 return (-1);
1828}
1829/**
1830 * nameNsPop:
1831 * @ctxt: an XML parser context
1832 *
1833 * Pops the top element/prefix/URI name from the name stack
1834 *
1835 * Returns the name just removed
1836 */
1837static const xmlChar *
1838nameNsPop(xmlParserCtxtPtr ctxt)
1839{
1840 const xmlChar *ret;
1841
1842 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001843 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001844 ctxt->nameNr--;
1845 if (ctxt->nameNr > 0)
1846 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1847 else
1848 ctxt->name = NULL;
1849 ret = ctxt->nameTab[ctxt->nameNr];
1850 ctxt->nameTab[ctxt->nameNr] = NULL;
1851 return (ret);
1852}
Daniel Veillarda2351322004-06-27 12:08:10 +00001853#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001854
1855/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001856 * namePush:
1857 * @ctxt: an XML parser context
1858 * @value: the element name
1859 *
1860 * Pushes a new element name on top of the name stack
1861 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001862 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001863 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001864int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001865namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001866{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001867 if (ctxt == NULL) return (-1);
1868
Daniel Veillard1c732d22002-11-30 11:22:59 +00001869 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001870 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001871 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001872 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001873 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001874 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001875 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001876 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001877 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001878 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001879 }
1880 ctxt->nameTab[ctxt->nameNr] = value;
1881 ctxt->name = value;
1882 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001883mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001884 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001885 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001886}
1887/**
1888 * namePop:
1889 * @ctxt: an XML parser context
1890 *
1891 * Pops the top element name from the name stack
1892 *
1893 * Returns the name just removed
1894 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001895const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001896namePop(xmlParserCtxtPtr ctxt)
1897{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001898 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001899
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001900 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1901 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001902 ctxt->nameNr--;
1903 if (ctxt->nameNr > 0)
1904 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1905 else
1906 ctxt->name = NULL;
1907 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001908 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001909 return (ret);
1910}
Owen Taylor3473f882001-02-23 17:55:21 +00001911
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001912static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001913 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001914 int *tmp;
1915
Owen Taylor3473f882001-02-23 17:55:21 +00001916 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001917 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1919 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001920 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001921 ctxt->spaceMax /=2;
1922 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001923 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001924 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001925 }
1926 ctxt->spaceTab[ctxt->spaceNr] = val;
1927 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928 return(ctxt->spaceNr++);
1929}
1930
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001931static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001932 int ret;
1933 if (ctxt->spaceNr <= 0) return(0);
1934 ctxt->spaceNr--;
1935 if (ctxt->spaceNr > 0)
1936 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1937 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001938 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001939 ret = ctxt->spaceTab[ctxt->spaceNr];
1940 ctxt->spaceTab[ctxt->spaceNr] = -1;
1941 return(ret);
1942}
1943
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt, holding the
 * parser context, to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done (both macros currently
 *           expand to the same expression)
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   CMPn(s, c1, ..., cn)
 *           true when the first n bytes at s are c1 ... cn, n from 4 to 10.
 *           The bytes are compared in order and the comparison stops at
 *           the first mismatch.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   SKIPL(n) Skip n xmlChar one at a time, updating the line counter when
 *           a newline is skipped.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * Every argument is parenthesized so that a pointer expression such as
 * (p + 1) is converted as a whole before being indexed. s is evaluated
 * once per compared byte and must therefore be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
2001
/*
 * SKIP(val): move the current input forward by val xmlChars, adding val
 * to the column and to ctxt->nbChars. Line numbers are not updated, so
 * the skipped chars must not contain a newline. A '%' found at the new
 * position is handed to xmlParserHandlePEReference(), and the input is
 * popped when it is exhausted and cannot be refilled.
 * val is evaluated several times and must be free of side effects.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but advances one xmlChar at a time so that
 * newlines in the skipped chars update the line and column counters.
 * val is evaluated on each turn of the loop and must be free of side
 * effects.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl < (val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2024
Daniel Veillarda880b122003-04-21 21:36:41 +00002025#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002026 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2027 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002028 xmlSHRINK (ctxt);
2029
2030static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031 xmlParserInputShrink(ctxt->input);
2032 if ((*ctxt->input->cur == 0) &&
2033 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2034 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002035 }
Owen Taylor3473f882001-02-23 17:55:21 +00002036
Daniel Veillarda880b122003-04-21 21:36:41 +00002037#define GROW if ((ctxt->progressive == 0) && \
2038 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002039 xmlGROW (ctxt);
2040
2041static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002042 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2043 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2044
2045 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2046 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002047 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002048 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2049 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard0df83ca2012-07-30 15:41:10 +08002050 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002051 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002052 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002053 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002054 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2055 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002056}
Owen Taylor3473f882001-02-23 17:55:21 +00002057
/* Skip the blanks at the current position, yields the number skipped. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Skip to the next character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, counting one column and one
 * char, then ask for more input if a 0 was reached. The line counter is
 * not touched, so this must not be used on a newline.
 * The expansion is a plain brace block, not a do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * It counts as a single column, or starts a new line when it is a
 * newline. A '%' found at the new position is handed to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an int lvalue, it receives the length of the character. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v, whose encoding is l
 * xmlChars long, to the buffer b at index i and advance i accordingly.
 * i must be an lvalue. Wrapped in do/while so that the expansion is a
 * single statement which cannot capture a following "else".
 */
#define COPY_BUF(l,b,i,v)						\
    do {								\
	if ((l) == 1) (b)[(i)++] = (xmlChar) (v);			\
	else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v));		\
    } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +00002084
2085/**
2086 * xmlSkipBlankChars:
2087 * @ctxt: the XML parser context
2088 *
2089 * skip all blanks character found at that point in the input streams.
2090 * It pops up finished entities in the process if allowable at that point.
2091 *
2092 * Returns the number of space chars skipped
2093 */
2094
2095int
2096xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002097 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002098
2099 /*
2100 * It's Okay to use CUR/NEXT here since all the blanks are on
2101 * the ASCII range.
2102 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002103 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2104 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002105 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002106 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002107 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002108 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002109 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002110 if (*cur == '\n') {
2111 ctxt->input->line++; ctxt->input->col = 1;
Juergen Keil5d4310a2014-08-07 16:28:09 +08002112 } else {
2113 ctxt->input->col++;
Daniel Veillard02141ea2001-04-30 11:46:40 +00002114 }
2115 cur++;
2116 res++;
2117 if (*cur == 0) {
2118 ctxt->input->cur = cur;
2119 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2120 cur = ctxt->input->cur;
2121 }
2122 }
2123 ctxt->input->cur = cur;
2124 } else {
2125 int cur;
2126 do {
2127 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002128 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002129 NEXT;
2130 cur = CUR;
2131 res++;
2132 }
2133 while ((cur == 0) && (ctxt->inputNr > 1) &&
2134 (ctxt->instate != XML_PARSER_COMMENT)) {
2135 xmlPopInput(ctxt);
2136 cur = CUR;
2137 }
2138 /*
2139 * Need to handle support of entities branching here
2140 */
2141 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2142 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2143 }
Owen Taylor3473f882001-02-23 17:55:21 +00002144 return(res);
2145}
2146
2147/************************************************************************
2148 * *
2149 * Commodity functions to handle entities *
2150 * *
2151 ************************************************************************/
2152
2153/**
2154 * xmlPopInput:
2155 * @ctxt: an XML parser context
2156 *
2157 * xmlPopInput: the current input pointed by ctxt->input came to an end
2158 * pop it and return the next char.
2159 *
2160 * Returns the current xmlChar in the parser context
2161 */
2162xmlChar
2163xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002164 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002165 if (xmlParserDebugEntities)
2166 xmlGenericError(xmlGenericErrorContext,
2167 "Popping input %d\n", ctxt->inputNr);
2168 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002169 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002170 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2171 return(xmlPopInput(ctxt));
2172 return(CUR);
2173}
2174
2175/**
2176 * xmlPushInput:
2177 * @ctxt: an XML parser context
2178 * @input: an XML parser input fragment (entity, XML fragment ...).
2179 *
2180 * xmlPushInput: switch to a new input stream which is stacked on top
2181 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002182 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002183 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002184int
Owen Taylor3473f882001-02-23 17:55:21 +00002185xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002186 int ret;
2187 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002188
2189 if (xmlParserDebugEntities) {
2190 if ((ctxt->input != NULL) && (ctxt->input->filename))
2191 xmlGenericError(xmlGenericErrorContext,
2192 "%s(%d): ", ctxt->input->filename,
2193 ctxt->input->line);
2194 xmlGenericError(xmlGenericErrorContext,
2195 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2196 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002197 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002198 if (ctxt->instate == XML_PARSER_EOF)
2199 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002200 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002201 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002202}
2203
2204/**
2205 * xmlParseCharRef:
2206 * @ctxt: an XML parser context
2207 *
2208 * parse Reference declarations
2209 *
2210 * [66] CharRef ::= '&#' [0-9]+ ';' |
2211 * '&#x' [0-9a-fA-F]+ ';'
2212 *
2213 * [ WFC: Legal Character ]
2214 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002215 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002216 *
2217 * Returns the value parsed (as an int), 0 in case of error
2218 */
2219int
2220xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002221 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002222 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002223 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002224
Owen Taylor3473f882001-02-23 17:55:21 +00002225 /*
2226 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2227 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002228 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002229 (NXT(2) == 'x')) {
2230 SKIP(3);
2231 GROW;
2232 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002233 if (count++ > 20) {
2234 count = 0;
2235 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002236 if (ctxt->instate == XML_PARSER_EOF)
2237 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002238 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002239 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002240 val = val * 16 + (CUR - '0');
2241 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2242 val = val * 16 + (CUR - 'a') + 10;
2243 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2244 val = val * 16 + (CUR - 'A') + 10;
2245 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002246 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002247 val = 0;
2248 break;
2249 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002250 if (val > 0x10FFFF)
2251 outofrange = val;
2252
Owen Taylor3473f882001-02-23 17:55:21 +00002253 NEXT;
2254 count++;
2255 }
2256 if (RAW == ';') {
2257 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002258 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002259 ctxt->nbChars ++;
2260 ctxt->input->cur++;
2261 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002262 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002263 SKIP(2);
2264 GROW;
2265 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002266 if (count++ > 20) {
2267 count = 0;
2268 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002269 if (ctxt->instate == XML_PARSER_EOF)
2270 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002271 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002272 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002273 val = val * 10 + (CUR - '0');
2274 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002275 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002276 val = 0;
2277 break;
2278 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002279 if (val > 0x10FFFF)
2280 outofrange = val;
2281
Owen Taylor3473f882001-02-23 17:55:21 +00002282 NEXT;
2283 count++;
2284 }
2285 if (RAW == ';') {
2286 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002287 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002288 ctxt->nbChars ++;
2289 ctxt->input->cur++;
2290 }
2291 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002292 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002293 }
2294
2295 /*
2296 * [ WFC: Legal Character ]
2297 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002298 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002299 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002300 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002301 return(val);
2302 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002303 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2304 "xmlParseCharRef: invalid xmlChar value %d\n",
2305 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002306 }
2307 return(0);
2308}
2309
2310/**
2311 * xmlParseStringCharRef:
2312 * @ctxt: an XML parser context
2313 * @str: a pointer to an index in the string
2314 *
2315 * parse Reference declarations, variant parsing from a string rather
2316 * than an an input flow.
2317 *
2318 * [66] CharRef ::= '&#' [0-9]+ ';' |
2319 * '&#x' [0-9a-fA-F]+ ';'
2320 *
2321 * [ WFC: Legal Character ]
2322 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002323 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002324 *
2325 * Returns the value parsed (as an int), 0 in case of error, str will be
2326 * updated to the current value of the index
2327 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002328static int
Owen Taylor3473f882001-02-23 17:55:21 +00002329xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2330 const xmlChar *ptr;
2331 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002332 unsigned int val = 0;
2333 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002334
2335 if ((str == NULL) || (*str == NULL)) return(0);
2336 ptr = *str;
2337 cur = *ptr;
2338 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2339 ptr += 3;
2340 cur = *ptr;
2341 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002342 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002343 val = val * 16 + (cur - '0');
2344 else if ((cur >= 'a') && (cur <= 'f'))
2345 val = val * 16 + (cur - 'a') + 10;
2346 else if ((cur >= 'A') && (cur <= 'F'))
2347 val = val * 16 + (cur - 'A') + 10;
2348 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002349 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002350 val = 0;
2351 break;
2352 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002353 if (val > 0x10FFFF)
2354 outofrange = val;
2355
Owen Taylor3473f882001-02-23 17:55:21 +00002356 ptr++;
2357 cur = *ptr;
2358 }
2359 if (cur == ';')
2360 ptr++;
2361 } else if ((cur == '&') && (ptr[1] == '#')){
2362 ptr += 2;
2363 cur = *ptr;
2364 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002365 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002366 val = val * 10 + (cur - '0');
2367 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002368 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002369 val = 0;
2370 break;
2371 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002372 if (val > 0x10FFFF)
2373 outofrange = val;
2374
Owen Taylor3473f882001-02-23 17:55:21 +00002375 ptr++;
2376 cur = *ptr;
2377 }
2378 if (cur == ';')
2379 ptr++;
2380 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002381 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002382 return(0);
2383 }
2384 *str = ptr;
2385
2386 /*
2387 * [ WFC: Legal Character ]
2388 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002389 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002390 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002391 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002392 return(val);
2393 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002394 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2395 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2396 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002397 }
2398 return(0);
2399}
2400
2401/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002402 * xmlNewBlanksWrapperInputStream:
2403 * @ctxt: an XML parser context
2404 * @entity: an Entity pointer
2405 *
2406 * Create a new input stream for wrapping
2407 * blanks around a PEReference
2408 *
2409 * Returns the new input stream or NULL
2410 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002411
Daniel Veillardf5582f12002-06-11 10:08:16 +00002412static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002413
Daniel Veillardf4862f02002-09-10 11:13:43 +00002414static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002415xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2416 xmlParserInputPtr input;
2417 xmlChar *buffer;
2418 size_t length;
2419 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002420 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2421 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002422 return(NULL);
2423 }
2424 if (xmlParserDebugEntities)
2425 xmlGenericError(xmlGenericErrorContext,
2426 "new blanks wrapper for entity: %s\n", entity->name);
2427 input = xmlNewInputStream(ctxt);
2428 if (input == NULL) {
2429 return(NULL);
2430 }
2431 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002432 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002433 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002434 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002435 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002436 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002437 }
2438 buffer [0] = ' ';
2439 buffer [1] = '%';
2440 buffer [length-3] = ';';
2441 buffer [length-2] = ' ';
2442 buffer [length-1] = 0;
2443 memcpy(buffer + 2, entity->name, length - 5);
2444 input->free = deallocblankswrapper;
2445 input->base = buffer;
2446 input->cur = buffer;
2447 input->length = length;
2448 input->end = &buffer[length];
2449 return(input);
2450}
2451
2452/**
Owen Taylor3473f882001-02-23 17:55:21 +00002453 * xmlParserHandlePEReference:
2454 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002455 *
Owen Taylor3473f882001-02-23 17:55:21 +00002456 * [69] PEReference ::= '%' Name ';'
2457 *
2458 * [ WFC: No Recursion ]
2459 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002460 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002461 *
2462 * [ WFC: Entity Declared ]
2463 * In a document without any DTD, a document with only an internal DTD
2464 * subset which contains no parameter entity references, or a document
2465 * with "standalone='yes'", ... ... The declaration of a parameter
2466 * entity must precede any reference to it...
2467 *
2468 * [ VC: Entity Declared ]
2469 * In a document with an external subset or external parameter entities
2470 * with "standalone='no'", ... ... The declaration of a parameter entity
2471 * must precede any reference to it...
2472 *
2473 * [ WFC: In DTD ]
2474 * Parameter-entity references may only appear in the DTD.
2475 * NOTE: misleading but this is handled.
2476 *
2477 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002478 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002479 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002480 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002481 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002482 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002483 */
2484void
2485xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002486 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002487 xmlEntityPtr entity = NULL;
2488 xmlParserInputPtr input;
2489
Owen Taylor3473f882001-02-23 17:55:21 +00002490 if (RAW != '%') return;
2491 switch(ctxt->instate) {
2492 case XML_PARSER_CDATA_SECTION:
2493 return;
2494 case XML_PARSER_COMMENT:
2495 return;
2496 case XML_PARSER_START_TAG:
2497 return;
2498 case XML_PARSER_END_TAG:
2499 return;
2500 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002501 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002502 return;
2503 case XML_PARSER_PROLOG:
2504 case XML_PARSER_START:
2505 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002506 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002507 return;
2508 case XML_PARSER_ENTITY_DECL:
2509 case XML_PARSER_CONTENT:
2510 case XML_PARSER_ATTRIBUTE_VALUE:
2511 case XML_PARSER_PI:
2512 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002513 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002514 /* we just ignore it there */
2515 return;
2516 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002517 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002518 return;
2519 case XML_PARSER_ENTITY_VALUE:
2520 /*
2521 * NOTE: in the case of entity values, we don't do the
2522 * substitution here since we need the literal
2523 * entity value to be able to save the internal
2524 * subset of the document.
2525 * This will be handled by xmlStringDecodeEntities
2526 */
2527 return;
2528 case XML_PARSER_DTD:
2529 /*
2530 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2531 * In the internal DTD subset, parameter-entity references
2532 * can occur only where markup declarations can occur, not
2533 * within markup declarations.
2534 * In that case this is handled in xmlParseMarkupDecl
2535 */
2536 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2537 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002538 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002539 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002540 break;
2541 case XML_PARSER_IGNORE:
2542 return;
2543 }
2544
2545 NEXT;
2546 name = xmlParseName(ctxt);
2547 if (xmlParserDebugEntities)
2548 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002549 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002550 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002551 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002552 } else {
2553 if (RAW == ';') {
2554 NEXT;
2555 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2556 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillarde50ba812013-04-11 15:54:51 +08002557 if (ctxt->instate == XML_PARSER_EOF)
2558 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002559 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002560
Owen Taylor3473f882001-02-23 17:55:21 +00002561 /*
2562 * [ WFC: Entity Declared ]
2563 * In a document without any DTD, a document with only an
2564 * internal DTD subset which contains no parameter entity
2565 * references, or a document with "standalone='yes'", ...
2566 * ... The declaration of a parameter entity must precede
2567 * any reference to it...
2568 */
2569 if ((ctxt->standalone == 1) ||
2570 ((ctxt->hasExternalSubset == 0) &&
2571 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002572 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002573 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002574 } else {
2575 /*
2576 * [ VC: Entity Declared ]
2577 * In a document with an external subset or external
2578 * parameter entities with "standalone='no'", ...
2579 * ... The declaration of a parameter entity must precede
2580 * any reference to it...
2581 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002582 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2583 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2584 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002585 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002586 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002587 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2588 "PEReference: %%%s; not found\n",
2589 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002590 ctxt->valid = 0;
2591 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002592 } else if (ctxt->input->free != deallocblankswrapper) {
2593 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002594 if (xmlPushInput(ctxt, input) < 0)
2595 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002596 } else {
2597 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2598 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002599 xmlChar start[4];
2600 xmlCharEncoding enc;
2601
Owen Taylor3473f882001-02-23 17:55:21 +00002602 /*
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002603 * Note: external parameter entities will not be loaded, it
2604 * is not required for a non-validating parser, unless the
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002605 * option of validating, or substituting entities were
2606 * given. Doing so is far more secure as the parser will
2607 * only process data coming from the document entity by
2608 * default.
2609 */
2610 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2611 ((ctxt->options & XML_PARSE_NOENT) == 0) &&
2612 ((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
Daniel Veillarddd8367d2014-06-11 16:54:32 +08002613 ((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
2614 ((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
2615 (ctxt->replaceEntities == 0) &&
Daniel Veillard9cd1c3c2014-04-22 15:30:56 +08002616 (ctxt->validate == 0))
2617 return;
2618
2619 /*
Owen Taylor3473f882001-02-23 17:55:21 +00002620 * handle the extra spaces added before and after
2621 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002622 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002623 */
2624 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002625 if (xmlPushInput(ctxt, input) < 0)
2626 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002627
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002628 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002629 * Get the 4 first bytes and decode the charset
2630 * if enc != XML_CHAR_ENCODING_NONE
2631 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002632 * Note that, since we may have some non-UTF8
2633 * encoding (like UTF16, bug 135229), the 'length'
2634 * is not known, but we can calculate based upon
2635 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002636 */
2637 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002638 if (ctxt->instate == XML_PARSER_EOF)
2639 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002640 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002641 start[0] = RAW;
2642 start[1] = NXT(1);
2643 start[2] = NXT(2);
2644 start[3] = NXT(3);
2645 enc = xmlDetectCharEncoding(start, 4);
2646 if (enc != XML_CHAR_ENCODING_NONE) {
2647 xmlSwitchEncoding(ctxt, enc);
2648 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002649 }
2650
Owen Taylor3473f882001-02-23 17:55:21 +00002651 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002652 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2653 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002654 xmlParseTextDecl(ctxt);
2655 }
Owen Taylor3473f882001-02-23 17:55:21 +00002656 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002657 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2658 "PEReference: %s is not a parameter entity\n",
2659 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002660 }
2661 }
2662 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002663 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002664 }
Owen Taylor3473f882001-02-23 17:55:21 +00002665 }
2666}
2667
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current one plus @n.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 *            jumped to when the new size wraps around or when the
 *            reallocation fails, in which case buffer and buffer##_size
 *            are left unchanged.
 * @n is parenthesized so that any expression can be passed safely.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2682
2683/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002684 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002685 * @ctxt: the parser context
2686 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002687 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002688 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2689 * @end: an end marker xmlChar, 0 if none
2690 * @end2: an end marker xmlChar, 0 if none
2691 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002692 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002693 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002694 *
2695 * [67] Reference ::= EntityRef | CharRef
2696 *
2697 * [69] PEReference ::= '%' Name ';'
2698 *
2699 * Returns A newly allocated string with the substitution done. The caller
2700 * must deallocate it !
2701 */
2702xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002703xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2704 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002705 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002706 size_t buffer_size = 0;
2707 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002708
2709 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002710 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002711 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002712 xmlEntityPtr ent;
2713 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002714
Daniel Veillarda82b1822004-11-08 16:24:57 +00002715 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002716 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002717 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002718
Daniel Veillard0161e632008-08-28 15:36:32 +00002719 if (((ctxt->depth > 40) &&
2720 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2721 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002722 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002723 return(NULL);
2724 }
2725
2726 /*
2727 * allocate a translation buffer.
2728 */
2729 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002730 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002731 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002732
2733 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002734 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002735 * we are operating on already parsed values.
2736 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002737 if (str < last)
2738 c = CUR_SCHAR(str, l);
2739 else
2740 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002741 while ((c != 0) && (c != end) && /* non input consuming loop */
2742 (c != end2) && (c != end3)) {
2743
2744 if (c == 0) break;
2745 if ((c == '&') && (str[1] == '#')) {
2746 int val = xmlParseStringCharRef(ctxt, &str);
2747 if (val != 0) {
2748 COPY_BUF(0,buffer,nbchars,val);
2749 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002750 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002751 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002752 }
Owen Taylor3473f882001-02-23 17:55:21 +00002753 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2754 if (xmlParserDebugEntities)
2755 xmlGenericError(xmlGenericErrorContext,
2756 "String decoding Entity Reference: %.30s\n",
2757 str);
2758 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002759 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2760 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002761 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002762 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002763 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002764 if ((ent != NULL) &&
2765 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2766 if (ent->content != NULL) {
2767 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002768 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002769 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002770 }
Owen Taylor3473f882001-02-23 17:55:21 +00002771 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002772 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2773 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002774 }
2775 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002776 ctxt->depth++;
2777 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2778 0, 0, 0);
2779 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002780
Owen Taylor3473f882001-02-23 17:55:21 +00002781 if (rep != NULL) {
2782 current = rep;
2783 while (*current != 0) { /* non input consuming loop */
2784 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002785 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002786 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002787 goto int_error;
2788 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002789 }
2790 }
2791 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002792 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002793 }
2794 } else if (ent != NULL) {
2795 int i = xmlStrlen(ent->name);
2796 const xmlChar *cur = ent->name;
2797
2798 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002799 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002800 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002801 }
2802 for (;i > 0;i--)
2803 buffer[nbchars++] = *cur++;
2804 buffer[nbchars++] = ';';
2805 }
2806 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2807 if (xmlParserDebugEntities)
2808 xmlGenericError(xmlGenericErrorContext,
2809 "String decoding PE Reference: %.30s\n", str);
2810 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002811 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2812 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002813 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002814 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002815 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002816 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002817 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002818 }
Owen Taylor3473f882001-02-23 17:55:21 +00002819 ctxt->depth++;
2820 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2821 0, 0, 0);
2822 ctxt->depth--;
2823 if (rep != NULL) {
2824 current = rep;
2825 while (*current != 0) { /* non input consuming loop */
2826 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002827 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002828 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002829 goto int_error;
2830 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002831 }
2832 }
2833 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002834 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002835 }
2836 }
2837 } else {
2838 COPY_BUF(l,buffer,nbchars,c);
2839 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002840 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2841 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002842 }
2843 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002844 if (str < last)
2845 c = CUR_SCHAR(str, l);
2846 else
2847 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002848 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002849 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002850 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002851
2852mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002853 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002854int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002855 if (rep != NULL)
2856 xmlFree(rep);
2857 if (buffer != NULL)
2858 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002859 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002860}
2861
Daniel Veillarde57ec792003-09-10 10:50:59 +00002862/**
2863 * xmlStringDecodeEntities:
2864 * @ctxt: the parser context
2865 * @str: the input string
2866 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2867 * @end: an end marker xmlChar, 0 if none
2868 * @end2: an end marker xmlChar, 0 if none
2869 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002870 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002871 * Takes a entity string content and process to do the adequate substitutions.
2872 *
2873 * [67] Reference ::= EntityRef | CharRef
2874 *
2875 * [69] PEReference ::= '%' Name ';'
2876 *
2877 * Returns A newly allocated string with the substitution done. The caller
2878 * must deallocate it !
2879 */
2880xmlChar *
2881xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2882 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002883 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002884 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2885 end, end2, end3));
2886}
Owen Taylor3473f882001-02-23 17:55:21 +00002887
2888/************************************************************************
2889 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002890 * Commodity functions, cleanup needed ? *
2891 * *
2892 ************************************************************************/
2893
2894/**
2895 * areBlanks:
2896 * @ctxt: an XML parser context
2897 * @str: a xmlChar *
2898 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002899 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002900 *
2901 * Is this a sequence of blank chars that one can ignore ?
2902 *
2903 * Returns 1 if ignorable 0 otherwise.
2904 */
2905
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002906static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2907 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002908 int i, ret;
2909 xmlNodePtr lastChild;
2910
Daniel Veillard05c13a22001-09-09 08:38:09 +00002911 /*
2912 * Don't spend time trying to differentiate them, the same callback is
2913 * used !
2914 */
2915 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002916 return(0);
2917
Owen Taylor3473f882001-02-23 17:55:21 +00002918 /*
2919 * Check for xml:space value.
2920 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002921 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2922 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002923 return(0);
2924
2925 /*
2926 * Check that the string is made of blanks
2927 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002928 if (blank_chars == 0) {
2929 for (i = 0;i < len;i++)
2930 if (!(IS_BLANK_CH(str[i]))) return(0);
2931 }
Owen Taylor3473f882001-02-23 17:55:21 +00002932
2933 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002934 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002935 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002936 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002937 if (ctxt->myDoc != NULL) {
2938 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2939 if (ret == 0) return(1);
2940 if (ret == 1) return(0);
2941 }
2942
2943 /*
2944 * Otherwise, heuristic :-\
2945 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002946 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002947 if ((ctxt->node->children == NULL) &&
2948 (RAW == '<') && (NXT(1) == '/')) return(0);
2949
2950 lastChild = xmlGetLastChild(ctxt->node);
2951 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002952 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2953 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002954 } else if (xmlNodeIsText(lastChild))
2955 return(0);
2956 else if ((ctxt->node->children != NULL) &&
2957 (xmlNodeIsText(ctxt->node->children)))
2958 return(0);
2959 return(1);
2960}
2961
Owen Taylor3473f882001-02-23 17:55:21 +00002962/************************************************************************
2963 * *
2964 * Extra stuff for namespace support *
2965 * Relates to http://www.w3.org/TR/WD-xml-names *
2966 * *
2967 ************************************************************************/
2968
2969/**
2970 * xmlSplitQName:
2971 * @ctxt: an XML parser context
2972 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002973 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002974 *
2975 * parse an UTF8 encoded XML qualified name string
2976 *
2977 * [NS 5] QName ::= (Prefix ':')? LocalPart
2978 *
2979 * [NS 6] Prefix ::= NCName
2980 *
2981 * [NS 7] LocalPart ::= NCName
2982 *
2983 * Returns the local part, and prefix is updated
2984 * to get the Prefix if any.
2985 */
2986
2987xmlChar *
2988xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2989 xmlChar buf[XML_MAX_NAMELEN + 5];
2990 xmlChar *buffer = NULL;
2991 int len = 0;
2992 int max = XML_MAX_NAMELEN;
2993 xmlChar *ret = NULL;
2994 const xmlChar *cur = name;
2995 int c;
2996
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002997 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002998 *prefix = NULL;
2999
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00003000 if (cur == NULL) return(NULL);
3001
Owen Taylor3473f882001-02-23 17:55:21 +00003002#ifndef XML_XML_NAMESPACE
3003 /* xml: prefix is not really a namespace */
3004 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3005 (cur[2] == 'l') && (cur[3] == ':'))
3006 return(xmlStrdup(name));
3007#endif
3008
Daniel Veillard597bc482003-07-24 16:08:28 +00003009 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00003010 if (cur[0] == ':')
3011 return(xmlStrdup(name));
3012
3013 c = *cur++;
3014 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3015 buf[len++] = c;
3016 c = *cur++;
3017 }
3018 if (len >= max) {
3019 /*
3020 * Okay someone managed to make a huge name, so he's ready to pay
3021 * for the processing speed.
3022 */
3023 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003024
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003025 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003026 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003027 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003028 return(NULL);
3029 }
3030 memcpy(buffer, buf, len);
3031 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3032 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003033 xmlChar *tmp;
3034
Owen Taylor3473f882001-02-23 17:55:21 +00003035 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003036 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003037 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003038 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003039 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003040 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003041 return(NULL);
3042 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003043 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003044 }
3045 buffer[len++] = c;
3046 c = *cur++;
3047 }
3048 buffer[len] = 0;
3049 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003050
Daniel Veillard597bc482003-07-24 16:08:28 +00003051 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003052 if (buffer != NULL)
3053 xmlFree(buffer);
3054 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003055 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003056 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003057
Owen Taylor3473f882001-02-23 17:55:21 +00003058 if (buffer == NULL)
3059 ret = xmlStrndup(buf, len);
3060 else {
3061 ret = buffer;
3062 buffer = NULL;
3063 max = XML_MAX_NAMELEN;
3064 }
3065
3066
3067 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003068 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003069 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003070 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003071 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003072 }
Owen Taylor3473f882001-02-23 17:55:21 +00003073 len = 0;
3074
Daniel Veillardbb284f42002-10-16 18:02:47 +00003075 /*
3076 * Check that the first character is proper to start
3077 * a new name
3078 */
3079 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3080 ((c >= 0x41) && (c <= 0x5A)) ||
3081 (c == '_') || (c == ':'))) {
3082 int l;
3083 int first = CUR_SCHAR(cur, l);
3084
3085 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003086 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003087 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003088 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003089 }
3090 }
3091 cur++;
3092
Owen Taylor3473f882001-02-23 17:55:21 +00003093 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3094 buf[len++] = c;
3095 c = *cur++;
3096 }
3097 if (len >= max) {
3098 /*
3099 * Okay someone managed to make a huge name, so he's ready to pay
3100 * for the processing speed.
3101 */
3102 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003103
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003104 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003105 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003106 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003107 return(NULL);
3108 }
3109 memcpy(buffer, buf, len);
3110 while (c != 0) { /* tested bigname2.xml */
3111 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003112 xmlChar *tmp;
3113
Owen Taylor3473f882001-02-23 17:55:21 +00003114 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003115 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003116 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003117 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003118 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003119 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003120 return(NULL);
3121 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003122 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003123 }
3124 buffer[len++] = c;
3125 c = *cur++;
3126 }
3127 buffer[len] = 0;
3128 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003129
Owen Taylor3473f882001-02-23 17:55:21 +00003130 if (buffer == NULL)
3131 ret = xmlStrndup(buf, len);
3132 else {
3133 ret = buffer;
3134 }
3135 }
3136
3137 return(ret);
3138}
3139
3140/************************************************************************
3141 * *
3142 * The parser itself *
3143 * Relates to http://www.w3.org/TR/REC-xml *
3144 * *
3145 ************************************************************************/
3146
Daniel Veillard34e3f642008-07-29 09:02:27 +00003147/************************************************************************
3148 * *
3149 * Routines to parse Name, NCName and NmToken *
3150 * *
3151 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines of this section. They only
 * exist in DEBUG builds; each routine increments its own counter on entry.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3160
Daniel Veillard34e3f642008-07-29 09:02:27 +00003161/*
3162 * The two following functions are related to the change of accepted
3163 * characters for Name and NmToken in the Revision 5 of XML-1.0
3164 * They correspond to the modified production [4] and the new production [4a]
3165 * changes in that revision. Also note that the macros used for the
3166 * productions Letter, Digit, CombiningChar and Extender are not needed
3167 * anymore.
3168 * We still keep compatibility to pre-revision5 parsing semantic if the
3169 * new XML_PARSE_OLD10 option is given to the parser.
3170 */
3171static int
3172xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3173 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3174 /*
3175 * Use the new checks of production [4] [4a] amd [5] of the
3176 * Update 5 of XML-1.0
3177 */
3178 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3179 (((c >= 'a') && (c <= 'z')) ||
3180 ((c >= 'A') && (c <= 'Z')) ||
3181 (c == '_') || (c == ':') ||
3182 ((c >= 0xC0) && (c <= 0xD6)) ||
3183 ((c >= 0xD8) && (c <= 0xF6)) ||
3184 ((c >= 0xF8) && (c <= 0x2FF)) ||
3185 ((c >= 0x370) && (c <= 0x37D)) ||
3186 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3187 ((c >= 0x200C) && (c <= 0x200D)) ||
3188 ((c >= 0x2070) && (c <= 0x218F)) ||
3189 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3190 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3191 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3192 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3193 ((c >= 0x10000) && (c <= 0xEFFFF))))
3194 return(1);
3195 } else {
3196 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3197 return(1);
3198 }
3199 return(0);
3200}
3201
3202static int
3203xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3204 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3205 /*
3206 * Use the new checks of production [4] [4a] amd [5] of the
3207 * Update 5 of XML-1.0
3208 */
3209 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3210 (((c >= 'a') && (c <= 'z')) ||
3211 ((c >= 'A') && (c <= 'Z')) ||
3212 ((c >= '0') && (c <= '9')) || /* !start */
3213 (c == '_') || (c == ':') ||
3214 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3215 ((c >= 0xC0) && (c <= 0xD6)) ||
3216 ((c >= 0xD8) && (c <= 0xF6)) ||
3217 ((c >= 0xF8) && (c <= 0x2FF)) ||
3218 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3219 ((c >= 0x370) && (c <= 0x37D)) ||
3220 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3221 ((c >= 0x200C) && (c <= 0x200D)) ||
3222 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3223 ((c >= 0x2070) && (c <= 0x218F)) ||
3224 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3225 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3226 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3227 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3228 ((c >= 0x10000) && (c <= 0xEFFFF))))
3229 return(1);
3230 } else {
3231 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3232 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003233 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003234 (IS_COMBINING(c)) ||
3235 (IS_EXTENDER(c)))
3236 return(1);
3237 }
3238 return(0);
3239}
3240
Daniel Veillarde57ec792003-09-10 10:50:59 +00003241static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003242 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003243
Daniel Veillard34e3f642008-07-29 09:02:27 +00003244static const xmlChar *
3245xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3246 int len = 0, l;
3247 int c;
3248 int count = 0;
3249
Daniel Veillardc6561462009-03-25 10:22:31 +00003250#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003251 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003252#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003253
3254 /*
3255 * Handler for more complex cases
3256 */
3257 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003258 if (ctxt->instate == XML_PARSER_EOF)
3259 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003260 c = CUR_CHAR(l);
3261 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3262 /*
3263 * Use the new checks of production [4] [4a] amd [5] of the
3264 * Update 5 of XML-1.0
3265 */
3266 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3267 (!(((c >= 'a') && (c <= 'z')) ||
3268 ((c >= 'A') && (c <= 'Z')) ||
3269 (c == '_') || (c == ':') ||
3270 ((c >= 0xC0) && (c <= 0xD6)) ||
3271 ((c >= 0xD8) && (c <= 0xF6)) ||
3272 ((c >= 0xF8) && (c <= 0x2FF)) ||
3273 ((c >= 0x370) && (c <= 0x37D)) ||
3274 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3275 ((c >= 0x200C) && (c <= 0x200D)) ||
3276 ((c >= 0x2070) && (c <= 0x218F)) ||
3277 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3278 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3279 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3280 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3281 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3282 return(NULL);
3283 }
3284 len += l;
3285 NEXTL(l);
3286 c = CUR_CHAR(l);
3287 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3288 (((c >= 'a') && (c <= 'z')) ||
3289 ((c >= 'A') && (c <= 'Z')) ||
3290 ((c >= '0') && (c <= '9')) || /* !start */
3291 (c == '_') || (c == ':') ||
3292 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3293 ((c >= 0xC0) && (c <= 0xD6)) ||
3294 ((c >= 0xD8) && (c <= 0xF6)) ||
3295 ((c >= 0xF8) && (c <= 0x2FF)) ||
3296 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3297 ((c >= 0x370) && (c <= 0x37D)) ||
3298 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3299 ((c >= 0x200C) && (c <= 0x200D)) ||
3300 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3301 ((c >= 0x2070) && (c <= 0x218F)) ||
3302 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3303 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3304 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3305 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3306 ((c >= 0x10000) && (c <= 0xEFFFF))
3307 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003308 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003309 count = 0;
3310 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003311 if (ctxt->instate == XML_PARSER_EOF)
3312 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003313 }
3314 len += l;
3315 NEXTL(l);
3316 c = CUR_CHAR(l);
3317 }
3318 } else {
3319 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3320 (!IS_LETTER(c) && (c != '_') &&
3321 (c != ':'))) {
3322 return(NULL);
3323 }
3324 len += l;
3325 NEXTL(l);
3326 c = CUR_CHAR(l);
3327
3328 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3329 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3330 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003331 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003332 (IS_COMBINING(c)) ||
3333 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003334 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003335 count = 0;
3336 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003337 if (ctxt->instate == XML_PARSER_EOF)
3338 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003339 }
3340 len += l;
3341 NEXTL(l);
3342 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003343 if (c == 0) {
3344 count = 0;
3345 GROW;
3346 if (ctxt->instate == XML_PARSER_EOF)
3347 return(NULL);
3348 c = CUR_CHAR(l);
3349 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003350 }
3351 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003352 if ((len > XML_MAX_NAME_LENGTH) &&
3353 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3354 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3355 return(NULL);
3356 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003357 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3358 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3359 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3360}
3361
Owen Taylor3473f882001-02-23 17:55:21 +00003362/**
3363 * xmlParseName:
3364 * @ctxt: an XML parser context
3365 *
3366 * parse an XML name.
3367 *
3368 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3369 * CombiningChar | Extender
3370 *
3371 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3372 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003373 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003374 *
3375 * Returns the Name parsed or NULL
3376 */
3377
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003378const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003379xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003380 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003381 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003382 int count = 0;
3383
3384 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003385
Daniel Veillardc6561462009-03-25 10:22:31 +00003386#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003387 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003388#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003389
Daniel Veillard48b2f892001-02-25 16:11:03 +00003390 /*
3391 * Accelerator for simple ASCII names
3392 */
3393 in = ctxt->input->cur;
3394 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3395 ((*in >= 0x41) && (*in <= 0x5A)) ||
3396 (*in == '_') || (*in == ':')) {
3397 in++;
3398 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3399 ((*in >= 0x41) && (*in <= 0x5A)) ||
3400 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003401 (*in == '_') || (*in == '-') ||
3402 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003403 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003404 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003405 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003406 if ((count > XML_MAX_NAME_LENGTH) &&
3407 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3408 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3409 return(NULL);
3410 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003411 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003412 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003413 ctxt->nbChars += count;
3414 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003415 if (ret == NULL)
3416 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003417 return(ret);
3418 }
3419 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003420 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003421 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003422}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003423
Daniel Veillard34e3f642008-07-29 09:02:27 +00003424static const xmlChar *
3425xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3426 int len = 0, l;
3427 int c;
3428 int count = 0;
Daniel Veillarddcc19502013-05-22 22:56:45 +02003429 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
Daniel Veillard34e3f642008-07-29 09:02:27 +00003430
Daniel Veillardc6561462009-03-25 10:22:31 +00003431#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003432 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003433#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003434
3435 /*
3436 * Handler for more complex cases
3437 */
3438 GROW;
Daniel Veillarddcc19502013-05-22 22:56:45 +02003439 end = ctxt->input->cur;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003440 c = CUR_CHAR(l);
3441 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3442 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3443 return(NULL);
3444 }
3445
3446 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3447 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003448 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003449 if ((len > XML_MAX_NAME_LENGTH) &&
3450 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3451 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3452 return(NULL);
3453 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003454 count = 0;
3455 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003456 if (ctxt->instate == XML_PARSER_EOF)
3457 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003458 }
3459 len += l;
3460 NEXTL(l);
Daniel Veillarddcc19502013-05-22 22:56:45 +02003461 end = ctxt->input->cur;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003462 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003463 if (c == 0) {
3464 count = 0;
3465 GROW;
3466 if (ctxt->instate == XML_PARSER_EOF)
3467 return(NULL);
Daniel Veillarddcc19502013-05-22 22:56:45 +02003468 end = ctxt->input->cur;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003469 c = CUR_CHAR(l);
3470 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003471 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003472 if ((len > XML_MAX_NAME_LENGTH) &&
3473 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3474 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3475 return(NULL);
3476 }
Daniel Veillarddcc19502013-05-22 22:56:45 +02003477 return(xmlDictLookup(ctxt->dict, end - len, len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003478}
3479
3480/**
3481 * xmlParseNCName:
3482 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003483 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003484 *
3485 * parse an XML name.
3486 *
3487 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3488 * CombiningChar | Extender
3489 *
3490 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3491 *
3492 * Returns the Name parsed or NULL
3493 */
3494
3495static const xmlChar *
3496xmlParseNCName(xmlParserCtxtPtr ctxt) {
3497 const xmlChar *in;
3498 const xmlChar *ret;
3499 int count = 0;
3500
Daniel Veillardc6561462009-03-25 10:22:31 +00003501#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003502 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003503#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003504
3505 /*
3506 * Accelerator for simple ASCII names
3507 */
3508 in = ctxt->input->cur;
3509 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3510 ((*in >= 0x41) && (*in <= 0x5A)) ||
3511 (*in == '_')) {
3512 in++;
3513 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3514 ((*in >= 0x41) && (*in <= 0x5A)) ||
3515 ((*in >= 0x30) && (*in <= 0x39)) ||
3516 (*in == '_') || (*in == '-') ||
3517 (*in == '.'))
3518 in++;
3519 if ((*in > 0) && (*in < 0x80)) {
3520 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003521 if ((count > XML_MAX_NAME_LENGTH) &&
3522 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3523 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3524 return(NULL);
3525 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003526 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3527 ctxt->input->cur = in;
3528 ctxt->nbChars += count;
3529 ctxt->input->col += count;
3530 if (ret == NULL) {
3531 xmlErrMemory(ctxt, NULL);
3532 }
3533 return(ret);
3534 }
3535 }
3536 return(xmlParseNCNameComplex(ctxt));
3537}
3538
Daniel Veillard46de64e2002-05-29 08:21:33 +00003539/**
3540 * xmlParseNameAndCompare:
3541 * @ctxt: an XML parser context
3542 *
3543 * parse an XML name and compares for match
3544 * (specialized for endtag parsing)
3545 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003546 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3547 * and the name for mismatch
3548 */
3549
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003550static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003551xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003552 register const xmlChar *cmp = other;
3553 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003554 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003555
3556 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003557 if (ctxt->instate == XML_PARSER_EOF)
3558 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003559
Daniel Veillard46de64e2002-05-29 08:21:33 +00003560 in = ctxt->input->cur;
3561 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003562 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003563 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003564 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003565 }
William M. Brack76e95df2003-10-18 16:20:14 +00003566 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003567 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003568 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003569 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003570 }
3571 /* failure (or end of input buffer), check with full function */
3572 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003573 /* strings coming from the dictionnary direct compare possible */
3574 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003575 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003576 }
3577 return ret;
3578}
3579
Owen Taylor3473f882001-02-23 17:55:21 +00003580/**
3581 * xmlParseStringName:
3582 * @ctxt: an XML parser context
3583 * @str: a pointer to the string pointer (IN/OUT)
3584 *
3585 * parse an XML name.
3586 *
3587 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3588 * CombiningChar | Extender
3589 *
3590 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3591 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003592 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003593 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003594 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003595 * is updated to the current location in the string.
3596 */
3597
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003598static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003599xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3600 xmlChar buf[XML_MAX_NAMELEN + 5];
3601 const xmlChar *cur = *str;
3602 int len = 0, l;
3603 int c;
3604
Daniel Veillardc6561462009-03-25 10:22:31 +00003605#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003606 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003607#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003608
Owen Taylor3473f882001-02-23 17:55:21 +00003609 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003610 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003611 return(NULL);
3612 }
3613
Daniel Veillard34e3f642008-07-29 09:02:27 +00003614 COPY_BUF(l,buf,len,c);
3615 cur += l;
3616 c = CUR_SCHAR(cur, l);
3617 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003618 COPY_BUF(l,buf,len,c);
3619 cur += l;
3620 c = CUR_SCHAR(cur, l);
3621 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3622 /*
3623 * Okay someone managed to make a huge name, so he's ready to pay
3624 * for the processing speed.
3625 */
3626 xmlChar *buffer;
3627 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003628
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003629 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003630 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003631 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003632 return(NULL);
3633 }
3634 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003635 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003636 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003637 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003638
3639 if ((len > XML_MAX_NAME_LENGTH) &&
3640 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3641 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3642 xmlFree(buffer);
3643 return(NULL);
3644 }
Owen Taylor3473f882001-02-23 17:55:21 +00003645 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003646 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003647 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003648 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003649 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003650 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003651 return(NULL);
3652 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003653 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003654 }
3655 COPY_BUF(l,buffer,len,c);
3656 cur += l;
3657 c = CUR_SCHAR(cur, l);
3658 }
3659 buffer[len] = 0;
3660 *str = cur;
3661 return(buffer);
3662 }
3663 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003664 if ((len > XML_MAX_NAME_LENGTH) &&
3665 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3666 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3667 return(NULL);
3668 }
Owen Taylor3473f882001-02-23 17:55:21 +00003669 *str = cur;
3670 return(xmlStrndup(buf, len));
3671}
3672
3673/**
3674 * xmlParseNmtoken:
3675 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003676 *
Owen Taylor3473f882001-02-23 17:55:21 +00003677 * parse an XML Nmtoken.
3678 *
3679 * [7] Nmtoken ::= (NameChar)+
3680 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003681 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003682 *
3683 * Returns the Nmtoken parsed or NULL
3684 */
3685
3686xmlChar *
3687xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3688 xmlChar buf[XML_MAX_NAMELEN + 5];
3689 int len = 0, l;
3690 int c;
3691 int count = 0;
3692
Daniel Veillardc6561462009-03-25 10:22:31 +00003693#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003694 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003695#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003696
Owen Taylor3473f882001-02-23 17:55:21 +00003697 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003698 if (ctxt->instate == XML_PARSER_EOF)
3699 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003700 c = CUR_CHAR(l);
3701
Daniel Veillard34e3f642008-07-29 09:02:27 +00003702 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003703 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003704 count = 0;
3705 GROW;
3706 }
3707 COPY_BUF(l,buf,len,c);
3708 NEXTL(l);
3709 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003710 if (c == 0) {
3711 count = 0;
3712 GROW;
3713 if (ctxt->instate == XML_PARSER_EOF)
3714 return(NULL);
3715 c = CUR_CHAR(l);
3716 }
Owen Taylor3473f882001-02-23 17:55:21 +00003717 if (len >= XML_MAX_NAMELEN) {
3718 /*
3719 * Okay someone managed to make a huge token, so he's ready to pay
3720 * for the processing speed.
3721 */
3722 xmlChar *buffer;
3723 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003724
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003725 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003726 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003727 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003728 return(NULL);
3729 }
3730 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003731 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003732 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003733 count = 0;
3734 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003735 if (ctxt->instate == XML_PARSER_EOF) {
3736 xmlFree(buffer);
3737 return(NULL);
3738 }
Owen Taylor3473f882001-02-23 17:55:21 +00003739 }
3740 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003741 xmlChar *tmp;
3742
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003743 if ((max > XML_MAX_NAME_LENGTH) &&
3744 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3745 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3746 xmlFree(buffer);
3747 return(NULL);
3748 }
Owen Taylor3473f882001-02-23 17:55:21 +00003749 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003750 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003751 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003752 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003753 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003754 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003755 return(NULL);
3756 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003757 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003758 }
3759 COPY_BUF(l,buffer,len,c);
3760 NEXTL(l);
3761 c = CUR_CHAR(l);
3762 }
3763 buffer[len] = 0;
3764 return(buffer);
3765 }
3766 }
3767 if (len == 0)
3768 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003769 if ((len > XML_MAX_NAME_LENGTH) &&
3770 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3771 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3772 return(NULL);
3773 }
Owen Taylor3473f882001-02-23 17:55:21 +00003774 return(xmlStrndup(buf, len));
3775}
3776
3777/**
3778 * xmlParseEntityValue:
3779 * @ctxt: an XML parser context
3780 * @orig: if non-NULL store a copy of the original entity value
3781 *
3782 * parse a value for ENTITY declarations
3783 *
3784 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3785 * "'" ([^%&'] | PEReference | Reference)* "'"
3786 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003787 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003788 */
3789
3790xmlChar *
3791xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3792 xmlChar *buf = NULL;
3793 int len = 0;
3794 int size = XML_PARSER_BUFFER_SIZE;
3795 int c, l;
3796 xmlChar stop;
3797 xmlChar *ret = NULL;
3798 const xmlChar *cur = NULL;
3799 xmlParserInputPtr input;
3800
3801 if (RAW == '"') stop = '"';
3802 else if (RAW == '\'') stop = '\'';
3803 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003804 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003805 return(NULL);
3806 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003807 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003808 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003809 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003810 return(NULL);
3811 }
3812
3813 /*
3814 * The content of the entity definition is copied in a buffer.
3815 */
3816
3817 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3818 input = ctxt->input;
3819 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003820 if (ctxt->instate == XML_PARSER_EOF) {
3821 xmlFree(buf);
3822 return(NULL);
3823 }
Owen Taylor3473f882001-02-23 17:55:21 +00003824 NEXT;
3825 c = CUR_CHAR(l);
3826 /*
3827 * NOTE: 4.4.5 Included in Literal
3828 * When a parameter entity reference appears in a literal entity
3829 * value, ... a single or double quote character in the replacement
3830 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003831 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003832 * In practice it means we stop the loop only when back at parsing
3833 * the initial entity and the quote is found
3834 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003835 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3836 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003837 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003838 xmlChar *tmp;
3839
Owen Taylor3473f882001-02-23 17:55:21 +00003840 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003841 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3842 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003843 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003844 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003845 return(NULL);
3846 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003847 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003848 }
3849 COPY_BUF(l,buf,len,c);
3850 NEXTL(l);
3851 /*
3852 * Pop-up of finished entities.
3853 */
3854 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3855 xmlPopInput(ctxt);
3856
3857 GROW;
3858 c = CUR_CHAR(l);
3859 if (c == 0) {
3860 GROW;
3861 c = CUR_CHAR(l);
3862 }
3863 }
3864 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003865 if (ctxt->instate == XML_PARSER_EOF) {
3866 xmlFree(buf);
3867 return(NULL);
3868 }
Owen Taylor3473f882001-02-23 17:55:21 +00003869
3870 /*
3871 * Raise problem w.r.t. '&' and '%' being used in non-entities
3872 * reference constructs. Note Charref will be handled in
3873 * xmlStringDecodeEntities()
3874 */
3875 cur = buf;
3876 while (*cur != 0) { /* non input consuming */
3877 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3878 xmlChar *name;
3879 xmlChar tmp = *cur;
3880
3881 cur++;
3882 name = xmlParseStringName(ctxt, &cur);
3883 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003884 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003885 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003886 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003887 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003888 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3889 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003890 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003891 }
3892 if (name != NULL)
3893 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003894 if (*cur == 0)
3895 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003896 }
3897 cur++;
3898 }
3899
3900 /*
3901 * Then PEReference entities are substituted.
3902 */
3903 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003904 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003905 xmlFree(buf);
3906 } else {
3907 NEXT;
3908 /*
3909 * NOTE: 4.4.7 Bypassed
3910 * When a general entity reference appears in the EntityValue in
3911 * an entity declaration, it is bypassed and left as is.
3912 * so XML_SUBSTITUTE_REF is not set here.
3913 */
3914 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3915 0, 0, 0);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003916 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003917 *orig = buf;
3918 else
3919 xmlFree(buf);
3920 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003921
Owen Taylor3473f882001-02-23 17:55:21 +00003922 return(ret);
3923}
3924
3925/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003926 * xmlParseAttValueComplex:
3927 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003928 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003929 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003930 *
3931 * parse a value for an attribute, this is the fallback function
3932 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003933 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003934 *
3935 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3936 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003937static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003938xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003939 xmlChar limit = 0;
3940 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003941 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003942 size_t len = 0;
3943 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003944 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003945 xmlChar *current = NULL;
3946 xmlEntityPtr ent;
3947
Owen Taylor3473f882001-02-23 17:55:21 +00003948 if (NXT(0) == '"') {
3949 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3950 limit = '"';
3951 NEXT;
3952 } else if (NXT(0) == '\'') {
3953 limit = '\'';
3954 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3955 NEXT;
3956 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003957 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003958 return(NULL);
3959 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003960
Owen Taylor3473f882001-02-23 17:55:21 +00003961 /*
3962 * allocate a translation buffer.
3963 */
3964 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003965 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003966 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003967
3968 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003969 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003970 */
3971 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003972 while (((NXT(0) != limit) && /* checked */
3973 (IS_CHAR(c)) && (c != '<')) &&
3974 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003975 /*
3976 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3977 * special option is given
3978 */
3979 if ((len > XML_MAX_TEXT_LENGTH) &&
3980 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3981 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003982 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003983 goto mem_error;
3984 }
Owen Taylor3473f882001-02-23 17:55:21 +00003985 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003986 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003987 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003988 if (NXT(1) == '#') {
3989 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003990
Owen Taylor3473f882001-02-23 17:55:21 +00003991 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003992 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003993 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003994 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003995 }
3996 buf[len++] = '&';
3997 } else {
3998 /*
3999 * The reparsing will be done in xmlStringGetNodeList()
4000 * called by the attribute() function in SAX.c
4001 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08004002 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004003 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00004004 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004005 buf[len++] = '&';
4006 buf[len++] = '#';
4007 buf[len++] = '3';
4008 buf[len++] = '8';
4009 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00004010 }
Daniel Veillarddc171602008-03-26 17:41:38 +00004011 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004012 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004013 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004014 }
Owen Taylor3473f882001-02-23 17:55:21 +00004015 len += xmlCopyChar(0, &buf[len], val);
4016 }
4017 } else {
4018 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00004019 ctxt->nbentities++;
4020 if (ent != NULL)
4021 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004022 if ((ent != NULL) &&
4023 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004024 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004025 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004026 }
4027 if ((ctxt->replaceEntities == 0) &&
4028 (ent->content[0] == '&')) {
4029 buf[len++] = '&';
4030 buf[len++] = '#';
4031 buf[len++] = '3';
4032 buf[len++] = '8';
4033 buf[len++] = ';';
4034 } else {
4035 buf[len++] = ent->content[0];
4036 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004037 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004038 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004039 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4040 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004041 XML_SUBSTITUTE_REF,
4042 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00004043 if (rep != NULL) {
4044 current = rep;
4045 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004046 if ((*current == 0xD) || (*current == 0xA) ||
4047 (*current == 0x9)) {
4048 buf[len++] = 0x20;
4049 current++;
4050 } else
4051 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004052 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004053 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004054 }
4055 }
4056 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004057 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004058 }
4059 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004060 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004061 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004062 }
Owen Taylor3473f882001-02-23 17:55:21 +00004063 if (ent->content != NULL)
4064 buf[len++] = ent->content[0];
4065 }
4066 } else if (ent != NULL) {
4067 int i = xmlStrlen(ent->name);
4068 const xmlChar *cur = ent->name;
4069
4070 /*
4071 * This may look absurd but is needed to detect
4072 * entities problems
4073 */
4074 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004075 (ent->content != NULL) && (ent->checked == 0)) {
4076 unsigned long oldnbent = ctxt->nbentities;
4077
Owen Taylor3473f882001-02-23 17:55:21 +00004078 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004079 XML_SUBSTITUTE_REF, 0, 0, 0);
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004080
Daniel Veillardcff25462013-03-11 15:57:55 +08004081 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004082 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004083 if (xmlStrchr(rep, '<'))
4084 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004085 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004086 rep = NULL;
4087 }
Owen Taylor3473f882001-02-23 17:55:21 +00004088 }
4089
4090 /*
4091 * Just output the reference
4092 */
4093 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004094 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004095 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004096 }
4097 for (;i > 0;i--)
4098 buf[len++] = *cur++;
4099 buf[len++] = ';';
4100 }
4101 }
4102 } else {
4103 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004104 if ((len != 0) || (!normalize)) {
4105 if ((!normalize) || (!in_space)) {
4106 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004107 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004108 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004109 }
4110 }
4111 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004112 }
4113 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004114 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004115 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004116 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004117 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004118 }
4119 }
4120 NEXTL(l);
4121 }
4122 GROW;
4123 c = CUR_CHAR(l);
4124 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004125 if (ctxt->instate == XML_PARSER_EOF)
4126 goto error;
4127
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004128 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004129 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004130 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004131 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004132 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004133 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004134 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004135 if ((c != 0) && (!IS_CHAR(c))) {
4136 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4137 "invalid character in attribute value\n");
4138 } else {
4139 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4140 "AttValue: ' expected\n");
4141 }
Owen Taylor3473f882001-02-23 17:55:21 +00004142 } else
4143 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004144
4145 /*
4146 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004147 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004148 */
4149 if (len >= INT_MAX) {
4150 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004151 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004152 goto mem_error;
4153 }
4154
4155 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004156 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004157
4158mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004159 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004160error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004161 if (buf != NULL)
4162 xmlFree(buf);
4163 if (rep != NULL)
4164 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004165 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004166}
4167
4168/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004169 * xmlParseAttValue:
4170 * @ctxt: an XML parser context
4171 *
4172 * parse a value for an attribute
4173 * Note: the parser won't do substitution of entities here, this
4174 * will be handled later in xmlStringGetNodeList
4175 *
4176 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4177 * "'" ([^<&'] | Reference)* "'"
4178 *
4179 * 3.3.3 Attribute-Value Normalization:
4180 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004181 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004182 * - a character reference is processed by appending the referenced
4183 * character to the attribute value
4184 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004185 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004186 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4187 * appending #x20 to the normalized value, except that only a single
4188 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004189 * parsed entity or the literal entity value of an internal parsed entity
4190 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004191 * If the declared value is not CDATA, then the XML processor must further
4192 * process the normalized attribute value by discarding any leading and
4193 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004194 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004195 * All attributes for which no declaration has been read should be treated
4196 * by a non-validating parser as if declared CDATA.
4197 *
4198 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4199 */
4200
4201
4202xmlChar *
4203xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004204 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004205 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004206}
4207
4208/**
Owen Taylor3473f882001-02-23 17:55:21 +00004209 * xmlParseSystemLiteral:
4210 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004211 *
Owen Taylor3473f882001-02-23 17:55:21 +00004212 * parse an XML Literal
4213 *
4214 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4215 *
4216 * Returns the SystemLiteral parsed or NULL
4217 */
4218
4219xmlChar *
4220xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4221 xmlChar *buf = NULL;
4222 int len = 0;
4223 int size = XML_PARSER_BUFFER_SIZE;
4224 int cur, l;
4225 xmlChar stop;
4226 int state = ctxt->instate;
4227 int count = 0;
4228
4229 SHRINK;
4230 if (RAW == '"') {
4231 NEXT;
4232 stop = '"';
4233 } else if (RAW == '\'') {
4234 NEXT;
4235 stop = '\'';
4236 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004237 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004238 return(NULL);
4239 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004240
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004241 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004242 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004243 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004244 return(NULL);
4245 }
4246 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4247 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004248 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004249 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004250 xmlChar *tmp;
4251
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004252 if ((size > XML_MAX_NAME_LENGTH) &&
4253 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4254 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4255 xmlFree(buf);
4256 ctxt->instate = (xmlParserInputState) state;
4257 return(NULL);
4258 }
Owen Taylor3473f882001-02-23 17:55:21 +00004259 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004260 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4261 if (tmp == NULL) {
4262 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004263 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004264 ctxt->instate = (xmlParserInputState) state;
4265 return(NULL);
4266 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004267 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004268 }
4269 count++;
4270 if (count > 50) {
4271 GROW;
4272 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004273 if (ctxt->instate == XML_PARSER_EOF) {
4274 xmlFree(buf);
4275 return(NULL);
4276 }
Owen Taylor3473f882001-02-23 17:55:21 +00004277 }
4278 COPY_BUF(l,buf,len,cur);
4279 NEXTL(l);
4280 cur = CUR_CHAR(l);
4281 if (cur == 0) {
4282 GROW;
4283 SHRINK;
4284 cur = CUR_CHAR(l);
4285 }
4286 }
4287 buf[len] = 0;
4288 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004289 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004290 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004291 } else {
4292 NEXT;
4293 }
4294 return(buf);
4295}
4296
4297/**
4298 * xmlParsePubidLiteral:
4299 * @ctxt: an XML parser context
4300 *
4301 * parse an XML public literal
4302 *
4303 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4304 *
4305 * Returns the PubidLiteral parsed or NULL.
4306 */
4307
4308xmlChar *
4309xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4310 xmlChar *buf = NULL;
4311 int len = 0;
4312 int size = XML_PARSER_BUFFER_SIZE;
4313 xmlChar cur;
4314 xmlChar stop;
4315 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004316 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004317
4318 SHRINK;
4319 if (RAW == '"') {
4320 NEXT;
4321 stop = '"';
4322 } else if (RAW == '\'') {
4323 NEXT;
4324 stop = '\'';
4325 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004326 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004327 return(NULL);
4328 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004329 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004330 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004331 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004332 return(NULL);
4333 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004334 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004335 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004336 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004337 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004338 xmlChar *tmp;
4339
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004340 if ((size > XML_MAX_NAME_LENGTH) &&
4341 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4342 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4343 xmlFree(buf);
4344 return(NULL);
4345 }
Owen Taylor3473f882001-02-23 17:55:21 +00004346 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004347 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4348 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004349 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004350 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004351 return(NULL);
4352 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004353 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004354 }
4355 buf[len++] = cur;
4356 count++;
4357 if (count > 50) {
4358 GROW;
4359 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004360 if (ctxt->instate == XML_PARSER_EOF) {
4361 xmlFree(buf);
4362 return(NULL);
4363 }
Owen Taylor3473f882001-02-23 17:55:21 +00004364 }
4365 NEXT;
4366 cur = CUR;
4367 if (cur == 0) {
4368 GROW;
4369 SHRINK;
4370 cur = CUR;
4371 }
4372 }
4373 buf[len] = 0;
4374 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004375 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004376 } else {
4377 NEXT;
4378 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004379 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004380 return(buf);
4381}
4382
Daniel Veillard8ed10722009-08-20 19:17:36 +02004383static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004384
/*
 * used for the test in the inner loop of the char data testing
 *
 * Entry i holds i itself for the tab (0x09) and for every byte from 0x20
 * to 0x7F other than '&', '<' and ']'; every other entry is 0, including
 * CR, LF and all non-ASCII bytes.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 is kept, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) is zeroed */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) is zeroed */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) is zeroed */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, left to the implicit zero initialization */
};
4422
Owen Taylor3473f882001-02-23 17:55:21 +00004423/**
4424 * xmlParseCharData:
4425 * @ctxt: an XML parser context
4426 * @cdata: int indicating whether we are within a CDATA section
4427 *
4428 * parse a CharData section.
4429 * if we are within a CDATA section ']]>' marks an end of section.
4430 *
4431 * The right angle bracket (>) may be represented using the string "&gt;",
4432 * and must, for compatibility, be escaped using "&gt;" or a character
4433 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004434 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004435 *
4436 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4437 */
4438
4439void
4440xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004441 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004442 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004443 int line = ctxt->input->line;
4444 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004445 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004446
4447 SHRINK;
4448 GROW;
4449 /*
4450 * Accelerated common case where input don't need to be
4451 * modified before passing it to the handler.
4452 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004453 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004454 in = ctxt->input->cur;
4455 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004456get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004457 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004458 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004459 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004460 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004461 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004462 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004463 goto get_more_space;
4464 }
4465 if (*in == '<') {
4466 nbchar = in - ctxt->input->cur;
4467 if (nbchar > 0) {
4468 const xmlChar *tmp = ctxt->input->cur;
4469 ctxt->input->cur = in;
4470
Daniel Veillard34099b42004-11-04 17:34:35 +00004471 if ((ctxt->sax != NULL) &&
4472 (ctxt->sax->ignorableWhitespace !=
4473 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004474 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004475 if (ctxt->sax->ignorableWhitespace != NULL)
4476 ctxt->sax->ignorableWhitespace(ctxt->userData,
4477 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004478 } else {
4479 if (ctxt->sax->characters != NULL)
4480 ctxt->sax->characters(ctxt->userData,
4481 tmp, nbchar);
4482 if (*ctxt->space == -1)
4483 *ctxt->space = -2;
4484 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004485 } else if ((ctxt->sax != NULL) &&
4486 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004487 ctxt->sax->characters(ctxt->userData,
4488 tmp, nbchar);
4489 }
4490 }
4491 return;
4492 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004493
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004494get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004495 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004496 while (test_char_data[*in]) {
4497 in++;
4498 ccol++;
4499 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004500 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004501 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004502 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004503 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004504 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004505 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004506 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004507 }
4508 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004509 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004510 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004511 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004512 return;
4513 }
4514 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004515 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004516 goto get_more;
4517 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004518 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004519 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004520 if ((ctxt->sax != NULL) &&
4521 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004522 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004523 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004524 const xmlChar *tmp = ctxt->input->cur;
4525 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004526
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004527 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004528 if (ctxt->sax->ignorableWhitespace != NULL)
4529 ctxt->sax->ignorableWhitespace(ctxt->userData,
4530 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004531 } else {
4532 if (ctxt->sax->characters != NULL)
4533 ctxt->sax->characters(ctxt->userData,
4534 tmp, nbchar);
4535 if (*ctxt->space == -1)
4536 *ctxt->space = -2;
4537 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004538 line = ctxt->input->line;
4539 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004540 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004541 if (ctxt->sax->characters != NULL)
4542 ctxt->sax->characters(ctxt->userData,
4543 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004544 line = ctxt->input->line;
4545 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004546 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004547 /* something really bad happened in the SAX callback */
4548 if (ctxt->instate != XML_PARSER_CONTENT)
4549 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004550 }
4551 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004552 if (*in == 0xD) {
4553 in++;
4554 if (*in == 0xA) {
4555 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004556 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004557 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004558 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004559 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004560 in--;
4561 }
4562 if (*in == '<') {
4563 return;
4564 }
4565 if (*in == '&') {
4566 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004567 }
4568 SHRINK;
4569 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004570 if (ctxt->instate == XML_PARSER_EOF)
4571 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004572 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004573 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004574 nbchar = 0;
4575 }
Daniel Veillard50582112001-03-26 22:52:16 +00004576 ctxt->input->line = line;
4577 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004578 xmlParseCharDataComplex(ctxt, cdata);
4579}
4580
Daniel Veillard01c13b52002-12-10 15:19:08 +00004581/**
4582 * xmlParseCharDataComplex:
4583 * @ctxt: an XML parser context
4584 * @cdata: int indicating whether we are within a CDATA section
4585 *
4586 * parse a CharData section.this is the fallback function
4587 * of xmlParseCharData() when the parsing requires handling
4588 * of non-ASCII characters.
4589 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004590static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004591xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004592 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4593 int nbchar = 0;
4594 int cur, l;
4595 int count = 0;
4596
4597 SHRINK;
4598 GROW;
4599 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004600 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004601 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004602 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004603 if ((cur == ']') && (NXT(1) == ']') &&
4604 (NXT(2) == '>')) {
4605 if (cdata) break;
4606 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004607 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004608 }
4609 }
4610 COPY_BUF(l,buf,nbchar,cur);
4611 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004612 buf[nbchar] = 0;
4613
Owen Taylor3473f882001-02-23 17:55:21 +00004614 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004615 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004616 */
4617 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004618 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004619 if (ctxt->sax->ignorableWhitespace != NULL)
4620 ctxt->sax->ignorableWhitespace(ctxt->userData,
4621 buf, nbchar);
4622 } else {
4623 if (ctxt->sax->characters != NULL)
4624 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004625 if ((ctxt->sax->characters !=
4626 ctxt->sax->ignorableWhitespace) &&
4627 (*ctxt->space == -1))
4628 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004629 }
4630 }
4631 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004632 /* something really bad happened in the SAX callback */
4633 if (ctxt->instate != XML_PARSER_CONTENT)
4634 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004635 }
4636 count++;
4637 if (count > 50) {
4638 GROW;
4639 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004640 if (ctxt->instate == XML_PARSER_EOF)
4641 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004642 }
4643 NEXTL(l);
4644 cur = CUR_CHAR(l);
4645 }
4646 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004647 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004648 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004649 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004650 */
4651 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004652 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004653 if (ctxt->sax->ignorableWhitespace != NULL)
4654 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4655 } else {
4656 if (ctxt->sax->characters != NULL)
4657 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004658 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4659 (*ctxt->space == -1))
4660 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004661 }
4662 }
4663 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004664 if ((cur != 0) && (!IS_CHAR(cur))) {
4665 /* Generate the error and skip the offending character */
4666 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4667 "PCDATA invalid Char value %d\n",
4668 cur);
4669 NEXTL(l);
4670 }
Owen Taylor3473f882001-02-23 17:55:21 +00004671}
4672
4673/**
4674 * xmlParseExternalID:
4675 * @ctxt: an XML parser context
4676 * @publicID: a xmlChar** receiving PubidLiteral
4677 * @strict: indicate whether we should restrict parsing to only
4678 * production [75], see NOTE below
4679 *
4680 * Parse an External ID or a Public ID
4681 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004682 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004683 * 'PUBLIC' S PubidLiteral S SystemLiteral
4684 *
4685 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4686 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4687 *
4688 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4689 *
4690 * Returns the function returns SystemLiteral and in the second
4691 * case publicID receives PubidLiteral, is strict is off
4692 * it is possible to return NULL and have publicID set.
4693 */
4694
4695xmlChar *
4696xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4697 xmlChar *URI = NULL;
4698
4699 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004700
4701 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004702 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004703 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004704 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004705 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4706 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004707 }
4708 SKIP_BLANKS;
4709 URI = xmlParseSystemLiteral(ctxt);
4710 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004711 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004712 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004713 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004714 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004715 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004716 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004717 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004718 }
4719 SKIP_BLANKS;
4720 *publicID = xmlParsePubidLiteral(ctxt);
4721 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004722 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004723 }
4724 if (strict) {
4725 /*
4726 * We don't handle [83] so "S SystemLiteral" is required.
4727 */
William M. Brack76e95df2003-10-18 16:20:14 +00004728 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004729 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004730 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004731 }
4732 } else {
4733 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004734 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004735 * "S SystemLiteral" is not detected. From a purely parsing
4736 * point of view that's a nice mess.
4737 */
4738 const xmlChar *ptr;
4739 GROW;
4740
4741 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004742 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004743
William M. Brack76e95df2003-10-18 16:20:14 +00004744 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004745 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4746 }
4747 SKIP_BLANKS;
4748 URI = xmlParseSystemLiteral(ctxt);
4749 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004750 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004751 }
4752 }
4753 return(URI);
4754}
4755
4756/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004757 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004758 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004759 * @buf: the already parsed part of the buffer
4760 * @len: number of bytes filles in the buffer
4761 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004762 *
4763 * Skip an XML (SGML) comment <!-- .... -->
4764 * The spec says that "For compatibility, the string "--" (double-hyphen)
4765 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004766 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004767 *
4768 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4769 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004770static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004771xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4772 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004773 int q, ql;
4774 int r, rl;
4775 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004776 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004777 int inputid;
4778
4779 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004780
Owen Taylor3473f882001-02-23 17:55:21 +00004781 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004782 len = 0;
4783 size = XML_PARSER_BUFFER_SIZE;
4784 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4785 if (buf == NULL) {
4786 xmlErrMemory(ctxt, NULL);
4787 return;
4788 }
Owen Taylor3473f882001-02-23 17:55:21 +00004789 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004790 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004791 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004792 if (q == 0)
4793 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004794 if (!IS_CHAR(q)) {
4795 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4796 "xmlParseComment: invalid xmlChar value %d\n",
4797 q);
4798 xmlFree (buf);
4799 return;
4800 }
Owen Taylor3473f882001-02-23 17:55:21 +00004801 NEXTL(ql);
4802 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004803 if (r == 0)
4804 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004805 if (!IS_CHAR(r)) {
4806 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4807 "xmlParseComment: invalid xmlChar value %d\n",
4808 q);
4809 xmlFree (buf);
4810 return;
4811 }
Owen Taylor3473f882001-02-23 17:55:21 +00004812 NEXTL(rl);
4813 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004814 if (cur == 0)
4815 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004816 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004817 ((cur != '>') ||
4818 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004819 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004820 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004821 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004822 if ((len > XML_MAX_TEXT_LENGTH) &&
4823 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4824 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4825 "Comment too big found", NULL);
4826 xmlFree (buf);
4827 return;
4828 }
Owen Taylor3473f882001-02-23 17:55:21 +00004829 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004830 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004831 size_t new_size;
4832
4833 new_size = size * 2;
4834 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004835 if (new_buf == NULL) {
4836 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004837 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004838 return;
4839 }
William M. Bracka3215c72004-07-31 16:24:01 +00004840 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004841 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004842 }
4843 COPY_BUF(ql,buf,len,q);
4844 q = r;
4845 ql = rl;
4846 r = cur;
4847 rl = l;
4848
4849 count++;
4850 if (count > 50) {
4851 GROW;
4852 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004853 if (ctxt->instate == XML_PARSER_EOF) {
4854 xmlFree(buf);
4855 return;
4856 }
Owen Taylor3473f882001-02-23 17:55:21 +00004857 }
4858 NEXTL(l);
4859 cur = CUR_CHAR(l);
4860 if (cur == 0) {
4861 SHRINK;
4862 GROW;
4863 cur = CUR_CHAR(l);
4864 }
4865 }
4866 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004867 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004868 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004869 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004870 } else if (!IS_CHAR(cur)) {
4871 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4872 "xmlParseComment: invalid xmlChar value %d\n",
4873 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004874 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004875 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004876 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4877 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004878 }
4879 NEXT;
4880 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4881 (!ctxt->disableSAX))
4882 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004883 }
Daniel Veillardda629342007-08-01 07:49:06 +00004884 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004885 return;
4886not_terminated:
4887 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4888 "Comment not terminated\n", NULL);
4889 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004890 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004891}
Daniel Veillardda629342007-08-01 07:49:06 +00004892
Daniel Veillard4c778d82005-01-23 17:37:44 +00004893/**
4894 * xmlParseComment:
4895 * @ctxt: an XML parser context
4896 *
4897 * Skip an XML (SGML) comment <!-- .... -->
4898 * The spec says that "For compatibility, the string "--" (double-hyphen)
4899 * must not occur within comments. "
4900 *
4901 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4902 */
4903void
4904xmlParseComment(xmlParserCtxtPtr ctxt) {
4905 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004906 size_t size = XML_PARSER_BUFFER_SIZE;
4907 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004908 xmlParserInputState state;
4909 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004910 size_t nbchar = 0;
4911 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004912 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004913
4914 /*
4915 * Check that there is a comment right here.
4916 */
4917 if ((RAW != '<') || (NXT(1) != '!') ||
4918 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004919 state = ctxt->instate;
4920 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004921 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004922 SKIP(4);
4923 SHRINK;
4924 GROW;
4925
4926 /*
4927 * Accelerated common case where input don't need to be
4928 * modified before passing it to the handler.
4929 */
4930 in = ctxt->input->cur;
4931 do {
4932 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004933 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004934 ctxt->input->line++; ctxt->input->col = 1;
4935 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004936 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004937 }
4938get_more:
4939 ccol = ctxt->input->col;
4940 while (((*in > '-') && (*in <= 0x7F)) ||
4941 ((*in >= 0x20) && (*in < '-')) ||
4942 (*in == 0x09)) {
4943 in++;
4944 ccol++;
4945 }
4946 ctxt->input->col = ccol;
4947 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004948 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004949 ctxt->input->line++; ctxt->input->col = 1;
4950 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004951 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004952 goto get_more;
4953 }
4954 nbchar = in - ctxt->input->cur;
4955 /*
4956 * save current set of data
4957 */
4958 if (nbchar > 0) {
4959 if ((ctxt->sax != NULL) &&
4960 (ctxt->sax->comment != NULL)) {
4961 if (buf == NULL) {
4962 if ((*in == '-') && (in[1] == '-'))
4963 size = nbchar + 1;
4964 else
4965 size = XML_PARSER_BUFFER_SIZE + nbchar;
4966 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4967 if (buf == NULL) {
4968 xmlErrMemory(ctxt, NULL);
4969 ctxt->instate = state;
4970 return;
4971 }
4972 len = 0;
4973 } else if (len + nbchar + 1 >= size) {
4974 xmlChar *new_buf;
4975 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4976 new_buf = (xmlChar *) xmlRealloc(buf,
4977 size * sizeof(xmlChar));
4978 if (new_buf == NULL) {
4979 xmlFree (buf);
4980 xmlErrMemory(ctxt, NULL);
4981 ctxt->instate = state;
4982 return;
4983 }
4984 buf = new_buf;
4985 }
4986 memcpy(&buf[len], ctxt->input->cur, nbchar);
4987 len += nbchar;
4988 buf[len] = 0;
4989 }
4990 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004991 if ((len > XML_MAX_TEXT_LENGTH) &&
4992 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4993 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4994 "Comment too big found", NULL);
4995 xmlFree (buf);
4996 return;
4997 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004998 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004999 if (*in == 0xA) {
5000 in++;
5001 ctxt->input->line++; ctxt->input->col = 1;
5002 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005003 if (*in == 0xD) {
5004 in++;
5005 if (*in == 0xA) {
5006 ctxt->input->cur = in;
5007 in++;
5008 ctxt->input->line++; ctxt->input->col = 1;
5009 continue; /* while */
5010 }
5011 in--;
5012 }
5013 SHRINK;
5014 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005015 if (ctxt->instate == XML_PARSER_EOF) {
5016 xmlFree(buf);
5017 return;
5018 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005019 in = ctxt->input->cur;
5020 if (*in == '-') {
5021 if (in[1] == '-') {
5022 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00005023 if (ctxt->input->id != inputid) {
5024 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5025 "comment doesn't start and stop in the same entity\n");
5026 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005027 SKIP(3);
5028 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5029 (!ctxt->disableSAX)) {
5030 if (buf != NULL)
5031 ctxt->sax->comment(ctxt->userData, buf);
5032 else
5033 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5034 }
5035 if (buf != NULL)
5036 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005037 if (ctxt->instate != XML_PARSER_EOF)
5038 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005039 return;
5040 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005041 if (buf != NULL) {
5042 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5043 "Double hyphen within comment: "
5044 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005045 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005046 } else
5047 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5048 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005049 in++;
5050 ctxt->input->col++;
5051 }
5052 in++;
5053 ctxt->input->col++;
5054 goto get_more;
5055 }
5056 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5057 xmlParseCommentComplex(ctxt, buf, len, size);
5058 ctxt->instate = state;
5059 return;
5060}
5061
Owen Taylor3473f882001-02-23 17:55:21 +00005062
5063/**
5064 * xmlParsePITarget:
5065 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005066 *
Owen Taylor3473f882001-02-23 17:55:21 +00005067 * parse the name of a PI
5068 *
5069 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5070 *
5071 * Returns the PITarget name or NULL
5072 */
5073
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005074const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005075xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005076 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005077
5078 name = xmlParseName(ctxt);
5079 if ((name != NULL) &&
5080 ((name[0] == 'x') || (name[0] == 'X')) &&
5081 ((name[1] == 'm') || (name[1] == 'M')) &&
5082 ((name[2] == 'l') || (name[2] == 'L'))) {
5083 int i;
5084 if ((name[0] == 'x') && (name[1] == 'm') &&
5085 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005086 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005087 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005088 return(name);
5089 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005090 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005091 return(name);
5092 }
5093 for (i = 0;;i++) {
5094 if (xmlW3CPIs[i] == NULL) break;
5095 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5096 return(name);
5097 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005098 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5099 "xmlParsePITarget: invalid name prefix 'xml'\n",
5100 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005101 }
Daniel Veillard37334572008-07-31 08:20:02 +00005102 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005103 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005104 "colons are forbidden from PI names '%s'\n", name, NULL, NULL);
Daniel Veillard37334572008-07-31 08:20:02 +00005105 }
Owen Taylor3473f882001-02-23 17:55:21 +00005106 return(name);
5107}
5108
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must have the form: catalog = "URL" (or with single quotes),
 * with optional blanks around each part and nothing after the closing
 * quote. A value where "catalog" is not followed by '=' is ignored
 * silently; every other syntax problem raises an XML_WAR_CATALOG_PI
 * warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != '=') {
        return;
    }
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* the opening quote decides which character closes the value */
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5170
Owen Taylor3473f882001-02-23 17:55:21 +00005171/**
5172 * xmlParsePI:
5173 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005174 *
Owen Taylor3473f882001-02-23 17:55:21 +00005175 * parse an XML Processing Instruction.
5176 *
5177 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5178 *
5179 * The processing is transfered to SAX once parsed.
5180 */
5181
5182void
5183xmlParsePI(xmlParserCtxtPtr ctxt) {
5184 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005185 size_t len = 0;
5186 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005187 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005188 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005189 xmlParserInputState state;
5190 int count = 0;
5191
5192 if ((RAW == '<') && (NXT(1) == '?')) {
5193 xmlParserInputPtr input = ctxt->input;
5194 state = ctxt->instate;
5195 ctxt->instate = XML_PARSER_PI;
5196 /*
5197 * this is a Processing Instruction.
5198 */
5199 SKIP(2);
5200 SHRINK;
5201
5202 /*
5203 * Parse the target name and check for special support like
5204 * namespace.
5205 */
5206 target = xmlParsePITarget(ctxt);
5207 if (target != NULL) {
5208 if ((RAW == '?') && (NXT(1) == '>')) {
5209 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005210 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5211 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005212 }
5213 SKIP(2);
5214
5215 /*
5216 * SAX: PI detected.
5217 */
5218 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5219 (ctxt->sax->processingInstruction != NULL))
5220 ctxt->sax->processingInstruction(ctxt->userData,
5221 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005222 if (ctxt->instate != XML_PARSER_EOF)
5223 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005224 return;
5225 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005226 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005227 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005228 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005229 ctxt->instate = state;
5230 return;
5231 }
5232 cur = CUR;
5233 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005234 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5235 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005236 }
5237 SKIP_BLANKS;
5238 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005239 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005240 ((cur != '?') || (NXT(1) != '>'))) {
5241 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005242 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005243 size_t new_size = size * 2;
5244 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005245 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005246 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005247 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005248 ctxt->instate = state;
5249 return;
5250 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005251 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005252 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005253 }
5254 count++;
5255 if (count > 50) {
5256 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005257 if (ctxt->instate == XML_PARSER_EOF) {
5258 xmlFree(buf);
5259 return;
5260 }
Owen Taylor3473f882001-02-23 17:55:21 +00005261 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005262 if ((len > XML_MAX_TEXT_LENGTH) &&
5263 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5264 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5265 "PI %s too big found", target);
5266 xmlFree(buf);
5267 ctxt->instate = state;
5268 return;
5269 }
Owen Taylor3473f882001-02-23 17:55:21 +00005270 }
5271 COPY_BUF(l,buf,len,cur);
5272 NEXTL(l);
5273 cur = CUR_CHAR(l);
5274 if (cur == 0) {
5275 SHRINK;
5276 GROW;
5277 cur = CUR_CHAR(l);
5278 }
5279 }
Daniel Veillard51304812012-07-19 20:34:26 +08005280 if ((len > XML_MAX_TEXT_LENGTH) &&
5281 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5282 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5283 "PI %s too big found", target);
5284 xmlFree(buf);
5285 ctxt->instate = state;
5286 return;
5287 }
Owen Taylor3473f882001-02-23 17:55:21 +00005288 buf[len] = 0;
5289 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005290 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5291 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005292 } else {
5293 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005294 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5295 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005296 }
5297 SKIP(2);
5298
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005299#ifdef LIBXML_CATALOG_ENABLED
5300 if (((state == XML_PARSER_MISC) ||
5301 (state == XML_PARSER_START)) &&
5302 (xmlStrEqual(target, XML_CATALOG_PI))) {
5303 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5304 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5305 (allow == XML_CATA_ALLOW_ALL))
5306 xmlParseCatalogPI(ctxt, buf);
5307 }
5308#endif
5309
5310
Owen Taylor3473f882001-02-23 17:55:21 +00005311 /*
5312 * SAX: PI detected.
5313 */
5314 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5315 (ctxt->sax->processingInstruction != NULL))
5316 ctxt->sax->processingInstruction(ctxt->userData,
5317 target, buf);
5318 }
5319 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005320 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005321 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005322 }
Chris Evans77404b82011-12-14 16:18:25 +08005323 if (ctxt->instate != XML_PARSER_EOF)
5324 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005325 }
5326}
5327
5328/**
5329 * xmlParseNotationDecl:
5330 * @ctxt: an XML parser context
5331 *
5332 * parse a notation declaration
5333 *
5334 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5335 *
5336 * Hence there is actually 3 choices:
5337 * 'PUBLIC' S PubidLiteral
5338 * 'PUBLIC' S PubidLiteral S SystemLiteral
5339 * and 'SYSTEM' S SystemLiteral
5340 *
5341 * See the NOTE on xmlParseExternalID().
5342 */
5343
5344void
5345xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005346 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005347 xmlChar *Pubid;
5348 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005349
Daniel Veillarda07050d2003-10-19 14:46:32 +00005350 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005351 xmlParserInputPtr input = ctxt->input;
5352 SHRINK;
5353 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005354 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005355 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5356 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005357 return;
5358 }
5359 SKIP_BLANKS;
5360
Daniel Veillard76d66f42001-05-16 21:05:17 +00005361 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005362 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005363 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005364 return;
5365 }
William M. Brack76e95df2003-10-18 16:20:14 +00005366 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005367 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005368 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005369 return;
5370 }
Daniel Veillard37334572008-07-31 08:20:02 +00005371 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005372 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005373 "colons are forbidden from notation names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005374 name, NULL, NULL);
5375 }
Owen Taylor3473f882001-02-23 17:55:21 +00005376 SKIP_BLANKS;
5377
5378 /*
5379 * Parse the IDs.
5380 */
5381 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5382 SKIP_BLANKS;
5383
5384 if (RAW == '>') {
5385 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005386 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5387 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005388 }
5389 NEXT;
5390 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5391 (ctxt->sax->notationDecl != NULL))
5392 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5393 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005394 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005395 }
Owen Taylor3473f882001-02-23 17:55:21 +00005396 if (Systemid != NULL) xmlFree(Systemid);
5397 if (Pubid != NULL) xmlFree(Pubid);
5398 }
5399}
5400
5401/**
5402 * xmlParseEntityDecl:
5403 * @ctxt: an XML parser context
5404 *
5405 * parse <!ENTITY declarations
5406 *
5407 * [70] EntityDecl ::= GEDecl | PEDecl
5408 *
5409 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5410 *
5411 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5412 *
5413 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5414 *
5415 * [74] PEDef ::= EntityValue | ExternalID
5416 *
5417 * [76] NDataDecl ::= S 'NDATA' S Name
5418 *
5419 * [ VC: Notation Declared ]
5420 * The Name must match the declared name of a notation.
5421 */
5422
5423void
5424xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005425 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005426 xmlChar *value = NULL;
5427 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005428 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005429 int isParameter = 0;
5430 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005431 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005432
Daniel Veillard4c778d82005-01-23 17:37:44 +00005433 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005434 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005435 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005436 SHRINK;
5437 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005438 skipped = SKIP_BLANKS;
5439 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005440 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5441 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005442 }
Owen Taylor3473f882001-02-23 17:55:21 +00005443
5444 if (RAW == '%') {
5445 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005446 skipped = SKIP_BLANKS;
5447 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5449 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005450 }
Owen Taylor3473f882001-02-23 17:55:21 +00005451 isParameter = 1;
5452 }
5453
Daniel Veillard76d66f42001-05-16 21:05:17 +00005454 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005455 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005456 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5457 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005458 return;
5459 }
Daniel Veillard37334572008-07-31 08:20:02 +00005460 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005461 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard2f9b1262014-07-26 20:29:36 +08005462 "colons are forbidden from entities names '%s'\n",
Daniel Veillard37334572008-07-31 08:20:02 +00005463 name, NULL, NULL);
5464 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005465 skipped = SKIP_BLANKS;
5466 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005467 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5468 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005469 }
Owen Taylor3473f882001-02-23 17:55:21 +00005470
Daniel Veillardf5582f12002-06-11 10:08:16 +00005471 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005472 /*
5473 * handle the various case of definitions...
5474 */
5475 if (isParameter) {
5476 if ((RAW == '"') || (RAW == '\'')) {
5477 value = xmlParseEntityValue(ctxt, &orig);
5478 if (value) {
5479 if ((ctxt->sax != NULL) &&
5480 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5481 ctxt->sax->entityDecl(ctxt->userData, name,
5482 XML_INTERNAL_PARAMETER_ENTITY,
5483 NULL, NULL, value);
5484 }
5485 } else {
5486 URI = xmlParseExternalID(ctxt, &literal, 1);
5487 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005488 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005489 }
5490 if (URI) {
5491 xmlURIPtr uri;
5492
5493 uri = xmlParseURI((const char *) URI);
5494 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005495 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5496 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005497 /*
5498 * This really ought to be a well formedness error
5499 * but the XML Core WG decided otherwise c.f. issue
5500 * E26 of the XML erratas.
5501 */
Owen Taylor3473f882001-02-23 17:55:21 +00005502 } else {
5503 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005504 /*
5505 * Okay this is foolish to block those but not
5506 * invalid URIs.
5507 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005508 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005509 } else {
5510 if ((ctxt->sax != NULL) &&
5511 (!ctxt->disableSAX) &&
5512 (ctxt->sax->entityDecl != NULL))
5513 ctxt->sax->entityDecl(ctxt->userData, name,
5514 XML_EXTERNAL_PARAMETER_ENTITY,
5515 literal, URI, NULL);
5516 }
5517 xmlFreeURI(uri);
5518 }
5519 }
5520 }
5521 } else {
5522 if ((RAW == '"') || (RAW == '\'')) {
5523 value = xmlParseEntityValue(ctxt, &orig);
5524 if ((ctxt->sax != NULL) &&
5525 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5526 ctxt->sax->entityDecl(ctxt->userData, name,
5527 XML_INTERNAL_GENERAL_ENTITY,
5528 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005529 /*
5530 * For expat compatibility in SAX mode.
5531 */
5532 if ((ctxt->myDoc == NULL) ||
5533 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5534 if (ctxt->myDoc == NULL) {
5535 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005536 if (ctxt->myDoc == NULL) {
5537 xmlErrMemory(ctxt, "New Doc failed");
5538 return;
5539 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005540 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005541 }
5542 if (ctxt->myDoc->intSubset == NULL)
5543 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5544 BAD_CAST "fake", NULL, NULL);
5545
Daniel Veillard1af9a412003-08-20 22:54:39 +00005546 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5547 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005548 }
Owen Taylor3473f882001-02-23 17:55:21 +00005549 } else {
5550 URI = xmlParseExternalID(ctxt, &literal, 1);
5551 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005552 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005553 }
5554 if (URI) {
5555 xmlURIPtr uri;
5556
5557 uri = xmlParseURI((const char *)URI);
5558 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005559 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5560 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005561 /*
5562 * This really ought to be a well formedness error
5563 * but the XML Core WG decided otherwise c.f. issue
5564 * E26 of the XML erratas.
5565 */
Owen Taylor3473f882001-02-23 17:55:21 +00005566 } else {
5567 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005568 /*
5569 * Okay this is foolish to block those but not
5570 * invalid URIs.
5571 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005572 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005573 }
5574 xmlFreeURI(uri);
5575 }
5576 }
William M. Brack76e95df2003-10-18 16:20:14 +00005577 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005578 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5579 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005580 }
5581 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005582 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005583 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005584 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005585 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5586 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005587 }
5588 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005589 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005590 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5591 (ctxt->sax->unparsedEntityDecl != NULL))
5592 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5593 literal, URI, ndata);
5594 } else {
5595 if ((ctxt->sax != NULL) &&
5596 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5597 ctxt->sax->entityDecl(ctxt->userData, name,
5598 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5599 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005600 /*
5601 * For expat compatibility in SAX mode.
5602 * assuming the entity repalcement was asked for
5603 */
5604 if ((ctxt->replaceEntities != 0) &&
5605 ((ctxt->myDoc == NULL) ||
5606 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5607 if (ctxt->myDoc == NULL) {
5608 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005609 if (ctxt->myDoc == NULL) {
5610 xmlErrMemory(ctxt, "New Doc failed");
5611 return;
5612 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005613 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005614 }
5615
5616 if (ctxt->myDoc->intSubset == NULL)
5617 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5618 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005619 xmlSAX2EntityDecl(ctxt, name,
5620 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5621 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005622 }
Owen Taylor3473f882001-02-23 17:55:21 +00005623 }
5624 }
5625 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005626 if (ctxt->instate == XML_PARSER_EOF)
5627 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005628 SKIP_BLANKS;
5629 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005630 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005631 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005632 } else {
5633 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005634 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5635 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005636 }
5637 NEXT;
5638 }
5639 if (orig != NULL) {
5640 /*
5641 * Ugly mechanism to save the raw entity value.
5642 */
5643 xmlEntityPtr cur = NULL;
5644
5645 if (isParameter) {
5646 if ((ctxt->sax != NULL) &&
5647 (ctxt->sax->getParameterEntity != NULL))
5648 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5649 } else {
5650 if ((ctxt->sax != NULL) &&
5651 (ctxt->sax->getEntity != NULL))
5652 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005653 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005654 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005655 }
Owen Taylor3473f882001-02-23 17:55:21 +00005656 }
5657 if (cur != NULL) {
5658 if (cur->orig != NULL)
5659 xmlFree(orig);
5660 else
5661 cur->orig = orig;
5662 } else
5663 xmlFree(orig);
5664 }
Owen Taylor3473f882001-02-23 17:55:21 +00005665 if (value != NULL) xmlFree(value);
5666 if (URI != NULL) xmlFree(URI);
5667 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005668 }
5669}
5670
5671/**
5672 * xmlParseDefaultDecl:
5673 * @ctxt: an XML parser context
5674 * @value: Receive a possible fixed default value for the attribute
5675 *
5676 * Parse an attribute default declaration
5677 *
5678 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5679 *
5680 * [ VC: Required Attribute ]
5681 * if the default declaration is the keyword #REQUIRED, then the
5682 * attribute must be specified for all elements of the type in the
5683 * attribute-list declaration.
5684 *
5685 * [ VC: Attribute Default Legal ]
5686 * The declared default value must meet the lexical constraints of
5687 * the declared attribute type c.f. xmlValidateAttributeDecl()
5688 *
5689 * [ VC: Fixed Attribute Default ]
5690 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005691 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005692 *
5693 * [ WFC: No < in Attribute Values ]
5694 * handled in xmlParseAttValue()
5695 *
5696 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005697 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005698 */
5699
5700int
5701xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5702 int val;
5703 xmlChar *ret;
5704
5705 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005706 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005707 SKIP(9);
5708 return(XML_ATTRIBUTE_REQUIRED);
5709 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005710 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005711 SKIP(8);
5712 return(XML_ATTRIBUTE_IMPLIED);
5713 }
5714 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005715 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005716 SKIP(6);
5717 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005718 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005719 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5720 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005721 }
5722 SKIP_BLANKS;
5723 }
5724 ret = xmlParseAttValue(ctxt);
5725 ctxt->instate = XML_PARSER_DTD;
5726 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005727 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005728 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005729 } else
5730 *value = ret;
5731 return(val);
5732}
5733
5734/**
5735 * xmlParseNotationType:
5736 * @ctxt: an XML parser context
5737 *
5738 * parse an Notation attribute type.
5739 *
5740 * Note: the leading 'NOTATION' S part has already being parsed...
5741 *
5742 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5743 *
5744 * [ VC: Notation Attributes ]
5745 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005746 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005747 *
5748 * Returns: the notation attribute tree built while parsing
5749 */
5750
5751xmlEnumerationPtr
5752xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005753 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005754 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005755
5756 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005757 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005758 return(NULL);
5759 }
5760 SHRINK;
5761 do {
5762 NEXT;
5763 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005764 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005765 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005766 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5767 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005768 xmlFreeEnumeration(ret);
5769 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005770 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005771 tmp = ret;
5772 while (tmp != NULL) {
5773 if (xmlStrEqual(name, tmp->name)) {
5774 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5775 "standalone: attribute notation value token %s duplicated\n",
5776 name, NULL);
5777 if (!xmlDictOwns(ctxt->dict, name))
5778 xmlFree((xmlChar *) name);
5779 break;
5780 }
5781 tmp = tmp->next;
5782 }
5783 if (tmp == NULL) {
5784 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005785 if (cur == NULL) {
5786 xmlFreeEnumeration(ret);
5787 return(NULL);
5788 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005789 if (last == NULL) ret = last = cur;
5790 else {
5791 last->next = cur;
5792 last = cur;
5793 }
Owen Taylor3473f882001-02-23 17:55:21 +00005794 }
5795 SKIP_BLANKS;
5796 } while (RAW == '|');
5797 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005798 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005799 xmlFreeEnumeration(ret);
5800 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005801 }
5802 NEXT;
5803 return(ret);
5804}
5805
5806/**
5807 * xmlParseEnumerationType:
5808 * @ctxt: an XML parser context
5809 *
5810 * parse an Enumeration attribute type.
5811 *
5812 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5813 *
5814 * [ VC: Enumeration ]
5815 * Values of this type must match one of the Nmtoken tokens in
5816 * the declaration
5817 *
5818 * Returns: the enumeration attribute tree built while parsing
5819 */
5820
5821xmlEnumerationPtr
5822xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5823 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005824 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005825
5826 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005827 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005828 return(NULL);
5829 }
5830 SHRINK;
5831 do {
5832 NEXT;
5833 SKIP_BLANKS;
5834 name = xmlParseNmtoken(ctxt);
5835 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005836 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005837 return(ret);
5838 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005839 tmp = ret;
5840 while (tmp != NULL) {
5841 if (xmlStrEqual(name, tmp->name)) {
5842 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5843 "standalone: attribute enumeration value token %s duplicated\n",
5844 name, NULL);
5845 if (!xmlDictOwns(ctxt->dict, name))
5846 xmlFree(name);
5847 break;
5848 }
5849 tmp = tmp->next;
5850 }
5851 if (tmp == NULL) {
5852 cur = xmlCreateEnumeration(name);
5853 if (!xmlDictOwns(ctxt->dict, name))
5854 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005855 if (cur == NULL) {
5856 xmlFreeEnumeration(ret);
5857 return(NULL);
5858 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005859 if (last == NULL) ret = last = cur;
5860 else {
5861 last->next = cur;
5862 last = cur;
5863 }
Owen Taylor3473f882001-02-23 17:55:21 +00005864 }
5865 SKIP_BLANKS;
5866 } while (RAW == '|');
5867 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005868 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005869 return(ret);
5870 }
5871 NEXT;
5872 return(ret);
5873}
5874
5875/**
5876 * xmlParseEnumeratedType:
5877 * @ctxt: an XML parser context
5878 * @tree: the enumeration tree built while parsing
5879 *
5880 * parse an Enumerated attribute type.
5881 *
5882 * [57] EnumeratedType ::= NotationType | Enumeration
5883 *
5884 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5885 *
5886 *
5887 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5888 */
5889
5890int
5891xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005892 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005893 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005894 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005895 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5896 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005897 return(0);
5898 }
5899 SKIP_BLANKS;
5900 *tree = xmlParseNotationType(ctxt);
5901 if (*tree == NULL) return(0);
5902 return(XML_ATTRIBUTE_NOTATION);
5903 }
5904 *tree = xmlParseEnumerationType(ctxt);
5905 if (*tree == NULL) return(0);
5906 return(XML_ATTRIBUTE_ENUMERATION);
5907}
5908
5909/**
5910 * xmlParseAttributeType:
5911 * @ctxt: an XML parser context
5912 * @tree: the enumeration tree built while parsing
5913 *
5914 * parse the Attribute list def for an element
5915 *
5916 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5917 *
5918 * [55] StringType ::= 'CDATA'
5919 *
5920 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5921 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5922 *
5923 * Validity constraints for attribute values syntax are checked in
5924 * xmlValidateAttributeValue()
5925 *
5926 * [ VC: ID ]
5927 * Values of type ID must match the Name production. A name must not
5928 * appear more than once in an XML document as a value of this type;
5929 * i.e., ID values must uniquely identify the elements which bear them.
5930 *
5931 * [ VC: One ID per Element Type ]
5932 * No element type may have more than one ID attribute specified.
5933 *
5934 * [ VC: ID Attribute Default ]
5935 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5936 *
5937 * [ VC: IDREF ]
5938 * Values of type IDREF must match the Name production, and values
5939 * of type IDREFS must match Names; each IDREF Name must match the value
5940 * of an ID attribute on some element in the XML document; i.e. IDREF
5941 * values must match the value of some ID attribute.
5942 *
5943 * [ VC: Entity Name ]
5944 * Values of type ENTITY must match the Name production, values
5945 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005946 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005947 *
5948 * [ VC: Name Token ]
5949 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005950 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005951 *
5952 * Returns the attribute type
5953 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005954int
Owen Taylor3473f882001-02-23 17:55:21 +00005955xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5956 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005957 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005958 SKIP(5);
5959 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005960 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005961 SKIP(6);
5962 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005963 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005964 SKIP(5);
5965 return(XML_ATTRIBUTE_IDREF);
5966 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5967 SKIP(2);
5968 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005969 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005970 SKIP(6);
5971 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005972 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005973 SKIP(8);
5974 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005975 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005976 SKIP(8);
5977 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005978 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005979 SKIP(7);
5980 return(XML_ATTRIBUTE_NMTOKEN);
5981 }
5982 return(xmlParseEnumeratedType(ctxt, tree));
5983}
5984
5985/**
5986 * xmlParseAttributeListDecl:
5987 * @ctxt: an XML parser context
5988 *
5989 * : parse the Attribute list def for an element
5990 *
5991 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5992 *
5993 * [53] AttDef ::= S Name S AttType S DefaultDecl
5994 *
5995 */
5996void
5997xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005998 const xmlChar *elemName;
5999 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00006000 xmlEnumerationPtr tree;
6001
Daniel Veillarda07050d2003-10-19 14:46:32 +00006002 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006003 xmlParserInputPtr input = ctxt->input;
6004
6005 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006006 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006007 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006008 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006009 }
6010 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006011 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006012 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006013 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6014 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006015 return;
6016 }
6017 SKIP_BLANKS;
6018 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006019 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006020 const xmlChar *check = CUR_PTR;
6021 int type;
6022 int def;
6023 xmlChar *defaultValue = NULL;
6024
6025 GROW;
6026 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006027 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006028 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006029 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6030 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006031 break;
6032 }
6033 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006034 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006035 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006036 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006037 break;
6038 }
6039 SKIP_BLANKS;
6040
6041 type = xmlParseAttributeType(ctxt, &tree);
6042 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006043 break;
6044 }
6045
6046 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006047 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006048 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6049 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006050 if (tree != NULL)
6051 xmlFreeEnumeration(tree);
6052 break;
6053 }
6054 SKIP_BLANKS;
6055
6056 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6057 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006058 if (defaultValue != NULL)
6059 xmlFree(defaultValue);
6060 if (tree != NULL)
6061 xmlFreeEnumeration(tree);
6062 break;
6063 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006064 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6065 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006066
6067 GROW;
6068 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006069 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006070 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006071 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006072 if (defaultValue != NULL)
6073 xmlFree(defaultValue);
6074 if (tree != NULL)
6075 xmlFreeEnumeration(tree);
6076 break;
6077 }
6078 SKIP_BLANKS;
6079 }
6080 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006081 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6082 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006083 if (defaultValue != NULL)
6084 xmlFree(defaultValue);
6085 if (tree != NULL)
6086 xmlFreeEnumeration(tree);
6087 break;
6088 }
6089 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6090 (ctxt->sax->attributeDecl != NULL))
6091 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6092 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006093 else if (tree != NULL)
6094 xmlFreeEnumeration(tree);
6095
6096 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006097 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006098 (def != XML_ATTRIBUTE_REQUIRED)) {
6099 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6100 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006101 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006102 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6103 }
Owen Taylor3473f882001-02-23 17:55:21 +00006104 if (defaultValue != NULL)
6105 xmlFree(defaultValue);
6106 GROW;
6107 }
6108 if (RAW == '>') {
6109 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006110 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6111 "Attribute list declaration doesn't start and stop in the same entity\n",
6112 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006113 }
6114 NEXT;
6115 }
Owen Taylor3473f882001-02-23 17:55:21 +00006116 }
6117}
6118
6119/**
6120 * xmlParseElementMixedContentDecl:
6121 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006122 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006123 *
6124 * parse the declaration for a Mixed Element content
6125 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006126 *
Owen Taylor3473f882001-02-23 17:55:21 +00006127 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6128 * '(' S? '#PCDATA' S? ')'
6129 *
6130 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6131 *
6132 * [ VC: No Duplicate Types ]
6133 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006134 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006135 *
6136 * returns: the list of the xmlElementContentPtr describing the element choices
6137 */
6138xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006139xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006140 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006141 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006142
6143 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006144 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006145 SKIP(7);
6146 SKIP_BLANKS;
6147 SHRINK;
6148 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006149 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006150 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6151"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006152 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006153 }
Owen Taylor3473f882001-02-23 17:55:21 +00006154 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006155 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006156 if (ret == NULL)
6157 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006158 if (RAW == '*') {
6159 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6160 NEXT;
6161 }
6162 return(ret);
6163 }
6164 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006165 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006166 if (ret == NULL) return(NULL);
6167 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006168 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006169 NEXT;
6170 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006171 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006172 if (ret == NULL) return(NULL);
6173 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006174 if (cur != NULL)
6175 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006176 cur = ret;
6177 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006178 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006179 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006180 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006181 if (n->c1 != NULL)
6182 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006183 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006184 if (n != NULL)
6185 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006186 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006187 }
6188 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006189 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006190 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006191 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006192 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006193 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00006194 return(NULL);
6195 }
6196 SKIP_BLANKS;
6197 GROW;
6198 }
6199 if ((RAW == ')') && (NXT(1) == '*')) {
6200 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006201 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006202 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006203 if (cur->c2 != NULL)
6204 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006205 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006206 if (ret != NULL)
6207 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006208 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006209 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6210"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006211 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006212 }
Owen Taylor3473f882001-02-23 17:55:21 +00006213 SKIP(2);
6214 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006215 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006216 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006217 return(NULL);
6218 }
6219
6220 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006221 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006222 }
6223 return(ret);
6224}
6225
6226/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006227 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006228 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006229 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006230 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006231 *
6232 * parse the declaration for a Mixed Element content
6233 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006234 *
Owen Taylor3473f882001-02-23 17:55:21 +00006235 *
6236 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6237 *
6238 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6239 *
6240 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6241 *
6242 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6243 *
6244 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6245 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006246 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006247 * opening or closing parentheses in a choice, seq, or Mixed
6248 * construct is contained in the replacement text for a parameter
6249 * entity, both must be contained in the same replacement text. For
6250 * interoperability, if a parameter-entity reference appears in a
6251 * choice, seq, or Mixed construct, its replacement text should not
6252 * be empty, and neither the first nor last non-blank character of
6253 * the replacement text should be a connector (| or ,).
6254 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006255 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006256 * hierarchy.
6257 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006258static xmlElementContentPtr
6259xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6260 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006261 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006262 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006263 xmlChar type = 0;
6264
Daniel Veillard489f9672009-08-10 16:49:30 +02006265 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6266 (depth > 2048)) {
6267 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6268"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6269 depth);
6270 return(NULL);
6271 }
Owen Taylor3473f882001-02-23 17:55:21 +00006272 SKIP_BLANKS;
6273 GROW;
6274 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006275 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006276
Owen Taylor3473f882001-02-23 17:55:21 +00006277 /* Recurse on first child */
6278 NEXT;
6279 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006280 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6281 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006282 SKIP_BLANKS;
6283 GROW;
6284 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006285 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006286 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006287 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006288 return(NULL);
6289 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006290 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006291 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006292 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006293 return(NULL);
6294 }
Owen Taylor3473f882001-02-23 17:55:21 +00006295 GROW;
6296 if (RAW == '?') {
6297 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6298 NEXT;
6299 } else if (RAW == '*') {
6300 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6301 NEXT;
6302 } else if (RAW == '+') {
6303 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6304 NEXT;
6305 } else {
6306 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6307 }
Owen Taylor3473f882001-02-23 17:55:21 +00006308 GROW;
6309 }
6310 SKIP_BLANKS;
6311 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006312 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006313 /*
6314 * Each loop we parse one separator and one element.
6315 */
6316 if (RAW == ',') {
6317 if (type == 0) type = CUR;
6318
6319 /*
6320 * Detect "Name | Name , Name" error
6321 */
6322 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006323 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006324 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006325 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006326 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006327 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006328 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006329 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006330 return(NULL);
6331 }
6332 NEXT;
6333
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006334 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006335 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006336 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006337 xmlFreeDocElementContent(ctxt->myDoc, last);
6338 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006339 return(NULL);
6340 }
6341 if (last == NULL) {
6342 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006343 if (ret != NULL)
6344 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006345 ret = cur = op;
6346 } else {
6347 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006348 if (op != NULL)
6349 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006350 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006351 if (last != NULL)
6352 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006353 cur =op;
6354 last = NULL;
6355 }
6356 } else if (RAW == '|') {
6357 if (type == 0) type = CUR;
6358
6359 /*
6360 * Detect "Name , Name | Name" error
6361 */
6362 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006363 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006364 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006365 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006366 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006367 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006368 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006369 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006370 return(NULL);
6371 }
6372 NEXT;
6373
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006374 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006375 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006376 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006377 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006378 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006379 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006380 return(NULL);
6381 }
6382 if (last == NULL) {
6383 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006384 if (ret != NULL)
6385 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006386 ret = cur = op;
6387 } else {
6388 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006389 if (op != NULL)
6390 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006391 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006392 if (last != NULL)
6393 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006394 cur =op;
6395 last = NULL;
6396 }
6397 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006398 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006399 if ((last != NULL) && (last != ret))
6400 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006401 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006402 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006403 return(NULL);
6404 }
6405 GROW;
6406 SKIP_BLANKS;
6407 GROW;
6408 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006409 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006410 /* Recurse on second child */
6411 NEXT;
6412 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006413 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6414 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006415 SKIP_BLANKS;
6416 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006417 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006418 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006419 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006420 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006421 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006422 return(NULL);
6423 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006424 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006425 if (last == NULL) {
6426 if (ret != NULL)
6427 xmlFreeDocElementContent(ctxt->myDoc, ret);
6428 return(NULL);
6429 }
Owen Taylor3473f882001-02-23 17:55:21 +00006430 if (RAW == '?') {
6431 last->ocur = XML_ELEMENT_CONTENT_OPT;
6432 NEXT;
6433 } else if (RAW == '*') {
6434 last->ocur = XML_ELEMENT_CONTENT_MULT;
6435 NEXT;
6436 } else if (RAW == '+') {
6437 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6438 NEXT;
6439 } else {
6440 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6441 }
6442 }
6443 SKIP_BLANKS;
6444 GROW;
6445 }
6446 if ((cur != NULL) && (last != NULL)) {
6447 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006448 if (last != NULL)
6449 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006450 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006451 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006452 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6453"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006454 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006455 }
Owen Taylor3473f882001-02-23 17:55:21 +00006456 NEXT;
6457 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006458 if (ret != NULL) {
6459 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6460 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6461 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6462 else
6463 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6464 }
Owen Taylor3473f882001-02-23 17:55:21 +00006465 NEXT;
6466 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006467 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006468 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006469 cur = ret;
6470 /*
6471 * Some normalization:
6472 * (a | b* | c?)* == (a | b | c)*
6473 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006474 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006475 if ((cur->c1 != NULL) &&
6476 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6477 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6478 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6479 if ((cur->c2 != NULL) &&
6480 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6481 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6482 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6483 cur = cur->c2;
6484 }
6485 }
Owen Taylor3473f882001-02-23 17:55:21 +00006486 NEXT;
6487 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006488 if (ret != NULL) {
6489 int found = 0;
6490
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006491 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6492 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6493 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006494 else
6495 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006496 /*
6497 * Some normalization:
6498 * (a | b*)+ == (a | b)*
6499 * (a | b?)+ == (a | b)*
6500 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006501 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006502 if ((cur->c1 != NULL) &&
6503 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6504 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6505 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6506 found = 1;
6507 }
6508 if ((cur->c2 != NULL) &&
6509 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6510 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6511 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6512 found = 1;
6513 }
6514 cur = cur->c2;
6515 }
6516 if (found)
6517 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6518 }
Owen Taylor3473f882001-02-23 17:55:21 +00006519 NEXT;
6520 }
6521 return(ret);
6522}
6523
6524/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006525 * xmlParseElementChildrenContentDecl:
6526 * @ctxt: an XML parser context
6527 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006528 *
6529 * parse the declaration for a Mixed Element content
6530 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6531 *
6532 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6533 *
6534 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6535 *
6536 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6537 *
6538 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6539 *
6540 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6541 * TODO Parameter-entity replacement text must be properly nested
6542 * with parenthesized groups. That is to say, if either of the
6543 * opening or closing parentheses in a choice, seq, or Mixed
6544 * construct is contained in the replacement text for a parameter
6545 * entity, both must be contained in the same replacement text. For
6546 * interoperability, if a parameter-entity reference appears in a
6547 * choice, seq, or Mixed construct, its replacement text should not
6548 * be empty, and neither the first nor last non-blank character of
6549 * the replacement text should be a connector (| or ,).
6550 *
6551 * Returns the tree of xmlElementContentPtr describing the element
6552 * hierarchy.
6553 */
6554xmlElementContentPtr
6555xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6556 /* stub left for API/ABI compat */
6557 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6558}
6559
6560/**
Owen Taylor3473f882001-02-23 17:55:21 +00006561 * xmlParseElementContentDecl:
6562 * @ctxt: an XML parser context
6563 * @name: the name of the element being defined.
6564 * @result: the Element Content pointer will be stored here if any
6565 *
6566 * parse the declaration for an Element content either Mixed or Children,
6567 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006568 *
Owen Taylor3473f882001-02-23 17:55:21 +00006569 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6570 *
6571 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6572 */
6573
6574int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006575xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006576 xmlElementContentPtr *result) {
6577
6578 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006579 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006580 int res;
6581
6582 *result = NULL;
6583
6584 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006585 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006586 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006587 return(-1);
6588 }
6589 NEXT;
6590 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006591 if (ctxt->instate == XML_PARSER_EOF)
6592 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006593 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006594 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006595 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006596 res = XML_ELEMENT_TYPE_MIXED;
6597 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006598 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006599 res = XML_ELEMENT_TYPE_ELEMENT;
6600 }
Owen Taylor3473f882001-02-23 17:55:21 +00006601 SKIP_BLANKS;
6602 *result = tree;
6603 return(res);
6604}
6605
6606/**
6607 * xmlParseElementDecl:
6608 * @ctxt: an XML parser context
6609 *
6610 * parse an Element declaration.
6611 *
6612 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6613 *
6614 * [ VC: Unique Element Type Declaration ]
6615 * No element type may be declared more than once
6616 *
6617 * Returns the type of the element, or -1 in case of error
6618 */
6619int
6620xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006621 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006622 int ret = -1;
6623 xmlElementContentPtr content = NULL;
6624
Daniel Veillard4c778d82005-01-23 17:37:44 +00006625 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006626 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006627 xmlParserInputPtr input = ctxt->input;
6628
6629 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006630 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006631 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6632 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006633 }
6634 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006635 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006636 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006637 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6638 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006639 return(-1);
6640 }
6641 while ((RAW == 0) && (ctxt->inputNr > 1))
6642 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006643 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006644 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6645 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006646 }
6647 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006648 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006649 SKIP(5);
6650 /*
6651 * Element must always be empty.
6652 */
6653 ret = XML_ELEMENT_TYPE_EMPTY;
6654 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6655 (NXT(2) == 'Y')) {
6656 SKIP(3);
6657 /*
6658 * Element is a generic container.
6659 */
6660 ret = XML_ELEMENT_TYPE_ANY;
6661 } else if (RAW == '(') {
6662 ret = xmlParseElementContentDecl(ctxt, name, &content);
6663 } else {
6664 /*
6665 * [ WFC: PEs in Internal Subset ] error handling.
6666 */
6667 if ((RAW == '%') && (ctxt->external == 0) &&
6668 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006669 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006670 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006671 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006672 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006673 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6674 }
Owen Taylor3473f882001-02-23 17:55:21 +00006675 return(-1);
6676 }
6677
6678 SKIP_BLANKS;
6679 /*
6680 * Pop-up of finished entities.
6681 */
6682 while ((RAW == 0) && (ctxt->inputNr > 1))
6683 xmlPopInput(ctxt);
6684 SKIP_BLANKS;
6685
6686 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006687 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006688 if (content != NULL) {
6689 xmlFreeDocElementContent(ctxt->myDoc, content);
6690 }
Owen Taylor3473f882001-02-23 17:55:21 +00006691 } else {
6692 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006693 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6694 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006695 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006696
Owen Taylor3473f882001-02-23 17:55:21 +00006697 NEXT;
6698 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006699 (ctxt->sax->elementDecl != NULL)) {
6700 if (content != NULL)
6701 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006702 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6703 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006704 if ((content != NULL) && (content->parent == NULL)) {
6705 /*
6706 * this is a trick: if xmlAddElementDecl is called,
6707 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006708 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006709 * interfaces or change the API/ABI
6710 */
6711 xmlFreeDocElementContent(ctxt->myDoc, content);
6712 }
6713 } else if (content != NULL) {
6714 xmlFreeDocElementContent(ctxt->myDoc, content);
6715 }
Owen Taylor3473f882001-02-23 17:55:21 +00006716 }
Owen Taylor3473f882001-02-23 17:55:21 +00006717 }
6718 return(ret);
6719}
6720
6721/**
Owen Taylor3473f882001-02-23 17:55:21 +00006722 * xmlParseConditionalSections
6723 * @ctxt: an XML parser context
6724 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006725 * [61] conditionalSect ::= includeSect | ignoreSect
6726 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006727 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6728 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6729 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6730 */
6731
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006732static void
Owen Taylor3473f882001-02-23 17:55:21 +00006733xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006734 int id = ctxt->input->id;
6735
Owen Taylor3473f882001-02-23 17:55:21 +00006736 SKIP(3);
6737 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006738 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006739 SKIP(7);
6740 SKIP_BLANKS;
6741 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006742 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006743 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006744 if (ctxt->input->id != id) {
6745 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6746 "All markup of the conditional section is not in the same entity\n",
6747 NULL, NULL);
6748 }
Owen Taylor3473f882001-02-23 17:55:21 +00006749 NEXT;
6750 }
6751 if (xmlParserDebugEntities) {
6752 if ((ctxt->input != NULL) && (ctxt->input->filename))
6753 xmlGenericError(xmlGenericErrorContext,
6754 "%s(%d): ", ctxt->input->filename,
6755 ctxt->input->line);
6756 xmlGenericError(xmlGenericErrorContext,
6757 "Entering INCLUDE Conditional Section\n");
6758 }
6759
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006760 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6761 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006762 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006763 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006764
6765 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6766 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006767 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006768 NEXT;
6769 } else if (RAW == '%') {
6770 xmlParsePEReference(ctxt);
6771 } else
6772 xmlParseMarkupDecl(ctxt);
6773
6774 /*
6775 * Pop-up of finished entities.
6776 */
6777 while ((RAW == 0) && (ctxt->inputNr > 1))
6778 xmlPopInput(ctxt);
6779
Daniel Veillardfdc91562002-07-01 21:52:03 +00006780 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006781 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006782 break;
6783 }
6784 }
6785 if (xmlParserDebugEntities) {
6786 if ((ctxt->input != NULL) && (ctxt->input->filename))
6787 xmlGenericError(xmlGenericErrorContext,
6788 "%s(%d): ", ctxt->input->filename,
6789 ctxt->input->line);
6790 xmlGenericError(xmlGenericErrorContext,
6791 "Leaving INCLUDE Conditional Section\n");
6792 }
6793
Daniel Veillarda07050d2003-10-19 14:46:32 +00006794 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006795 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006796 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006797 int depth = 0;
6798
6799 SKIP(6);
6800 SKIP_BLANKS;
6801 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006802 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006803 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006804 if (ctxt->input->id != id) {
6805 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6806 "All markup of the conditional section is not in the same entity\n",
6807 NULL, NULL);
6808 }
Owen Taylor3473f882001-02-23 17:55:21 +00006809 NEXT;
6810 }
6811 if (xmlParserDebugEntities) {
6812 if ((ctxt->input != NULL) && (ctxt->input->filename))
6813 xmlGenericError(xmlGenericErrorContext,
6814 "%s(%d): ", ctxt->input->filename,
6815 ctxt->input->line);
6816 xmlGenericError(xmlGenericErrorContext,
6817 "Entering IGNORE Conditional Section\n");
6818 }
6819
6820 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006821 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006822 * But disable SAX event generating DTD building in the meantime
6823 */
6824 state = ctxt->disableSAX;
6825 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006826 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006827 ctxt->instate = XML_PARSER_IGNORE;
6828
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006829 while (((depth >= 0) && (RAW != 0)) &&
6830 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006831 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6832 depth++;
6833 SKIP(3);
6834 continue;
6835 }
6836 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6837 if (--depth >= 0) SKIP(3);
6838 continue;
6839 }
6840 NEXT;
6841 continue;
6842 }
6843
6844 ctxt->disableSAX = state;
6845 ctxt->instate = instate;
6846
6847 if (xmlParserDebugEntities) {
6848 if ((ctxt->input != NULL) && (ctxt->input->filename))
6849 xmlGenericError(xmlGenericErrorContext,
6850 "%s(%d): ", ctxt->input->filename,
6851 ctxt->input->line);
6852 xmlGenericError(xmlGenericErrorContext,
6853 "Leaving IGNORE Conditional Section\n");
6854 }
6855
6856 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006857 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006858 }
6859
6860 if (RAW == 0)
6861 SHRINK;
6862
6863 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006864 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006865 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006866 if (ctxt->input->id != id) {
6867 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6868 "All markup of the conditional section is not in the same entity\n",
6869 NULL, NULL);
6870 }
Owen Taylor3473f882001-02-23 17:55:21 +00006871 SKIP(3);
6872 }
6873}
6874
6875/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006876 * xmlParseMarkupDecl:
6877 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006878 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006879 * parse Markup declarations
6880 *
6881 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6882 * NotationDecl | PI | Comment
6883 *
6884 * [ VC: Proper Declaration/PE Nesting ]
6885 * Parameter-entity replacement text must be properly nested with
6886 * markup declarations. That is to say, if either the first character
6887 * or the last character of a markup declaration (markupdecl above) is
6888 * contained in the replacement text for a parameter-entity reference,
6889 * both must be contained in the same replacement text.
6890 *
6891 * [ WFC: PEs in Internal Subset ]
6892 * In the internal DTD subset, parameter-entity references can occur
6893 * only where markup declarations can occur, not within markup declarations.
6894 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006895 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006896 */
6897void
6898xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6899 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006900 if (CUR == '<') {
6901 if (NXT(1) == '!') {
6902 switch (NXT(2)) {
6903 case 'E':
6904 if (NXT(3) == 'L')
6905 xmlParseElementDecl(ctxt);
6906 else if (NXT(3) == 'N')
6907 xmlParseEntityDecl(ctxt);
6908 break;
6909 case 'A':
6910 xmlParseAttributeListDecl(ctxt);
6911 break;
6912 case 'N':
6913 xmlParseNotationDecl(ctxt);
6914 break;
6915 case '-':
6916 xmlParseComment(ctxt);
6917 break;
6918 default:
6919 /* there is an error but it will be detected later */
6920 break;
6921 }
6922 } else if (NXT(1) == '?') {
6923 xmlParsePI(ctxt);
6924 }
6925 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006926 /*
6927 * This is only for internal subset. On external entities,
6928 * the replacement is done before parsing stage
6929 */
6930 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6931 xmlParsePEReference(ctxt);
6932
6933 /*
6934 * Conditional sections are allowed from entities included
6935 * by PE References in the internal subset.
6936 */
6937 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6938 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6939 xmlParseConditionalSections(ctxt);
6940 }
6941 }
6942
6943 ctxt->instate = XML_PARSER_DTD;
6944}
6945
6946/**
6947 * xmlParseTextDecl:
6948 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006949 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006950 * parse an XML declaration header for external entities
6951 *
6952 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006953 */
6954
6955void
6956xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6957 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006958 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006959
6960 /*
6961 * We know that '<?xml' is here.
6962 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006963 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006964 SKIP(5);
6965 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006966 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006967 return;
6968 }
6969
William M. Brack76e95df2003-10-18 16:20:14 +00006970 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006971 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6972 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006973 }
6974 SKIP_BLANKS;
6975
6976 /*
6977 * We may have the VersionInfo here.
6978 */
6979 version = xmlParseVersionInfo(ctxt);
6980 if (version == NULL)
6981 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006982 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006983 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006984 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6985 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006986 }
6987 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006988 ctxt->input->version = version;
6989
6990 /*
6991 * We must have the encoding declaration
6992 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006993 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006994 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6995 /*
6996 * The XML REC instructs us to stop parsing right here
6997 */
6998 return;
6999 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00007000 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
7001 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
7002 "Missing encoding in text declaration\n");
7003 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007004
7005 SKIP_BLANKS;
7006 if ((RAW == '?') && (NXT(1) == '>')) {
7007 SKIP(2);
7008 } else if (RAW == '>') {
7009 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007010 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007011 NEXT;
7012 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007013 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00007014 MOVETO_ENDTAG(CUR_PTR);
7015 NEXT;
7016 }
7017}
7018
7019/**
Owen Taylor3473f882001-02-23 17:55:21 +00007020 * xmlParseExternalSubset:
7021 * @ctxt: an XML parser context
7022 * @ExternalID: the external identifier
7023 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007024 *
Owen Taylor3473f882001-02-23 17:55:21 +00007025 * parse Markup declarations from an external subset
7026 *
7027 * [30] extSubset ::= textDecl? extSubsetDecl
7028 *
7029 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7030 */
7031void
7032xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7033 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007034 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007035 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007036
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007037 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007038 (ctxt->input->end - ctxt->input->cur >= 4)) {
7039 xmlChar start[4];
7040 xmlCharEncoding enc;
7041
7042 start[0] = RAW;
7043 start[1] = NXT(1);
7044 start[2] = NXT(2);
7045 start[3] = NXT(3);
7046 enc = xmlDetectCharEncoding(start, 4);
7047 if (enc != XML_CHAR_ENCODING_NONE)
7048 xmlSwitchEncoding(ctxt, enc);
7049 }
7050
Daniel Veillarda07050d2003-10-19 14:46:32 +00007051 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007052 xmlParseTextDecl(ctxt);
7053 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7054 /*
7055 * The XML REC instructs us to stop parsing right here
7056 */
7057 ctxt->instate = XML_PARSER_EOF;
7058 return;
7059 }
7060 }
7061 if (ctxt->myDoc == NULL) {
7062 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007063 if (ctxt->myDoc == NULL) {
7064 xmlErrMemory(ctxt, "New Doc failed");
7065 return;
7066 }
7067 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007068 }
7069 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7070 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7071
7072 ctxt->instate = XML_PARSER_DTD;
7073 ctxt->external = 1;
7074 while (((RAW == '<') && (NXT(1) == '?')) ||
7075 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007076 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007077 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007078 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007079
7080 GROW;
7081 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7082 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007083 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007084 NEXT;
7085 } else if (RAW == '%') {
7086 xmlParsePEReference(ctxt);
7087 } else
7088 xmlParseMarkupDecl(ctxt);
7089
7090 /*
7091 * Pop-up of finished entities.
7092 */
7093 while ((RAW == 0) && (ctxt->inputNr > 1))
7094 xmlPopInput(ctxt);
7095
Daniel Veillardfdc91562002-07-01 21:52:03 +00007096 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007097 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007098 break;
7099 }
7100 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007101
Owen Taylor3473f882001-02-23 17:55:21 +00007102 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007103 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007104 }
7105
7106}
7107
7108/**
7109 * xmlParseReference:
7110 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007111 *
Owen Taylor3473f882001-02-23 17:55:21 +00007112 * parse and handle entity references in content, depending on the SAX
7113 * interface, this may end-up in a call to character() if this is a
7114 * CharRef, a predefined entity, if there is no reference() callback.
7115 * or if the parser was asked to switch to that mode.
7116 *
7117 * [67] Reference ::= EntityRef | CharRef
7118 */
7119void
7120xmlParseReference(xmlParserCtxtPtr ctxt) {
7121 xmlEntityPtr ent;
7122 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007123 int was_checked;
7124 xmlNodePtr list = NULL;
7125 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007126
Daniel Veillard0161e632008-08-28 15:36:32 +00007127
7128 if (RAW != '&')
7129 return;
7130
7131 /*
7132 * Simple case of a CharRef
7133 */
Owen Taylor3473f882001-02-23 17:55:21 +00007134 if (NXT(1) == '#') {
7135 int i = 0;
7136 xmlChar out[10];
7137 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007138 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007139
Daniel Veillarddc171602008-03-26 17:41:38 +00007140 if (value == 0)
7141 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007142 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7143 /*
7144 * So we are using non-UTF-8 buffers
7145 * Check that the char fit on 8bits, if not
7146 * generate a CharRef.
7147 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007148 if (value <= 0xFF) {
7149 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007150 out[1] = 0;
7151 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7152 (!ctxt->disableSAX))
7153 ctxt->sax->characters(ctxt->userData, out, 1);
7154 } else {
7155 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007156 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007157 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007158 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007159 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7160 (!ctxt->disableSAX))
7161 ctxt->sax->reference(ctxt->userData, out);
7162 }
7163 } else {
7164 /*
7165 * Just encode the value in UTF-8
7166 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007167 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007168 out[i] = 0;
7169 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7170 (!ctxt->disableSAX))
7171 ctxt->sax->characters(ctxt->userData, out, i);
7172 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007173 return;
7174 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007175
Daniel Veillard0161e632008-08-28 15:36:32 +00007176 /*
7177 * We are seeing an entity reference
7178 */
7179 ent = xmlParseEntityRef(ctxt);
7180 if (ent == NULL) return;
7181 if (!ctxt->wellFormed)
7182 return;
7183 was_checked = ent->checked;
7184
7185 /* special case of predefined entities */
7186 if ((ent->name == NULL) ||
7187 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7188 val = ent->content;
7189 if (val == NULL) return;
7190 /*
7191 * inline the entity.
7192 */
7193 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7194 (!ctxt->disableSAX))
7195 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7196 return;
7197 }
7198
7199 /*
7200 * The first reference to the entity trigger a parsing phase
7201 * where the ent->children is filled with the result from
7202 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007203 * Note: external parsed entities will not be loaded, it is not
7204 * required for a non-validating parser, unless the parsing option
7205 * of validating, or substituting entities were given. Doing so is
7206 * far more secure as the parser will only process data coming from
7207 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007208 */
Daniel Veillard4629ee02012-07-23 14:15:40 +08007209 if ((ent->checked == 0) &&
7210 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7211 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007212 unsigned long oldnbent = ctxt->nbentities;
7213
7214 /*
7215 * This is a bit hackish but this seems the best
7216 * way to make sure both SAX and DOM entity support
7217 * behaves okay.
7218 */
7219 void *user_data;
7220 if (ctxt->userData == ctxt)
7221 user_data = NULL;
7222 else
7223 user_data = ctxt->userData;
7224
7225 /*
7226 * Check that this entity is well formed
7227 * 4.3.2: An internal general parsed entity is well-formed
7228 * if its replacement text matches the production labeled
7229 * content.
7230 */
7231 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7232 ctxt->depth++;
7233 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7234 user_data, &list);
7235 ctxt->depth--;
7236
7237 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7238 ctxt->depth++;
7239 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7240 user_data, ctxt->depth, ent->URI,
7241 ent->ExternalID, &list);
7242 ctxt->depth--;
7243 } else {
7244 ret = XML_ERR_ENTITY_PE_INTERNAL;
7245 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7246 "invalid entity type found\n", NULL);
7247 }
7248
7249 /*
7250 * Store the number of entities needing parsing for this entity
7251 * content and do checkings
7252 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007253 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7254 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7255 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007256 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007257 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007258 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007259 return;
7260 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007261 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007262 xmlFreeNodeList(list);
7263 return;
7264 }
Owen Taylor3473f882001-02-23 17:55:21 +00007265
Daniel Veillard0161e632008-08-28 15:36:32 +00007266 if ((ret == XML_ERR_OK) && (list != NULL)) {
7267 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7268 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7269 (ent->children == NULL)) {
7270 ent->children = list;
7271 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007272 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007273 * Prune it directly in the generated document
7274 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007275 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007276 if (((list->type == XML_TEXT_NODE) &&
7277 (list->next == NULL)) ||
7278 (ctxt->parseMode == XML_PARSE_READER)) {
7279 list->parent = (xmlNodePtr) ent;
7280 list = NULL;
7281 ent->owner = 1;
7282 } else {
7283 ent->owner = 0;
7284 while (list != NULL) {
7285 list->parent = (xmlNodePtr) ctxt->node;
7286 list->doc = ctxt->myDoc;
7287 if (list->next == NULL)
7288 ent->last = list;
7289 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007290 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007291 list = ent->children;
7292#ifdef LIBXML_LEGACY_ENABLED
7293 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7294 xmlAddEntityReference(ent, list, NULL);
7295#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007296 }
7297 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007298 ent->owner = 1;
7299 while (list != NULL) {
7300 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007301 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007302 if (list->next == NULL)
7303 ent->last = list;
7304 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007305 }
7306 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007307 } else {
7308 xmlFreeNodeList(list);
7309 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007310 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007311 } else if ((ret != XML_ERR_OK) &&
7312 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7313 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7314 "Entity '%s' failed to parse\n", ent->name);
7315 } else if (list != NULL) {
7316 xmlFreeNodeList(list);
7317 list = NULL;
7318 }
7319 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007320 ent->checked = 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007321 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007322 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007323 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007324
Daniel Veillard0161e632008-08-28 15:36:32 +00007325 /*
7326 * Now that the entity content has been gathered
7327 * provide it to the application, this can take different forms based
7328 * on the parsing modes.
7329 */
7330 if (ent->children == NULL) {
7331 /*
7332 * Probably running in SAX mode and the callbacks don't
7333 * build the entity content. So unless we already went
7334 * though parsing for first checking go though the entity
7335 * content to generate callbacks associated to the entity
7336 */
7337 if (was_checked != 0) {
7338 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007339 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007340 * This is a bit hackish but this seems the best
7341 * way to make sure both SAX and DOM entity support
7342 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007343 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007344 if (ctxt->userData == ctxt)
7345 user_data = NULL;
7346 else
7347 user_data = ctxt->userData;
7348
7349 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7350 ctxt->depth++;
7351 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7352 ent->content, user_data, NULL);
7353 ctxt->depth--;
7354 } else if (ent->etype ==
7355 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7356 ctxt->depth++;
7357 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7358 ctxt->sax, user_data, ctxt->depth,
7359 ent->URI, ent->ExternalID, NULL);
7360 ctxt->depth--;
7361 } else {
7362 ret = XML_ERR_ENTITY_PE_INTERNAL;
7363 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7364 "invalid entity type found\n", NULL);
7365 }
7366 if (ret == XML_ERR_ENTITY_LOOP) {
7367 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7368 return;
7369 }
7370 }
7371 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7372 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7373 /*
7374 * Entity reference callback comes second, it's somewhat
7375 * superfluous but a compatibility to historical behaviour
7376 */
7377 ctxt->sax->reference(ctxt->userData, ent->name);
7378 }
7379 return;
7380 }
7381
7382 /*
7383 * If we didn't get any children for the entity being built
7384 */
7385 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7386 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7387 /*
7388 * Create a node.
7389 */
7390 ctxt->sax->reference(ctxt->userData, ent->name);
7391 return;
7392 }
7393
7394 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7395 /*
7396 * There is a problem on the handling of _private for entities
7397 * (bug 155816): Should we copy the content of the field from
7398 * the entity (possibly overwriting some value set by the user
7399 * when a copy is created), should we leave it alone, or should
7400 * we try to take care of different situations? The problem
7401 * is exacerbated by the usage of this field by the xmlReader.
7402 * To fix this bug, we look at _private on the created node
7403 * and, if it's NULL, we copy in whatever was in the entity.
7404 * If it's not NULL we leave it alone. This is somewhat of a
7405 * hack - maybe we should have further tests to determine
7406 * what to do.
7407 */
7408 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7409 /*
7410 * Seems we are generating the DOM content, do
7411 * a simple tree copy for all references except the first
7412 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007413 */
7414 if (((list == NULL) && (ent->owner == 0)) ||
7415 (ctxt->parseMode == XML_PARSE_READER)) {
7416 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7417
7418 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007419 * We are copying here, make sure there is no abuse
7420 */
7421 ctxt->sizeentcopy += ent->length;
7422 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7423 return;
7424
7425 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007426 * when operating on a reader, the entities definitions
7427 * are always owning the entities subtree.
7428 if (ctxt->parseMode == XML_PARSE_READER)
7429 ent->owner = 1;
7430 */
7431
7432 cur = ent->children;
7433 while (cur != NULL) {
7434 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7435 if (nw != NULL) {
7436 if (nw->_private == NULL)
7437 nw->_private = cur->_private;
7438 if (firstChild == NULL){
7439 firstChild = nw;
7440 }
7441 nw = xmlAddChild(ctxt->node, nw);
7442 }
7443 if (cur == ent->last) {
7444 /*
7445 * needed to detect some strange empty
7446 * node cases in the reader tests
7447 */
7448 if ((ctxt->parseMode == XML_PARSE_READER) &&
7449 (nw != NULL) &&
7450 (nw->type == XML_ELEMENT_NODE) &&
7451 (nw->children == NULL))
7452 nw->extra = 1;
7453
7454 break;
7455 }
7456 cur = cur->next;
7457 }
7458#ifdef LIBXML_LEGACY_ENABLED
7459 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7460 xmlAddEntityReference(ent, firstChild, nw);
7461#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007462 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007463 xmlNodePtr nw = NULL, cur, next, last,
7464 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007465
7466 /*
7467 * We are copying here, make sure there is no abuse
7468 */
7469 ctxt->sizeentcopy += ent->length;
7470 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7471 return;
7472
Daniel Veillard0161e632008-08-28 15:36:32 +00007473 /*
7474 * Copy the entity child list and make it the new
7475 * entity child list. The goal is to make sure any
7476 * ID or REF referenced will be the one from the
7477 * document content and not the entity copy.
7478 */
7479 cur = ent->children;
7480 ent->children = NULL;
7481 last = ent->last;
7482 ent->last = NULL;
7483 while (cur != NULL) {
7484 next = cur->next;
7485 cur->next = NULL;
7486 cur->parent = NULL;
7487 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7488 if (nw != NULL) {
7489 if (nw->_private == NULL)
7490 nw->_private = cur->_private;
7491 if (firstChild == NULL){
7492 firstChild = cur;
7493 }
7494 xmlAddChild((xmlNodePtr) ent, nw);
7495 xmlAddChild(ctxt->node, cur);
7496 }
7497 if (cur == last)
7498 break;
7499 cur = next;
7500 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007501 if (ent->owner == 0)
7502 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007503#ifdef LIBXML_LEGACY_ENABLED
7504 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7505 xmlAddEntityReference(ent, firstChild, nw);
7506#endif /* LIBXML_LEGACY_ENABLED */
7507 } else {
7508 const xmlChar *nbktext;
7509
7510 /*
7511 * the name change is to avoid coalescing of the
7512 * node with a possible previous text one which
7513 * would make ent->children a dangling pointer
7514 */
7515 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7516 -1);
7517 if (ent->children->type == XML_TEXT_NODE)
7518 ent->children->name = nbktext;
7519 if ((ent->last != ent->children) &&
7520 (ent->last->type == XML_TEXT_NODE))
7521 ent->last->name = nbktext;
7522 xmlAddChildList(ctxt->node, ent->children);
7523 }
7524
7525 /*
7526 * This is to avoid a nasty side effect, see
7527 * characters() in SAX.c
7528 */
7529 ctxt->nodemem = 0;
7530 ctxt->nodelen = 0;
7531 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007532 }
7533 }
7534}
7535
7536/**
7537 * xmlParseEntityRef:
7538 * @ctxt: an XML parser context
7539 *
7540 * parse ENTITY references declarations
7541 *
7542 * [68] EntityRef ::= '&' Name ';'
7543 *
7544 * [ WFC: Entity Declared ]
7545 * In a document without any DTD, a document with only an internal DTD
7546 * subset which contains no parameter entity references, or a document
7547 * with "standalone='yes'", the Name given in the entity reference
7548 * must match that in an entity declaration, except that well-formed
7549 * documents need not declare any of the following entities: amp, lt,
7550 * gt, apos, quot. The declaration of a parameter entity must precede
7551 * any reference to it. Similarly, the declaration of a general entity
7552 * must precede any reference to it which appears in a default value in an
7553 * attribute-list declaration. Note that if entities are declared in the
7554 * external subset or in external parameter entities, a non-validating
7555 * processor is not obligated to read and process their declarations;
7556 * for such documents, the rule that an entity must be declared is a
7557 * well-formedness constraint only if standalone='yes'.
7558 *
7559 * [ WFC: Parsed Entity ]
7560 * An entity reference must not contain the name of an unparsed entity
7561 *
7562 * Returns the xmlEntityPtr if found, or NULL otherwise.
7563 */
7564xmlEntityPtr
7565xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007566 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007567 xmlEntityPtr ent = NULL;
7568
7569 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007570 if (ctxt->instate == XML_PARSER_EOF)
7571 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007572
Daniel Veillard0161e632008-08-28 15:36:32 +00007573 if (RAW != '&')
7574 return(NULL);
7575 NEXT;
7576 name = xmlParseName(ctxt);
7577 if (name == NULL) {
7578 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7579 "xmlParseEntityRef: no name\n");
7580 return(NULL);
7581 }
7582 if (RAW != ';') {
7583 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7584 return(NULL);
7585 }
7586 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007587
Daniel Veillard0161e632008-08-28 15:36:32 +00007588 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007589 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007590 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007591 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7592 ent = xmlGetPredefinedEntity(name);
7593 if (ent != NULL)
7594 return(ent);
7595 }
Owen Taylor3473f882001-02-23 17:55:21 +00007596
Daniel Veillard0161e632008-08-28 15:36:32 +00007597 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007598 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007599 */
7600 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007601
Daniel Veillard0161e632008-08-28 15:36:32 +00007602 /*
7603 * Ask first SAX for entity resolution, otherwise try the
7604 * entities which may have stored in the parser context.
7605 */
7606 if (ctxt->sax != NULL) {
7607 if (ctxt->sax->getEntity != NULL)
7608 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007609 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007610 (ctxt->options & XML_PARSE_OLDSAX))
7611 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007612 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7613 (ctxt->userData==ctxt)) {
7614 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007615 }
7616 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007617 if (ctxt->instate == XML_PARSER_EOF)
7618 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007619 /*
7620 * [ WFC: Entity Declared ]
7621 * In a document without any DTD, a document with only an
7622 * internal DTD subset which contains no parameter entity
7623 * references, or a document with "standalone='yes'", the
7624 * Name given in the entity reference must match that in an
7625 * entity declaration, except that well-formed documents
7626 * need not declare any of the following entities: amp, lt,
7627 * gt, apos, quot.
7628 * The declaration of a parameter entity must precede any
7629 * reference to it.
7630 * Similarly, the declaration of a general entity must
7631 * precede any reference to it which appears in a default
7632 * value in an attribute-list declaration. Note that if
7633 * entities are declared in the external subset or in
7634 * external parameter entities, a non-validating processor
7635 * is not obligated to read and process their declarations;
7636 * for such documents, the rule that an entity must be
7637 * declared is a well-formedness constraint only if
7638 * standalone='yes'.
7639 */
7640 if (ent == NULL) {
7641 if ((ctxt->standalone == 1) ||
7642 ((ctxt->hasExternalSubset == 0) &&
7643 (ctxt->hasPErefs == 0))) {
7644 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7645 "Entity '%s' not defined\n", name);
7646 } else {
7647 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7648 "Entity '%s' not defined\n", name);
7649 if ((ctxt->inSubset == 0) &&
7650 (ctxt->sax != NULL) &&
7651 (ctxt->sax->reference != NULL)) {
7652 ctxt->sax->reference(ctxt->userData, name);
7653 }
7654 }
7655 ctxt->valid = 0;
7656 }
7657
7658 /*
7659 * [ WFC: Parsed Entity ]
7660 * An entity reference must not contain the name of an
7661 * unparsed entity
7662 */
7663 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7664 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7665 "Entity reference to unparsed entity %s\n", name);
7666 }
7667
7668 /*
7669 * [ WFC: No External Entity References ]
7670 * Attribute values cannot contain direct or indirect
7671 * entity references to external entities.
7672 */
7673 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7674 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7675 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7676 "Attribute references external entity '%s'\n", name);
7677 }
7678 /*
7679 * [ WFC: No < in Attribute Values ]
7680 * The replacement text of any entity referred to directly or
7681 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007682 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007683 */
7684 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007685 (ent != NULL) &&
7686 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard7cf57382014-10-08 16:09:56 +08007687 if (((ent->checked & 1) || (ent->checked == 0)) &&
7688 (ent->content != NULL) && (xmlStrchr(ent->content, '<'))) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007689 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7690 "'<' in entity '%s' is not allowed in attributes values\n", name);
7691 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007692 }
7693
7694 /*
7695 * Internal check, no parameter entities here ...
7696 */
7697 else {
7698 switch (ent->etype) {
7699 case XML_INTERNAL_PARAMETER_ENTITY:
7700 case XML_EXTERNAL_PARAMETER_ENTITY:
7701 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7702 "Attempt to reference the parameter entity '%s'\n",
7703 name);
7704 break;
7705 default:
7706 break;
7707 }
7708 }
7709
7710 /*
7711 * [ WFC: No Recursion ]
7712 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007713 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007714 * Done somewhere else
7715 */
Owen Taylor3473f882001-02-23 17:55:21 +00007716 return(ent);
7717}
7718
7719/**
7720 * xmlParseStringEntityRef:
7721 * @ctxt: an XML parser context
7722 * @str: a pointer to an index in the string
7723 *
7724 * parse ENTITY references declarations, but this version parses it from
7725 * a string value.
7726 *
7727 * [68] EntityRef ::= '&' Name ';'
7728 *
7729 * [ WFC: Entity Declared ]
7730 * In a document without any DTD, a document with only an internal DTD
7731 * subset which contains no parameter entity references, or a document
7732 * with "standalone='yes'", the Name given in the entity reference
7733 * must match that in an entity declaration, except that well-formed
7734 * documents need not declare any of the following entities: amp, lt,
7735 * gt, apos, quot. The declaration of a parameter entity must precede
7736 * any reference to it. Similarly, the declaration of a general entity
7737 * must precede any reference to it which appears in a default value in an
7738 * attribute-list declaration. Note that if entities are declared in the
7739 * external subset or in external parameter entities, a non-validating
7740 * processor is not obligated to read and process their declarations;
7741 * for such documents, the rule that an entity must be declared is a
7742 * well-formedness constraint only if standalone='yes'.
7743 *
7744 * [ WFC: Parsed Entity ]
7745 * An entity reference must not contain the name of an unparsed entity
7746 *
7747 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7748 * is updated to the current location in the string.
7749 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007750static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007751xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7752 xmlChar *name;
7753 const xmlChar *ptr;
7754 xmlChar cur;
7755 xmlEntityPtr ent = NULL;
7756
7757 if ((str == NULL) || (*str == NULL))
7758 return(NULL);
7759 ptr = *str;
7760 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007761 if (cur != '&')
7762 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007763
Daniel Veillard0161e632008-08-28 15:36:32 +00007764 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007765 name = xmlParseStringName(ctxt, &ptr);
7766 if (name == NULL) {
7767 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7768 "xmlParseStringEntityRef: no name\n");
7769 *str = ptr;
7770 return(NULL);
7771 }
7772 if (*ptr != ';') {
7773 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007774 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007775 *str = ptr;
7776 return(NULL);
7777 }
7778 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007779
Owen Taylor3473f882001-02-23 17:55:21 +00007780
Daniel Veillard0161e632008-08-28 15:36:32 +00007781 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007782 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007783 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007784 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7785 ent = xmlGetPredefinedEntity(name);
7786 if (ent != NULL) {
7787 xmlFree(name);
7788 *str = ptr;
7789 return(ent);
7790 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007791 }
Owen Taylor3473f882001-02-23 17:55:21 +00007792
Daniel Veillard0161e632008-08-28 15:36:32 +00007793 /*
7794 * Increate the number of entity references parsed
7795 */
7796 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007797
Daniel Veillard0161e632008-08-28 15:36:32 +00007798 /*
7799 * Ask first SAX for entity resolution, otherwise try the
7800 * entities which may have stored in the parser context.
7801 */
7802 if (ctxt->sax != NULL) {
7803 if (ctxt->sax->getEntity != NULL)
7804 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007805 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7806 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007807 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7808 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007809 }
7810 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007811 if (ctxt->instate == XML_PARSER_EOF) {
7812 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007813 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007814 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007815
7816 /*
7817 * [ WFC: Entity Declared ]
7818 * In a document without any DTD, a document with only an
7819 * internal DTD subset which contains no parameter entity
7820 * references, or a document with "standalone='yes'", the
7821 * Name given in the entity reference must match that in an
7822 * entity declaration, except that well-formed documents
7823 * need not declare any of the following entities: amp, lt,
7824 * gt, apos, quot.
7825 * The declaration of a parameter entity must precede any
7826 * reference to it.
7827 * Similarly, the declaration of a general entity must
7828 * precede any reference to it which appears in a default
7829 * value in an attribute-list declaration. Note that if
7830 * entities are declared in the external subset or in
7831 * external parameter entities, a non-validating processor
7832 * is not obligated to read and process their declarations;
7833 * for such documents, the rule that an entity must be
7834 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007835 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007836 */
7837 if (ent == NULL) {
7838 if ((ctxt->standalone == 1) ||
7839 ((ctxt->hasExternalSubset == 0) &&
7840 (ctxt->hasPErefs == 0))) {
7841 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7842 "Entity '%s' not defined\n", name);
7843 } else {
7844 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7845 "Entity '%s' not defined\n",
7846 name);
7847 }
7848 /* TODO ? check regressions ctxt->valid = 0; */
7849 }
7850
7851 /*
7852 * [ WFC: Parsed Entity ]
7853 * An entity reference must not contain the name of an
7854 * unparsed entity
7855 */
7856 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7857 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7858 "Entity reference to unparsed entity %s\n", name);
7859 }
7860
7861 /*
7862 * [ WFC: No External Entity References ]
7863 * Attribute values cannot contain direct or indirect
7864 * entity references to external entities.
7865 */
7866 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7867 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7868 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7869 "Attribute references external entity '%s'\n", name);
7870 }
7871 /*
7872 * [ WFC: No < in Attribute Values ]
7873 * The replacement text of any entity referred to directly or
7874 * indirectly in an attribute value (other than "&lt;") must
7875 * not contain a <.
7876 */
7877 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7878 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007879 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007880 (xmlStrchr(ent->content, '<'))) {
7881 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7882 "'<' in entity '%s' is not allowed in attributes values\n",
7883 name);
7884 }
7885
7886 /*
7887 * Internal check, no parameter entities here ...
7888 */
7889 else {
7890 switch (ent->etype) {
7891 case XML_INTERNAL_PARAMETER_ENTITY:
7892 case XML_EXTERNAL_PARAMETER_ENTITY:
7893 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7894 "Attempt to reference the parameter entity '%s'\n",
7895 name);
7896 break;
7897 default:
7898 break;
7899 }
7900 }
7901
7902 /*
7903 * [ WFC: No Recursion ]
7904 * A parsed entity must not contain a recursive reference
7905 * to itself, either directly or indirectly.
7906 * Done somewhere else
7907 */
7908
7909 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007910 *str = ptr;
7911 return(ent);
7912}
7913
7914/**
7915 * xmlParsePEReference:
7916 * @ctxt: an XML parser context
7917 *
7918 * parse PEReference declarations
7919 * The entity content is handled directly by pushing it's content as
7920 * a new input stream.
7921 *
7922 * [69] PEReference ::= '%' Name ';'
7923 *
7924 * [ WFC: No Recursion ]
7925 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007926 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007927 *
7928 * [ WFC: Entity Declared ]
7929 * In a document without any DTD, a document with only an internal DTD
7930 * subset which contains no parameter entity references, or a document
7931 * with "standalone='yes'", ... ... The declaration of a parameter
7932 * entity must precede any reference to it...
7933 *
7934 * [ VC: Entity Declared ]
7935 * In a document with an external subset or external parameter entities
7936 * with "standalone='no'", ... ... The declaration of a parameter entity
7937 * must precede any reference to it...
7938 *
7939 * [ WFC: In DTD ]
7940 * Parameter-entity references may only appear in the DTD.
7941 * NOTE: misleading but this is handled.
7942 */
7943void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007944xmlParsePEReference(xmlParserCtxtPtr ctxt)
7945{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007946 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007947 xmlEntityPtr entity = NULL;
7948 xmlParserInputPtr input;
7949
Daniel Veillard0161e632008-08-28 15:36:32 +00007950 if (RAW != '%')
7951 return;
7952 NEXT;
7953 name = xmlParseName(ctxt);
7954 if (name == NULL) {
7955 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7956 "xmlParsePEReference: no name\n");
7957 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007958 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007959 if (RAW != ';') {
7960 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7961 return;
7962 }
7963
7964 NEXT;
7965
7966 /*
7967 * Increate the number of entity references parsed
7968 */
7969 ctxt->nbentities++;
7970
7971 /*
7972 * Request the entity from SAX
7973 */
7974 if ((ctxt->sax != NULL) &&
7975 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007976 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7977 if (ctxt->instate == XML_PARSER_EOF)
7978 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007979 if (entity == NULL) {
7980 /*
7981 * [ WFC: Entity Declared ]
7982 * In a document without any DTD, a document with only an
7983 * internal DTD subset which contains no parameter entity
7984 * references, or a document with "standalone='yes'", ...
7985 * ... The declaration of a parameter entity must precede
7986 * any reference to it...
7987 */
7988 if ((ctxt->standalone == 1) ||
7989 ((ctxt->hasExternalSubset == 0) &&
7990 (ctxt->hasPErefs == 0))) {
7991 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7992 "PEReference: %%%s; not found\n",
7993 name);
7994 } else {
7995 /*
7996 * [ VC: Entity Declared ]
7997 * In a document with an external subset or external
7998 * parameter entities with "standalone='no'", ...
7999 * ... The declaration of a parameter entity must
8000 * precede any reference to it...
8001 */
8002 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8003 "PEReference: %%%s; not found\n",
8004 name, NULL);
8005 ctxt->valid = 0;
8006 }
8007 } else {
8008 /*
8009 * Internal checking in case the entity quest barfed
8010 */
8011 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8012 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8013 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8014 "Internal: %%%s; is not a parameter entity\n",
8015 name, NULL);
8016 } else if (ctxt->input->free != deallocblankswrapper) {
8017 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
8018 if (xmlPushInput(ctxt, input) < 0)
8019 return;
8020 } else {
8021 /*
8022 * TODO !!!
8023 * handle the extra spaces added before and after
8024 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8025 */
8026 input = xmlNewEntityInputStream(ctxt, entity);
8027 if (xmlPushInput(ctxt, input) < 0)
8028 return;
8029 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8030 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8031 (IS_BLANK_CH(NXT(5)))) {
8032 xmlParseTextDecl(ctxt);
8033 if (ctxt->errNo ==
8034 XML_ERR_UNSUPPORTED_ENCODING) {
8035 /*
8036 * The XML REC instructs us to stop parsing
8037 * right here
8038 */
8039 ctxt->instate = XML_PARSER_EOF;
8040 return;
8041 }
8042 }
8043 }
8044 }
8045 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008046}
8047
8048/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008049 * xmlLoadEntityContent:
8050 * @ctxt: an XML parser context
8051 * @entity: an unloaded system entity
8052 *
8053 * Load the original content of the given system entity from the
8054 * ExternalID/SystemID given. This is to be used for Included in Literal
8055 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8056 *
8057 * Returns 0 in case of success and -1 in case of failure
8058 */
8059static int
8060xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8061 xmlParserInputPtr input;
8062 xmlBufferPtr buf;
8063 int l, c;
8064 int count = 0;
8065
8066 if ((ctxt == NULL) || (entity == NULL) ||
8067 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8068 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8069 (entity->content != NULL)) {
8070 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8071 "xmlLoadEntityContent parameter error");
8072 return(-1);
8073 }
8074
8075 if (xmlParserDebugEntities)
8076 xmlGenericError(xmlGenericErrorContext,
8077 "Reading %s entity content input\n", entity->name);
8078
8079 buf = xmlBufferCreate();
8080 if (buf == NULL) {
8081 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8082 "xmlLoadEntityContent parameter error");
8083 return(-1);
8084 }
8085
8086 input = xmlNewEntityInputStream(ctxt, entity);
8087 if (input == NULL) {
8088 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8089 "xmlLoadEntityContent input error");
8090 xmlBufferFree(buf);
8091 return(-1);
8092 }
8093
8094 /*
8095 * Push the entity as the current input, read char by char
8096 * saving to the buffer until the end of the entity or an error
8097 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008098 if (xmlPushInput(ctxt, input) < 0) {
8099 xmlBufferFree(buf);
8100 return(-1);
8101 }
8102
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008103 GROW;
8104 c = CUR_CHAR(l);
8105 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8106 (IS_CHAR(c))) {
8107 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008108 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008109 count = 0;
8110 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008111 if (ctxt->instate == XML_PARSER_EOF) {
8112 xmlBufferFree(buf);
8113 return(-1);
8114 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008115 }
8116 NEXTL(l);
8117 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008118 if (c == 0) {
8119 count = 0;
8120 GROW;
8121 if (ctxt->instate == XML_PARSER_EOF) {
8122 xmlBufferFree(buf);
8123 return(-1);
8124 }
8125 c = CUR_CHAR(l);
8126 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008127 }
8128
8129 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8130 xmlPopInput(ctxt);
8131 } else if (!IS_CHAR(c)) {
8132 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8133 "xmlLoadEntityContent: invalid char value %d\n",
8134 c);
8135 xmlBufferFree(buf);
8136 return(-1);
8137 }
8138 entity->content = buf->content;
8139 buf->content = NULL;
8140 xmlBufferFree(buf);
8141
8142 return(0);
8143}
8144
8145/**
Owen Taylor3473f882001-02-23 17:55:21 +00008146 * xmlParseStringPEReference:
8147 * @ctxt: an XML parser context
8148 * @str: a pointer to an index in the string
8149 *
8150 * parse PEReference declarations
8151 *
8152 * [69] PEReference ::= '%' Name ';'
8153 *
8154 * [ WFC: No Recursion ]
8155 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008156 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008157 *
8158 * [ WFC: Entity Declared ]
8159 * In a document without any DTD, a document with only an internal DTD
8160 * subset which contains no parameter entity references, or a document
8161 * with "standalone='yes'", ... ... The declaration of a parameter
8162 * entity must precede any reference to it...
8163 *
8164 * [ VC: Entity Declared ]
8165 * In a document with an external subset or external parameter entities
8166 * with "standalone='no'", ... ... The declaration of a parameter entity
8167 * must precede any reference to it...
8168 *
8169 * [ WFC: In DTD ]
8170 * Parameter-entity references may only appear in the DTD.
8171 * NOTE: misleading but this is handled.
8172 *
8173 * Returns the string of the entity content.
8174 * str is updated to the current value of the index
8175 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008176static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008177xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8178 const xmlChar *ptr;
8179 xmlChar cur;
8180 xmlChar *name;
8181 xmlEntityPtr entity = NULL;
8182
8183 if ((str == NULL) || (*str == NULL)) return(NULL);
8184 ptr = *str;
8185 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008186 if (cur != '%')
8187 return(NULL);
8188 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008189 name = xmlParseStringName(ctxt, &ptr);
8190 if (name == NULL) {
8191 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8192 "xmlParseStringPEReference: no name\n");
8193 *str = ptr;
8194 return(NULL);
8195 }
8196 cur = *ptr;
8197 if (cur != ';') {
8198 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8199 xmlFree(name);
8200 *str = ptr;
8201 return(NULL);
8202 }
8203 ptr++;
8204
8205 /*
8206 * Increate the number of entity references parsed
8207 */
8208 ctxt->nbentities++;
8209
8210 /*
8211 * Request the entity from SAX
8212 */
8213 if ((ctxt->sax != NULL) &&
8214 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008215 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8216 if (ctxt->instate == XML_PARSER_EOF) {
8217 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008218 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008219 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008220 if (entity == NULL) {
8221 /*
8222 * [ WFC: Entity Declared ]
8223 * In a document without any DTD, a document with only an
8224 * internal DTD subset which contains no parameter entity
8225 * references, or a document with "standalone='yes'", ...
8226 * ... The declaration of a parameter entity must precede
8227 * any reference to it...
8228 */
8229 if ((ctxt->standalone == 1) ||
8230 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8231 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8232 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008233 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008234 /*
8235 * [ VC: Entity Declared ]
8236 * In a document with an external subset or external
8237 * parameter entities with "standalone='no'", ...
8238 * ... The declaration of a parameter entity must
8239 * precede any reference to it...
8240 */
8241 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8242 "PEReference: %%%s; not found\n",
8243 name, NULL);
8244 ctxt->valid = 0;
8245 }
8246 } else {
8247 /*
8248 * Internal checking in case the entity quest barfed
8249 */
8250 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8251 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8252 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8253 "%%%s; is not a parameter entity\n",
8254 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008255 }
8256 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008257 ctxt->hasPErefs = 1;
8258 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008259 *str = ptr;
8260 return(entity);
8261}
8262
8263/**
8264 * xmlParseDocTypeDecl:
8265 * @ctxt: an XML parser context
8266 *
8267 * parse a DOCTYPE declaration
8268 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008269 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008270 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8271 *
8272 * [ VC: Root Element Type ]
8273 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008274 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008275 */
8276
8277void
8278xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008279 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008280 xmlChar *ExternalID = NULL;
8281 xmlChar *URI = NULL;
8282
8283 /*
8284 * We know that '<!DOCTYPE' has been detected.
8285 */
8286 SKIP(9);
8287
8288 SKIP_BLANKS;
8289
8290 /*
8291 * Parse the DOCTYPE name.
8292 */
8293 name = xmlParseName(ctxt);
8294 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008295 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8296 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008297 }
8298 ctxt->intSubName = name;
8299
8300 SKIP_BLANKS;
8301
8302 /*
8303 * Check for SystemID and ExternalID
8304 */
8305 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8306
8307 if ((URI != NULL) || (ExternalID != NULL)) {
8308 ctxt->hasExternalSubset = 1;
8309 }
8310 ctxt->extSubURI = URI;
8311 ctxt->extSubSystem = ExternalID;
8312
8313 SKIP_BLANKS;
8314
8315 /*
8316 * Create and update the internal subset.
8317 */
8318 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8319 (!ctxt->disableSAX))
8320 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008321 if (ctxt->instate == XML_PARSER_EOF)
8322 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008323
8324 /*
8325 * Is there any internal subset declarations ?
8326 * they are handled separately in xmlParseInternalSubset()
8327 */
8328 if (RAW == '[')
8329 return;
8330
8331 /*
8332 * We should be at the end of the DOCTYPE declaration.
8333 */
8334 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008335 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008336 }
8337 NEXT;
8338}
8339
8340/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008341 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008342 * @ctxt: an XML parser context
8343 *
8344 * parse the internal subset declaration
8345 *
8346 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8347 */
8348
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008349static void
Owen Taylor3473f882001-02-23 17:55:21 +00008350xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8351 /*
8352 * Is there any DTD definition ?
8353 */
8354 if (RAW == '[') {
8355 ctxt->instate = XML_PARSER_DTD;
8356 NEXT;
8357 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008358 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008359 * PEReferences.
8360 * Subsequence (markupdecl | PEReference | S)*
8361 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008362 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008363 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008364 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008365
8366 SKIP_BLANKS;
8367 xmlParseMarkupDecl(ctxt);
8368 xmlParsePEReference(ctxt);
8369
8370 /*
8371 * Pop-up of finished entities.
8372 */
8373 while ((RAW == 0) && (ctxt->inputNr > 1))
8374 xmlPopInput(ctxt);
8375
8376 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008377 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008378 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008379 break;
8380 }
8381 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008382 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008383 NEXT;
8384 SKIP_BLANKS;
8385 }
8386 }
8387
8388 /*
8389 * We should be at the end of the DOCTYPE declaration.
8390 */
8391 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008392 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008393 }
8394 NEXT;
8395}
8396
Daniel Veillard81273902003-09-30 00:43:48 +00008397#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008398/**
8399 * xmlParseAttribute:
8400 * @ctxt: an XML parser context
8401 * @value: a xmlChar ** used to store the value of the attribute
8402 *
8403 * parse an attribute
8404 *
8405 * [41] Attribute ::= Name Eq AttValue
8406 *
8407 * [ WFC: No External Entity References ]
8408 * Attribute values cannot contain direct or indirect entity references
8409 * to external entities.
8410 *
8411 * [ WFC: No < in Attribute Values ]
8412 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008413 * an attribute value (other than "&lt;") must not contain a <.
8414 *
Owen Taylor3473f882001-02-23 17:55:21 +00008415 * [ VC: Attribute Value Type ]
8416 * The attribute must have been declared; the value must be of the type
8417 * declared for it.
8418 *
8419 * [25] Eq ::= S? '=' S?
8420 *
8421 * With namespace:
8422 *
8423 * [NS 11] Attribute ::= QName Eq AttValue
8424 *
8425 * Also the case QName == xmlns:??? is handled independently as a namespace
8426 * definition.
8427 *
8428 * Returns the attribute name, and the value in *value.
8429 */
8430
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008431const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008432xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008433 const xmlChar *name;
8434 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008435
8436 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008437 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008438 name = xmlParseName(ctxt);
8439 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008440 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008441 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008442 return(NULL);
8443 }
8444
8445 /*
8446 * read the value
8447 */
8448 SKIP_BLANKS;
8449 if (RAW == '=') {
8450 NEXT;
8451 SKIP_BLANKS;
8452 val = xmlParseAttValue(ctxt);
8453 ctxt->instate = XML_PARSER_CONTENT;
8454 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008455 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008456 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008457 return(NULL);
8458 }
8459
8460 /*
8461 * Check that xml:lang conforms to the specification
8462 * No more registered as an error, just generate a warning now
8463 * since this was deprecated in XML second edition
8464 */
8465 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8466 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008467 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8468 "Malformed value for xml:lang : %s\n",
8469 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008470 }
8471 }
8472
8473 /*
8474 * Check that xml:space conforms to the specification
8475 */
8476 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8477 if (xmlStrEqual(val, BAD_CAST "default"))
8478 *(ctxt->space) = 0;
8479 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8480 *(ctxt->space) = 1;
8481 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008482 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008483"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008484 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008485 }
8486 }
8487
8488 *value = val;
8489 return(name);
8490}
8491
8492/**
8493 * xmlParseStartTag:
8494 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008495 *
Owen Taylor3473f882001-02-23 17:55:21 +00008496 * parse a start of tag either for rule element or
8497 * EmptyElement. In both case we don't parse the tag closing chars.
8498 *
8499 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8500 *
8501 * [ WFC: Unique Att Spec ]
8502 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008503 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008504 *
8505 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8506 *
8507 * [ WFC: Unique Att Spec ]
8508 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008509 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008510 *
8511 * With namespace:
8512 *
8513 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8514 *
8515 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8516 *
8517 * Returns the element name parsed
8518 */
8519
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008520const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008521xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008522 const xmlChar *name;
8523 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008524 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008525 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008526 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008527 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008528 int i;
8529
8530 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008531 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008532
8533 name = xmlParseName(ctxt);
8534 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008535 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008536 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008537 return(NULL);
8538 }
8539
8540 /*
8541 * Now parse the attributes, it ends up with the ending
8542 *
8543 * (S Attribute)* S?
8544 */
8545 SKIP_BLANKS;
8546 GROW;
8547
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008548 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008549 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008550 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008551 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008552 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008553
8554 attname = xmlParseAttribute(ctxt, &attvalue);
8555 if ((attname != NULL) && (attvalue != NULL)) {
8556 /*
8557 * [ WFC: Unique Att Spec ]
8558 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008559 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008560 */
8561 for (i = 0; i < nbatts;i += 2) {
8562 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008563 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008564 xmlFree(attvalue);
8565 goto failed;
8566 }
8567 }
Owen Taylor3473f882001-02-23 17:55:21 +00008568 /*
8569 * Add the pair to atts
8570 */
8571 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008572 maxatts = 22; /* allow for 10 attrs by default */
8573 atts = (const xmlChar **)
8574 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008575 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008576 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008577 if (attvalue != NULL)
8578 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008579 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008580 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008581 ctxt->atts = atts;
8582 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008583 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008584 const xmlChar **n;
8585
Owen Taylor3473f882001-02-23 17:55:21 +00008586 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008587 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008588 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008589 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008590 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008591 if (attvalue != NULL)
8592 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008593 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008594 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008595 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008596 ctxt->atts = atts;
8597 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008598 }
8599 atts[nbatts++] = attname;
8600 atts[nbatts++] = attvalue;
8601 atts[nbatts] = NULL;
8602 atts[nbatts + 1] = NULL;
8603 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008604 if (attvalue != NULL)
8605 xmlFree(attvalue);
8606 }
8607
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008608failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008609
Daniel Veillard3772de32002-12-17 10:31:45 +00008610 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008611 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8612 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008613 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008614 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8615 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008616 }
8617 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008618 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8619 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008620 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8621 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008622 break;
8623 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008624 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008625 GROW;
8626 }
8627
8628 /*
8629 * SAX: Start of Element !
8630 */
8631 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008632 (!ctxt->disableSAX)) {
8633 if (nbatts > 0)
8634 ctxt->sax->startElement(ctxt->userData, name, atts);
8635 else
8636 ctxt->sax->startElement(ctxt->userData, name, NULL);
8637 }
Owen Taylor3473f882001-02-23 17:55:21 +00008638
8639 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008640 /* Free only the content strings */
8641 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008642 if (atts[i] != NULL)
8643 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008644 }
8645 return(name);
8646}
8647
8648/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008649 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008650 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008651 * @line: line of the start tag
8652 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008653 *
8654 * parse an end of tag
8655 *
8656 * [42] ETag ::= '</' Name S? '>'
8657 *
8658 * With namespace
8659 *
8660 * [NS 9] ETag ::= '</' QName S? '>'
8661 */
8662
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008663static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008664xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008665 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008666
8667 GROW;
8668 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008669 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008670 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008671 return;
8672 }
8673 SKIP(2);
8674
Daniel Veillard46de64e2002-05-29 08:21:33 +00008675 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008676
8677 /*
8678 * We should definitely be at the ending "S? '>'" part
8679 */
8680 GROW;
8681 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008682 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008683 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008684 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008685 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008686
8687 /*
8688 * [ WFC: Element Type Match ]
8689 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008690 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008691 *
8692 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008693 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008694 if (name == NULL) name = BAD_CAST "unparseable";
8695 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008696 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008697 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008698 }
8699
8700 /*
8701 * SAX: End of Tag
8702 */
8703 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8704 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008705 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008706
Daniel Veillarde57ec792003-09-10 10:50:59 +00008707 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008708 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008709 return;
8710}
8711
8712/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008713 * xmlParseEndTag:
8714 * @ctxt: an XML parser context
8715 *
8716 * parse an end of tag
8717 *
8718 * [42] ETag ::= '</' Name S? '>'
8719 *
8720 * With namespace
8721 *
8722 * [NS 9] ETag ::= '</' QName S? '>'
8723 */
8724
8725void
8726xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008727 xmlParseEndTag1(ctxt, 0);
8728}
Daniel Veillard81273902003-09-30 00:43:48 +00008729#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008730
8731/************************************************************************
8732 * *
8733 * SAX 2 specific operations *
8734 * *
8735 ************************************************************************/
8736
Daniel Veillard0fb18932003-09-07 09:14:37 +00008737/*
8738 * xmlGetNamespace:
8739 * @ctxt: an XML parser context
8740 * @prefix: the prefix to lookup
8741 *
8742 * Lookup the namespace name for the @prefix (which ca be NULL)
8743 * The prefix must come from the @ctxt->dict dictionnary
8744 *
8745 * Returns the namespace name or NULL if not bound
8746 */
8747static const xmlChar *
8748xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8749 int i;
8750
Daniel Veillarde57ec792003-09-10 10:50:59 +00008751 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008752 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008753 if (ctxt->nsTab[i] == prefix) {
8754 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8755 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008756 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008757 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008758 return(NULL);
8759}
8760
8761/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008762 * xmlParseQName:
8763 * @ctxt: an XML parser context
8764 * @prefix: pointer to store the prefix part
8765 *
8766 * parse an XML Namespace QName
8767 *
8768 * [6] QName ::= (Prefix ':')? LocalPart
8769 * [7] Prefix ::= NCName
8770 * [8] LocalPart ::= NCName
8771 *
8772 * Returns the Name parsed or NULL
8773 */
8774
8775static const xmlChar *
8776xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8777 const xmlChar *l, *p;
8778
8779 GROW;
8780
8781 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008782 if (l == NULL) {
8783 if (CUR == ':') {
8784 l = xmlParseName(ctxt);
8785 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008786 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008787 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008788 *prefix = NULL;
8789 return(l);
8790 }
8791 }
8792 return(NULL);
8793 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008794 if (CUR == ':') {
8795 NEXT;
8796 p = l;
8797 l = xmlParseNCName(ctxt);
8798 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008799 xmlChar *tmp;
8800
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008801 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8802 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008803 l = xmlParseNmtoken(ctxt);
8804 if (l == NULL)
8805 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8806 else {
8807 tmp = xmlBuildQName(l, p, NULL, 0);
8808 xmlFree((char *)l);
8809 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008810 p = xmlDictLookup(ctxt->dict, tmp, -1);
8811 if (tmp != NULL) xmlFree(tmp);
8812 *prefix = NULL;
8813 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008814 }
8815 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008816 xmlChar *tmp;
8817
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008818 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8819 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008820 NEXT;
8821 tmp = (xmlChar *) xmlParseName(ctxt);
8822 if (tmp != NULL) {
8823 tmp = xmlBuildQName(tmp, l, NULL, 0);
8824 l = xmlDictLookup(ctxt->dict, tmp, -1);
8825 if (tmp != NULL) xmlFree(tmp);
8826 *prefix = p;
8827 return(l);
8828 }
8829 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8830 l = xmlDictLookup(ctxt->dict, tmp, -1);
8831 if (tmp != NULL) xmlFree(tmp);
8832 *prefix = p;
8833 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008834 }
8835 *prefix = p;
8836 } else
8837 *prefix = NULL;
8838 return(l);
8839}
8840
8841/**
8842 * xmlParseQNameAndCompare:
8843 * @ctxt: an XML parser context
8844 * @name: the localname
8845 * @prefix: the prefix, if any.
8846 *
8847 * parse an XML name and compares for match
8848 * (specialized for endtag parsing)
8849 *
8850 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8851 * and the name for mismatch
8852 */
8853
8854static const xmlChar *
8855xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8856 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008857 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008858 const xmlChar *in;
8859 const xmlChar *ret;
8860 const xmlChar *prefix2;
8861
8862 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8863
8864 GROW;
8865 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008866
Daniel Veillard0fb18932003-09-07 09:14:37 +00008867 cmp = prefix;
8868 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008869 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008870 ++cmp;
8871 }
8872 if ((*cmp == 0) && (*in == ':')) {
8873 in++;
8874 cmp = name;
8875 while (*in != 0 && *in == *cmp) {
8876 ++in;
8877 ++cmp;
8878 }
William M. Brack76e95df2003-10-18 16:20:14 +00008879 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008880 /* success */
8881 ctxt->input->cur = in;
8882 return((const xmlChar*) 1);
8883 }
8884 }
8885 /*
8886 * all strings coms from the dictionary, equality can be done directly
8887 */
8888 ret = xmlParseQName (ctxt, &prefix2);
8889 if ((ret == name) && (prefix == prefix2))
8890 return((const xmlChar*) 1);
8891 return ret;
8892}
8893
8894/**
8895 * xmlParseAttValueInternal:
8896 * @ctxt: an XML parser context
8897 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008898 * @alloc: whether the attribute was reallocated as a new string
8899 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008900 *
8901 * parse a value for an attribute.
8902 * NOTE: if no normalization is needed, the routine will return pointers
8903 * directly from the data buffer.
8904 *
8905 * 3.3.3 Attribute-Value Normalization:
8906 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008907 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008908 * - a character reference is processed by appending the referenced
8909 * character to the attribute value
8910 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008911 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008912 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8913 * appending #x20 to the normalized value, except that only a single
8914 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008915 * parsed entity or the literal entity value of an internal parsed entity
8916 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008917 * If the declared value is not CDATA, then the XML processor must further
8918 * process the normalized attribute value by discarding any leading and
8919 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008920 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008921 * All attributes for which no declaration has been read should be treated
8922 * by a non-validating parser as if declared CDATA.
8923 *
8924 * Returns the AttValue parsed or NULL. The value has to be freed by the
8925 * caller if it was copied, this can be detected by val[*len] == 0.
8926 */
8927
8928static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008929xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8930 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008931{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008932 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008933 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008934 xmlChar *ret = NULL;
Juergen Keil33f658c2014-08-07 17:30:36 +08008935 int line, col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008936
8937 GROW;
8938 in = (xmlChar *) CUR_PTR;
Juergen Keil33f658c2014-08-07 17:30:36 +08008939 line = ctxt->input->line;
8940 col = ctxt->input->col;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008941 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008942 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008943 return (NULL);
8944 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008945 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008946
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008947 /*
8948 * try to handle in this routine the most common case where no
8949 * allocation of a new string is required and where content is
8950 * pure ASCII.
8951 */
8952 limit = *in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08008953 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008954 end = ctxt->input->end;
8955 start = in;
8956 if (in >= end) {
8957 const xmlChar *oldbase = ctxt->input->base;
8958 GROW;
8959 if (oldbase != ctxt->input->base) {
8960 long delta = ctxt->input->base - oldbase;
8961 start = start + delta;
8962 in = in + delta;
8963 }
8964 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008965 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008966 if (normalize) {
8967 /*
8968 * Skip any leading spaces
8969 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008970 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008971 ((*in == 0x20) || (*in == 0x9) ||
8972 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08008973 if (*in == 0xA) {
8974 line++; col = 1;
8975 } else {
8976 col++;
8977 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008978 in++;
8979 start = in;
8980 if (in >= end) {
8981 const xmlChar *oldbase = ctxt->input->base;
8982 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008983 if (ctxt->instate == XML_PARSER_EOF)
8984 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008985 if (oldbase != ctxt->input->base) {
8986 long delta = ctxt->input->base - oldbase;
8987 start = start + delta;
8988 in = in + delta;
8989 }
8990 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008991 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8992 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8993 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008994 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008995 return(NULL);
8996 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008997 }
8998 }
8999 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9000 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009001 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009002 if ((*in++ == 0x20) && (*in == 0x20)) break;
9003 if (in >= end) {
9004 const xmlChar *oldbase = ctxt->input->base;
9005 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009006 if (ctxt->instate == XML_PARSER_EOF)
9007 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009008 if (oldbase != ctxt->input->base) {
9009 long delta = ctxt->input->base - oldbase;
9010 start = start + delta;
9011 in = in + delta;
9012 }
9013 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009014 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9015 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9016 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009017 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009018 return(NULL);
9019 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009020 }
9021 }
9022 last = in;
9023 /*
9024 * skip the trailing blanks
9025 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00009026 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009027 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009028 ((*in == 0x20) || (*in == 0x9) ||
9029 (*in == 0xA) || (*in == 0xD))) {
Juergen Keil33f658c2014-08-07 17:30:36 +08009030 if (*in == 0xA) {
9031 line++, col = 1;
9032 } else {
9033 col++;
9034 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009035 in++;
9036 if (in >= end) {
9037 const xmlChar *oldbase = ctxt->input->base;
9038 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009039 if (ctxt->instate == XML_PARSER_EOF)
9040 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009041 if (oldbase != ctxt->input->base) {
9042 long delta = ctxt->input->base - oldbase;
9043 start = start + delta;
9044 in = in + delta;
9045 last = last + delta;
9046 }
9047 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009048 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9049 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9050 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009051 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009052 return(NULL);
9053 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009054 }
9055 }
Daniel Veillarde17db992012-07-19 11:25:16 +08009056 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9057 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9058 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009059 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009060 return(NULL);
9061 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009062 if (*in != limit) goto need_complex;
9063 } else {
9064 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9065 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9066 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009067 col++;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009068 if (in >= end) {
9069 const xmlChar *oldbase = ctxt->input->base;
9070 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009071 if (ctxt->instate == XML_PARSER_EOF)
9072 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009073 if (oldbase != ctxt->input->base) {
9074 long delta = ctxt->input->base - oldbase;
9075 start = start + delta;
9076 in = in + delta;
9077 }
9078 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009079 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9080 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9081 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009082 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009083 return(NULL);
9084 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009085 }
9086 }
9087 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009088 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9089 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9090 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009091 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009092 return(NULL);
9093 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009094 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009095 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009096 in++;
Juergen Keil33f658c2014-08-07 17:30:36 +08009097 col++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009098 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009099 *len = last - start;
9100 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009101 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009102 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009103 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009104 }
9105 CUR_PTR = in;
Juergen Keil33f658c2014-08-07 17:30:36 +08009106 ctxt->input->line = line;
9107 ctxt->input->col = col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009108 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009109 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009110need_complex:
9111 if (alloc) *alloc = 1;
9112 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009113}
9114
9115/**
9116 * xmlParseAttribute2:
9117 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009118 * @pref: the element prefix
9119 * @elem: the element name
9120 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009121 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009122 * @len: an int * to save the length of the attribute
9123 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009124 *
9125 * parse an attribute in the new SAX2 framework.
9126 *
9127 * Returns the attribute name, and the value in *value, .
9128 */
9129
9130static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009131xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009132 const xmlChar * pref, const xmlChar * elem,
9133 const xmlChar ** prefix, xmlChar ** value,
9134 int *len, int *alloc)
9135{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009136 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009137 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009138 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009139
9140 *value = NULL;
9141 GROW;
9142 name = xmlParseQName(ctxt, prefix);
9143 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009144 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9145 "error parsing attribute name\n");
9146 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009147 }
9148
9149 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009150 * get the type if needed
9151 */
9152 if (ctxt->attsSpecial != NULL) {
9153 int type;
9154
9155 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009156 pref, elem, *prefix, name);
9157 if (type != 0)
9158 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009159 }
9160
9161 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009162 * read the value
9163 */
9164 SKIP_BLANKS;
9165 if (RAW == '=') {
9166 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009167 SKIP_BLANKS;
9168 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9169 if (normalize) {
9170 /*
9171 * Sometimes a second normalisation pass for spaces is needed
9172 * but that only happens if charrefs or entities refernces
9173 * have been used in the attribute value, i.e. the attribute
9174 * value have been extracted in an allocated string already.
9175 */
9176 if (*alloc) {
9177 const xmlChar *val2;
9178
9179 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009180 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009181 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009182 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009183 }
9184 }
9185 }
9186 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009187 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009188 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9189 "Specification mandate value for attribute %s\n",
9190 name);
9191 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009192 }
9193
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009194 if (*prefix == ctxt->str_xml) {
9195 /*
9196 * Check that xml:lang conforms to the specification
9197 * No more registered as an error, just generate a warning now
9198 * since this was deprecated in XML second edition
9199 */
9200 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9201 internal_val = xmlStrndup(val, *len);
9202 if (!xmlCheckLanguageID(internal_val)) {
9203 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9204 "Malformed value for xml:lang : %s\n",
9205 internal_val, NULL);
9206 }
9207 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009208
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009209 /*
9210 * Check that xml:space conforms to the specification
9211 */
9212 if (xmlStrEqual(name, BAD_CAST "space")) {
9213 internal_val = xmlStrndup(val, *len);
9214 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9215 *(ctxt->space) = 0;
9216 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9217 *(ctxt->space) = 1;
9218 else {
9219 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9220 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9221 internal_val, NULL);
9222 }
9223 }
9224 if (internal_val) {
9225 xmlFree(internal_val);
9226 }
9227 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009228
9229 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009230 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009231}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009232/**
9233 * xmlParseStartTag2:
9234 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009235 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009236 * parse a start of tag either for rule element or
9237 * EmptyElement. In both case we don't parse the tag closing chars.
9238 * This routine is called when running SAX2 parsing
9239 *
9240 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9241 *
9242 * [ WFC: Unique Att Spec ]
9243 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009244 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009245 *
9246 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9247 *
9248 * [ WFC: Unique Att Spec ]
9249 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009250 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009251 *
9252 * With namespace:
9253 *
9254 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9255 *
9256 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9257 *
9258 * Returns the element name parsed
9259 */
9260
9261static const xmlChar *
9262xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009263 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009264 const xmlChar *localname;
9265 const xmlChar *prefix;
9266 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009267 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009268 const xmlChar *nsname;
9269 xmlChar *attvalue;
9270 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009271 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009272 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009273 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009274 const xmlChar *base;
9275 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009276 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009277
9278 if (RAW != '<') return(NULL);
9279 NEXT1;
9280
9281 /*
9282 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9283 * point since the attribute values may be stored as pointers to
9284 * the buffer and calling SHRINK would destroy them !
9285 * The Shrinking is only possible once the full set of attribute
9286 * callbacks have been done.
9287 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009288reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009289 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009290 base = ctxt->input->base;
9291 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009292 oldline = ctxt->input->line;
9293 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009294 nbatts = 0;
9295 nratts = 0;
9296 nbdef = 0;
9297 nbNs = 0;
9298 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009299 /* Forget any namespaces added during an earlier parse of this element. */
9300 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009301
9302 localname = xmlParseQName(ctxt, &prefix);
9303 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009304 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9305 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009306 return(NULL);
9307 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009308 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009309
9310 /*
9311 * Now parse the attributes, it ends up with the ending
9312 *
9313 * (S Attribute)* S?
9314 */
9315 SKIP_BLANKS;
9316 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009317 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009318
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009319 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009320 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009321 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009322 const xmlChar *q = CUR_PTR;
9323 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009324 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009325
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009326 attname = xmlParseAttribute2(ctxt, prefix, localname,
9327 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00009328 if (ctxt->input->base != base) {
9329 if ((attvalue != NULL) && (alloc != 0))
9330 xmlFree(attvalue);
9331 attvalue = NULL;
9332 goto base_changed;
9333 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009334 if ((attname != NULL) && (attvalue != NULL)) {
9335 if (len < 0) len = xmlStrlen(attvalue);
9336 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009337 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9338 xmlURIPtr uri;
9339
Daniel Veillardc836ba62014-07-14 16:39:50 +08009340 if (URL == NULL) {
9341 xmlErrMemory(ctxt, "dictionary allocation failure");
9342 if ((attvalue != NULL) && (alloc != 0))
9343 xmlFree(attvalue);
9344 return(NULL);
9345 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009346 if (*URL != 0) {
9347 uri = xmlParseURI((const char *) URL);
9348 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009349 xmlNsErr(ctxt, XML_WAR_NS_URI,
9350 "xmlns: '%s' is not a valid URI\n",
9351 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009352 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00009353 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009354 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9355 "xmlns: URI %s is not absolute\n",
9356 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009357 }
9358 xmlFreeURI(uri);
9359 }
Daniel Veillard37334572008-07-31 08:20:02 +00009360 if (URL == ctxt->str_xml_ns) {
9361 if (attname != ctxt->str_xml) {
9362 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9363 "xml namespace URI cannot be the default namespace\n",
9364 NULL, NULL, NULL);
9365 }
9366 goto skip_default_ns;
9367 }
9368 if ((len == 29) &&
9369 (xmlStrEqual(URL,
9370 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9371 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9372 "reuse of the xmlns namespace name is forbidden\n",
9373 NULL, NULL, NULL);
9374 goto skip_default_ns;
9375 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009376 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009377 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009378 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009379 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009380 for (j = 1;j <= nbNs;j++)
9381 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9382 break;
9383 if (j <= nbNs)
9384 xmlErrAttributeDup(ctxt, NULL, attname);
9385 else
9386 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009387skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009388 if (alloc != 0) xmlFree(attvalue);
Dennis Filder7e9bbdf2014-10-06 20:34:14 +08009389 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9390 break;
9391 if (!IS_BLANK_CH(RAW)) {
9392 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9393 "attributes construct error\n");
9394 break;
9395 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009396 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009397 continue;
9398 }
9399 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009400 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9401 xmlURIPtr uri;
9402
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009403 if (attname == ctxt->str_xml) {
9404 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009405 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9406 "xml namespace prefix mapped to wrong URI\n",
9407 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009408 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009409 /*
9410 * Do not keep a namespace definition node
9411 */
Daniel Veillard37334572008-07-31 08:20:02 +00009412 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009413 }
Daniel Veillard37334572008-07-31 08:20:02 +00009414 if (URL == ctxt->str_xml_ns) {
9415 if (attname != ctxt->str_xml) {
9416 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9417 "xml namespace URI mapped to wrong prefix\n",
9418 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009419 }
Daniel Veillard37334572008-07-31 08:20:02 +00009420 goto skip_ns;
9421 }
9422 if (attname == ctxt->str_xmlns) {
9423 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9424 "redefinition of the xmlns prefix is forbidden\n",
9425 NULL, NULL, NULL);
9426 goto skip_ns;
9427 }
9428 if ((len == 29) &&
9429 (xmlStrEqual(URL,
9430 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9431 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9432 "reuse of the xmlns namespace name is forbidden\n",
9433 NULL, NULL, NULL);
9434 goto skip_ns;
9435 }
9436 if ((URL == NULL) || (URL[0] == 0)) {
9437 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9438 "xmlns:%s: Empty XML namespace is not allowed\n",
9439 attname, NULL, NULL);
9440 goto skip_ns;
9441 } else {
9442 uri = xmlParseURI((const char *) URL);
9443 if (uri == NULL) {
9444 xmlNsErr(ctxt, XML_WAR_NS_URI,
9445 "xmlns:%s: '%s' is not a valid URI\n",
9446 attname, URL, NULL);
9447 } else {
9448 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9449 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9450 "xmlns:%s: URI %s is not absolute\n",
9451 attname, URL, NULL);
9452 }
9453 xmlFreeURI(uri);
9454 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009455 }
9456
Daniel Veillard0fb18932003-09-07 09:14:37 +00009457 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009458 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009459 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009460 for (j = 1;j <= nbNs;j++)
9461 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9462 break;
9463 if (j <= nbNs)
9464 xmlErrAttributeDup(ctxt, aprefix, attname);
9465 else
9466 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009467skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009468 if (alloc != 0) xmlFree(attvalue);
Dennis Filder7e9bbdf2014-10-06 20:34:14 +08009469 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9470 break;
9471 if (!IS_BLANK_CH(RAW)) {
9472 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9473 "attributes construct error\n");
9474 break;
9475 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009476 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009477 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009478 continue;
9479 }
9480
9481 /*
9482 * Add the pair to atts
9483 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009484 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9485 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009486 if (attvalue[len] == 0)
9487 xmlFree(attvalue);
9488 goto failed;
9489 }
9490 maxatts = ctxt->maxatts;
9491 atts = ctxt->atts;
9492 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009493 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009494 atts[nbatts++] = attname;
9495 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009496 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009497 atts[nbatts++] = attvalue;
9498 attvalue += len;
9499 atts[nbatts++] = attvalue;
9500 /*
9501 * tag if some deallocation is needed
9502 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009503 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009504 } else {
9505 if ((attvalue != NULL) && (attvalue[len] == 0))
9506 xmlFree(attvalue);
9507 }
9508
Daniel Veillard37334572008-07-31 08:20:02 +00009509failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009510
9511 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009512 if (ctxt->instate == XML_PARSER_EOF)
9513 break;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009514 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009515 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9516 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009517 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009518 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9519 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009520 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009521 }
9522 SKIP_BLANKS;
9523 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9524 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009525 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009526 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009527 break;
9528 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009529 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009530 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009531 }
9532
Daniel Veillard0fb18932003-09-07 09:14:37 +00009533 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009534 * The attributes defaulting
9535 */
9536 if (ctxt->attsDefault != NULL) {
9537 xmlDefAttrsPtr defaults;
9538
9539 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9540 if (defaults != NULL) {
9541 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009542 attname = defaults->values[5 * i];
9543 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009544
9545 /*
9546 * special work for namespaces defaulted defs
9547 */
9548 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9549 /*
9550 * check that it's not a defined namespace
9551 */
9552 for (j = 1;j <= nbNs;j++)
9553 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9554 break;
9555 if (j <= nbNs) continue;
9556
9557 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009558 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009559 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009560 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009561 nbNs++;
9562 }
9563 } else if (aprefix == ctxt->str_xmlns) {
9564 /*
9565 * check that it's not a defined namespace
9566 */
9567 for (j = 1;j <= nbNs;j++)
9568 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9569 break;
9570 if (j <= nbNs) continue;
9571
9572 nsname = xmlGetNamespace(ctxt, attname);
9573 if (nsname != defaults->values[2]) {
9574 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009575 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009576 nbNs++;
9577 }
9578 } else {
9579 /*
9580 * check that it's not a defined attribute
9581 */
9582 for (j = 0;j < nbatts;j+=5) {
9583 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9584 break;
9585 }
9586 if (j < nbatts) continue;
9587
9588 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9589 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009590 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009591 }
9592 maxatts = ctxt->maxatts;
9593 atts = ctxt->atts;
9594 }
9595 atts[nbatts++] = attname;
9596 atts[nbatts++] = aprefix;
9597 if (aprefix == NULL)
9598 atts[nbatts++] = NULL;
9599 else
9600 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009601 atts[nbatts++] = defaults->values[5 * i + 2];
9602 atts[nbatts++] = defaults->values[5 * i + 3];
9603 if ((ctxt->standalone == 1) &&
9604 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009605 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009606 "standalone: attribute %s on %s defaulted from external subset\n",
9607 attname, localname);
9608 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009609 nbdef++;
9610 }
9611 }
9612 }
9613 }
9614
Daniel Veillarde70c8772003-11-25 07:21:18 +00009615 /*
9616 * The attributes checkings
9617 */
9618 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009619 /*
9620 * The default namespace does not apply to attribute names.
9621 */
9622 if (atts[i + 1] != NULL) {
9623 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9624 if (nsname == NULL) {
9625 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9626 "Namespace prefix %s for %s on %s is not defined\n",
9627 atts[i + 1], atts[i], localname);
9628 }
9629 atts[i + 2] = nsname;
9630 } else
9631 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009632 /*
9633 * [ WFC: Unique Att Spec ]
9634 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009635 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009636 * As extended by the Namespace in XML REC.
9637 */
9638 for (j = 0; j < i;j += 5) {
9639 if (atts[i] == atts[j]) {
9640 if (atts[i+1] == atts[j+1]) {
9641 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9642 break;
9643 }
9644 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9645 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9646 "Namespaced Attribute %s in '%s' redefined\n",
9647 atts[i], nsname, NULL);
9648 break;
9649 }
9650 }
9651 }
9652 }
9653
Daniel Veillarde57ec792003-09-10 10:50:59 +00009654 nsname = xmlGetNamespace(ctxt, prefix);
9655 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009656 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9657 "Namespace prefix %s on %s is not defined\n",
9658 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009659 }
9660 *pref = prefix;
9661 *URI = nsname;
9662
9663 /*
9664 * SAX: Start of Element !
9665 */
9666 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9667 (!ctxt->disableSAX)) {
9668 if (nbNs > 0)
9669 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9670 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9671 nbatts / 5, nbdef, atts);
9672 else
9673 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9674 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9675 }
9676
9677 /*
9678 * Free up attribute allocated strings if needed
9679 */
9680 if (attval != 0) {
9681 for (i = 3,j = 0; j < nratts;i += 5,j++)
9682 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9683 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009684 }
9685
9686 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009687
9688base_changed:
9689 /*
9690 * the attribute strings are valid iif the base didn't changed
9691 */
9692 if (attval != 0) {
9693 for (i = 3,j = 0; j < nratts;i += 5,j++)
9694 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9695 xmlFree((xmlChar *) atts[i]);
9696 }
9697 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009698 ctxt->input->line = oldline;
9699 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009700 if (ctxt->wellFormed == 1) {
9701 goto reparse;
9702 }
9703 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009704}
9705
9706/**
9707 * xmlParseEndTag2:
9708 * @ctxt: an XML parser context
9709 * @line: line of the start tag
9710 * @nsNr: number of namespaces on the start tag
9711 *
9712 * parse an end of tag
9713 *
9714 * [42] ETag ::= '</' Name S? '>'
9715 *
9716 * With namespace
9717 *
9718 * [NS 9] ETag ::= '</' QName S? '>'
9719 */
9720
9721static void
9722xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009723 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009724 const xmlChar *name;
9725
9726 GROW;
9727 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009728 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009729 return;
9730 }
9731 SKIP(2);
9732
William M. Brack13dfa872004-09-18 04:52:08 +00009733 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009734 if (ctxt->input->cur[tlen] == '>') {
9735 ctxt->input->cur += tlen + 1;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009736 ctxt->input->col += tlen + 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009737 goto done;
9738 }
9739 ctxt->input->cur += tlen;
Juergen Keil24fb4c32014-10-06 18:19:12 +08009740 ctxt->input->col += tlen;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009741 name = (xmlChar*)1;
9742 } else {
9743 if (prefix == NULL)
9744 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9745 else
9746 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9747 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009748
9749 /*
9750 * We should definitely be at the ending "S? '>'" part
9751 */
9752 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009753 if (ctxt->instate == XML_PARSER_EOF)
9754 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009755 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009756 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009757 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009758 } else
9759 NEXT1;
9760
9761 /*
9762 * [ WFC: Element Type Match ]
9763 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009764 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009765 *
9766 */
9767 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009768 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009769 if ((line == 0) && (ctxt->node != NULL))
9770 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009771 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009772 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009773 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009774 }
9775
9776 /*
9777 * SAX: End of Tag
9778 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009779done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009780 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9781 (!ctxt->disableSAX))
9782 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9783
Daniel Veillard0fb18932003-09-07 09:14:37 +00009784 spacePop(ctxt);
9785 if (nsNr != 0)
9786 nsPop(ctxt, nsNr);
9787 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009788}
9789
9790/**
Owen Taylor3473f882001-02-23 17:55:21 +00009791 * xmlParseCDSect:
9792 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009793 *
Owen Taylor3473f882001-02-23 17:55:21 +00009794 * Parse escaped pure raw content.
9795 *
9796 * [18] CDSect ::= CDStart CData CDEnd
9797 *
9798 * [19] CDStart ::= '<![CDATA['
9799 *
9800 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9801 *
9802 * [21] CDEnd ::= ']]>'
9803 */
9804void
9805xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9806 xmlChar *buf = NULL;
9807 int len = 0;
9808 int size = XML_PARSER_BUFFER_SIZE;
9809 int r, rl;
9810 int s, sl;
9811 int cur, l;
9812 int count = 0;
9813
Daniel Veillard8f597c32003-10-06 08:19:27 +00009814 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009815 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009816 SKIP(9);
9817 } else
9818 return;
9819
9820 ctxt->instate = XML_PARSER_CDATA_SECTION;
9821 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009822 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009823 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009824 ctxt->instate = XML_PARSER_CONTENT;
9825 return;
9826 }
9827 NEXTL(rl);
9828 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009829 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009830 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009831 ctxt->instate = XML_PARSER_CONTENT;
9832 return;
9833 }
9834 NEXTL(sl);
9835 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009836 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009837 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009838 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009839 return;
9840 }
William M. Brack871611b2003-10-18 04:53:14 +00009841 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009842 ((r != ']') || (s != ']') || (cur != '>'))) {
9843 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009844 xmlChar *tmp;
9845
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009846 if ((size > XML_MAX_TEXT_LENGTH) &&
9847 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9848 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9849 "CData section too big found", NULL);
9850 xmlFree (buf);
9851 return;
9852 }
9853 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009854 if (tmp == NULL) {
9855 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009856 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009857 return;
9858 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009859 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009860 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009861 }
9862 COPY_BUF(rl,buf,len,r);
9863 r = s;
9864 rl = sl;
9865 s = cur;
9866 sl = l;
9867 count++;
9868 if (count > 50) {
9869 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009870 if (ctxt->instate == XML_PARSER_EOF) {
9871 xmlFree(buf);
9872 return;
9873 }
Owen Taylor3473f882001-02-23 17:55:21 +00009874 count = 0;
9875 }
9876 NEXTL(l);
9877 cur = CUR_CHAR(l);
9878 }
9879 buf[len] = 0;
9880 ctxt->instate = XML_PARSER_CONTENT;
9881 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009882 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009883 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009884 xmlFree(buf);
9885 return;
9886 }
9887 NEXTL(l);
9888
9889 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009890 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009891 */
9892 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9893 if (ctxt->sax->cdataBlock != NULL)
9894 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009895 else if (ctxt->sax->characters != NULL)
9896 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009897 }
9898 xmlFree(buf);
9899}
9900
9901/**
9902 * xmlParseContent:
9903 * @ctxt: an XML parser context
9904 *
9905 * Parse a content:
9906 *
9907 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9908 */
9909
9910void
9911xmlParseContent(xmlParserCtxtPtr ctxt) {
9912 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009913 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009914 ((RAW != '<') || (NXT(1) != '/')) &&
9915 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009916 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009917 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009918 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009919
9920 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009921 * First case : a Processing Instruction.
9922 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009923 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009924 xmlParsePI(ctxt);
9925 }
9926
9927 /*
9928 * Second case : a CDSection
9929 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009930 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009931 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009932 xmlParseCDSect(ctxt);
9933 }
9934
9935 /*
9936 * Third case : a comment
9937 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009938 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009939 (NXT(2) == '-') && (NXT(3) == '-')) {
9940 xmlParseComment(ctxt);
9941 ctxt->instate = XML_PARSER_CONTENT;
9942 }
9943
9944 /*
9945 * Fourth case : a sub-element.
9946 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009947 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009948 xmlParseElement(ctxt);
9949 }
9950
9951 /*
9952 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009953 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009954 */
9955
Daniel Veillard21a0f912001-02-25 19:54:14 +00009956 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009957 xmlParseReference(ctxt);
9958 }
9959
9960 /*
9961 * Last case, text. Note that References are handled directly.
9962 */
9963 else {
9964 xmlParseCharData(ctxt, 0);
9965 }
9966
9967 GROW;
9968 /*
9969 * Pop-up of finished entities.
9970 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009971 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009972 xmlPopInput(ctxt);
9973 SHRINK;
9974
Daniel Veillardfdc91562002-07-01 21:52:03 +00009975 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009976 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9977 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009978 ctxt->instate = XML_PARSER_EOF;
9979 break;
9980 }
9981 }
9982}
9983
9984/**
9985 * xmlParseElement:
9986 * @ctxt: an XML parser context
9987 *
9988 * parse an XML element, this is highly recursive
9989 *
9990 * [39] element ::= EmptyElemTag | STag content ETag
9991 *
9992 * [ WFC: Element Type Match ]
9993 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009994 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009995 *
Owen Taylor3473f882001-02-23 17:55:21 +00009996 */
9997
9998void
9999xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +000010000 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010001 const xmlChar *prefix = NULL;
10002 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010003 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +080010004 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010005 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +000010006 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +000010007
Daniel Veillard8915c152008-08-26 13:05:34 +000010008 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10009 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
10010 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10011 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10012 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +000010013 ctxt->instate = XML_PARSER_EOF;
10014 return;
10015 }
10016
Owen Taylor3473f882001-02-23 17:55:21 +000010017 /* Capture start position */
10018 if (ctxt->record_info) {
10019 node_info.begin_pos = ctxt->input->consumed +
10020 (CUR_PTR - ctxt->input->base);
10021 node_info.begin_line = ctxt->input->line;
10022 }
10023
10024 if (ctxt->spaceNr == 0)
10025 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010026 else if (*ctxt->space == -2)
10027 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +000010028 else
10029 spacePush(ctxt, *ctxt->space);
10030
Daniel Veillard6c5b2d32003-03-27 14:55:52 +000010031 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +000010032#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010033 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010034#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010035 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010036#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010037 else
10038 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010039#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080010040 if (ctxt->instate == XML_PARSER_EOF)
10041 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010042 if (name == NULL) {
10043 spacePop(ctxt);
10044 return;
10045 }
10046 namePush(ctxt, name);
10047 ret = ctxt->node;
10048
Daniel Veillard4432df22003-09-28 18:58:27 +000010049#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010050 /*
10051 * [ VC: Root Element Type ]
10052 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010053 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +000010054 */
10055 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10056 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10057 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010058#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010059
10060 /*
10061 * Check for an Empty Element.
10062 */
10063 if ((RAW == '/') && (NXT(1) == '>')) {
10064 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010065 if (ctxt->sax2) {
10066 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10067 (!ctxt->disableSAX))
10068 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010069#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010070 } else {
10071 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10072 (!ctxt->disableSAX))
10073 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010074#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010075 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010076 namePop(ctxt);
10077 spacePop(ctxt);
10078 if (nsNr != ctxt->nsNr)
10079 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010080 if ( ret != NULL && ctxt->record_info ) {
10081 node_info.end_pos = ctxt->input->consumed +
10082 (CUR_PTR - ctxt->input->base);
10083 node_info.end_line = ctxt->input->line;
10084 node_info.node = ret;
10085 xmlParserAddNodeInfo(ctxt, &node_info);
10086 }
10087 return;
10088 }
10089 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010090 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010091 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010092 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10093 "Couldn't find end of Start Tag %s line %d\n",
10094 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010095
10096 /*
10097 * end of parsing of this node.
10098 */
10099 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010100 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010101 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010102 if (nsNr != ctxt->nsNr)
10103 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010104
10105 /*
10106 * Capture end position and add node
10107 */
10108 if ( ret != NULL && ctxt->record_info ) {
10109 node_info.end_pos = ctxt->input->consumed +
10110 (CUR_PTR - ctxt->input->base);
10111 node_info.end_line = ctxt->input->line;
10112 node_info.node = ret;
10113 xmlParserAddNodeInfo(ctxt, &node_info);
10114 }
10115 return;
10116 }
10117
10118 /*
10119 * Parse the content of the element:
10120 */
10121 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010122 if (ctxt->instate == XML_PARSER_EOF)
10123 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010124 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010125 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010126 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010127 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010128
10129 /*
10130 * end of parsing of this node.
10131 */
10132 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010133 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010134 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010135 if (nsNr != ctxt->nsNr)
10136 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010137 return;
10138 }
10139
10140 /*
10141 * parse the end of tag: '</' should be here.
10142 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010143 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010144 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010145 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010146 }
10147#ifdef LIBXML_SAX1_ENABLED
10148 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010149 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010150#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010151
10152 /*
10153 * Capture end position and add node
10154 */
10155 if ( ret != NULL && ctxt->record_info ) {
10156 node_info.end_pos = ctxt->input->consumed +
10157 (CUR_PTR - ctxt->input->base);
10158 node_info.end_line = ctxt->input->line;
10159 node_info.node = ret;
10160 xmlParserAddNodeInfo(ctxt, &node_info);
10161 }
10162}
10163
10164/**
10165 * xmlParseVersionNum:
10166 * @ctxt: an XML parser context
10167 *
10168 * parse the XML version value.
10169 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010170 * [26] VersionNum ::= '1.' [0-9]+
10171 *
10172 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010173 *
10174 * Returns the string giving the XML version number, or NULL
10175 */
10176xmlChar *
10177xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10178 xmlChar *buf = NULL;
10179 int len = 0;
10180 int size = 10;
10181 xmlChar cur;
10182
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010183 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010184 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010185 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010186 return(NULL);
10187 }
10188 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010189 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010190 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010191 return(NULL);
10192 }
10193 buf[len++] = cur;
10194 NEXT;
10195 cur=CUR;
10196 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010197 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010198 return(NULL);
10199 }
10200 buf[len++] = cur;
10201 NEXT;
10202 cur=CUR;
10203 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010204 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010205 xmlChar *tmp;
10206
Owen Taylor3473f882001-02-23 17:55:21 +000010207 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010208 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10209 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010210 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010211 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010212 return(NULL);
10213 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010214 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010215 }
10216 buf[len++] = cur;
10217 NEXT;
10218 cur=CUR;
10219 }
10220 buf[len] = 0;
10221 return(buf);
10222}
10223
10224/**
10225 * xmlParseVersionInfo:
10226 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010227 *
Owen Taylor3473f882001-02-23 17:55:21 +000010228 * parse the XML version.
10229 *
10230 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010231 *
Owen Taylor3473f882001-02-23 17:55:21 +000010232 * [25] Eq ::= S? '=' S?
10233 *
10234 * Returns the version string, e.g. "1.0"
10235 */
10236
10237xmlChar *
10238xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10239 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010240
Daniel Veillarda07050d2003-10-19 14:46:32 +000010241 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010242 SKIP(7);
10243 SKIP_BLANKS;
10244 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010245 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010246 return(NULL);
10247 }
10248 NEXT;
10249 SKIP_BLANKS;
10250 if (RAW == '"') {
10251 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010252 version = xmlParseVersionNum(ctxt);
10253 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010254 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010255 } else
10256 NEXT;
10257 } else if (RAW == '\''){
10258 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010259 version = xmlParseVersionNum(ctxt);
10260 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010261 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010262 } else
10263 NEXT;
10264 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010265 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010266 }
10267 }
10268 return(version);
10269}
10270
10271/**
10272 * xmlParseEncName:
10273 * @ctxt: an XML parser context
10274 *
10275 * parse the XML encoding name
10276 *
10277 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10278 *
10279 * Returns the encoding name value or NULL
10280 */
10281xmlChar *
10282xmlParseEncName(xmlParserCtxtPtr ctxt) {
10283 xmlChar *buf = NULL;
10284 int len = 0;
10285 int size = 10;
10286 xmlChar cur;
10287
10288 cur = CUR;
10289 if (((cur >= 'a') && (cur <= 'z')) ||
10290 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010291 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010292 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010293 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010294 return(NULL);
10295 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010296
Owen Taylor3473f882001-02-23 17:55:21 +000010297 buf[len++] = cur;
10298 NEXT;
10299 cur = CUR;
10300 while (((cur >= 'a') && (cur <= 'z')) ||
10301 ((cur >= 'A') && (cur <= 'Z')) ||
10302 ((cur >= '0') && (cur <= '9')) ||
10303 (cur == '.') || (cur == '_') ||
10304 (cur == '-')) {
10305 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010306 xmlChar *tmp;
10307
Owen Taylor3473f882001-02-23 17:55:21 +000010308 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010309 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10310 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010311 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010312 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010313 return(NULL);
10314 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010315 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010316 }
10317 buf[len++] = cur;
10318 NEXT;
10319 cur = CUR;
10320 if (cur == 0) {
10321 SHRINK;
10322 GROW;
10323 cur = CUR;
10324 }
10325 }
10326 buf[len] = 0;
10327 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010328 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010329 }
10330 return(buf);
10331}
10332
10333/**
10334 * xmlParseEncodingDecl:
10335 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010336 *
Owen Taylor3473f882001-02-23 17:55:21 +000010337 * parse the XML encoding declaration
10338 *
10339 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10340 *
10341 * this setups the conversion filters.
10342 *
10343 * Returns the encoding value or NULL
10344 */
10345
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010346const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010347xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10348 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010349
10350 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010351 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010352 SKIP(8);
10353 SKIP_BLANKS;
10354 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010355 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010356 return(NULL);
10357 }
10358 NEXT;
10359 SKIP_BLANKS;
10360 if (RAW == '"') {
10361 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010362 encoding = xmlParseEncName(ctxt);
10363 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010364 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010365 } else
10366 NEXT;
10367 } else if (RAW == '\''){
10368 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010369 encoding = xmlParseEncName(ctxt);
10370 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010371 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010372 } else
10373 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010374 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010375 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010376 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010377
10378 /*
10379 * Non standard parsing, allowing the user to ignore encoding
10380 */
10381 if (ctxt->options & XML_PARSE_IGNORE_ENC)
10382 return(encoding);
10383
Daniel Veillard6b621b82003-08-11 15:03:34 +000010384 /*
10385 * UTF-16 encoding stwich has already taken place at this stage,
10386 * more over the little-endian/big-endian selection is already done
10387 */
10388 if ((encoding != NULL) &&
10389 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10390 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010391 /*
10392 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010393 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010394 * document is apparently UTF-8 compatible, then raise an
10395 * encoding mismatch fatal error
10396 */
10397 if ((ctxt->encoding == NULL) &&
10398 (ctxt->input->buf != NULL) &&
10399 (ctxt->input->buf->encoder == NULL)) {
10400 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10401 "Document labelled UTF-16 but has UTF-8 content\n");
10402 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010403 if (ctxt->encoding != NULL)
10404 xmlFree((xmlChar *) ctxt->encoding);
10405 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010406 }
10407 /*
10408 * UTF-8 encoding is handled natively
10409 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010410 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010411 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10412 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010413 if (ctxt->encoding != NULL)
10414 xmlFree((xmlChar *) ctxt->encoding);
10415 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010416 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010417 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010418 xmlCharEncodingHandlerPtr handler;
10419
10420 if (ctxt->input->encoding != NULL)
10421 xmlFree((xmlChar *) ctxt->input->encoding);
10422 ctxt->input->encoding = encoding;
10423
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010424 handler = xmlFindCharEncodingHandler((const char *) encoding);
10425 if (handler != NULL) {
10426 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +000010427 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010428 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010429 "Unsupported encoding %s\n", encoding);
10430 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010431 }
10432 }
10433 }
10434 return(encoding);
10435}
10436
10437/**
10438 * xmlParseSDDecl:
10439 * @ctxt: an XML parser context
10440 *
10441 * parse the XML standalone declaration
10442 *
10443 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010444 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010445 *
10446 * [ VC: Standalone Document Declaration ]
10447 * TODO The standalone document declaration must have the value "no"
10448 * if any external markup declarations contain declarations of:
10449 * - attributes with default values, if elements to which these
10450 * attributes apply appear in the document without specifications
10451 * of values for these attributes, or
10452 * - entities (other than amp, lt, gt, apos, quot), if references
10453 * to those entities appear in the document, or
10454 * - attributes with values subject to normalization, where the
10455 * attribute appears in the document with a value which will change
10456 * as a result of normalization, or
10457 * - element types with element content, if white space occurs directly
10458 * within any instance of those types.
10459 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010460 * Returns:
10461 * 1 if standalone="yes"
10462 * 0 if standalone="no"
10463 * -2 if standalone attribute is missing or invalid
10464 * (A standalone value of -2 means that the XML declaration was found,
10465 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010466 */
10467
10468int
10469xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010470 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010471
10472 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010473 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010474 SKIP(10);
10475 SKIP_BLANKS;
10476 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010477 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010478 return(standalone);
10479 }
10480 NEXT;
10481 SKIP_BLANKS;
10482 if (RAW == '\''){
10483 NEXT;
10484 if ((RAW == 'n') && (NXT(1) == 'o')) {
10485 standalone = 0;
10486 SKIP(2);
10487 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10488 (NXT(2) == 's')) {
10489 standalone = 1;
10490 SKIP(3);
10491 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010492 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010493 }
10494 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010495 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010496 } else
10497 NEXT;
10498 } else if (RAW == '"'){
10499 NEXT;
10500 if ((RAW == 'n') && (NXT(1) == 'o')) {
10501 standalone = 0;
10502 SKIP(2);
10503 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10504 (NXT(2) == 's')) {
10505 standalone = 1;
10506 SKIP(3);
10507 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010508 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010509 }
10510 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010511 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010512 } else
10513 NEXT;
10514 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010515 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010516 }
10517 }
10518 return(standalone);
10519}
10520
10521/**
10522 * xmlParseXMLDecl:
10523 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010524 *
Owen Taylor3473f882001-02-23 17:55:21 +000010525 * parse an XML declaration header
10526 *
10527 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10528 */
10529
10530void
10531xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10532 xmlChar *version;
10533
10534 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010535 * This value for standalone indicates that the document has an
10536 * XML declaration but it does not have a standalone attribute.
10537 * It will be overwritten later if a standalone attribute is found.
10538 */
10539 ctxt->input->standalone = -2;
10540
10541 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010542 * We know that '<?xml' is here.
10543 */
10544 SKIP(5);
10545
William M. Brack76e95df2003-10-18 16:20:14 +000010546 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010547 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10548 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010549 }
10550 SKIP_BLANKS;
10551
10552 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010553 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010554 */
10555 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010556 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010557 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010558 } else {
10559 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10560 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010561 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010562 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010563 if (ctxt->options & XML_PARSE_OLD10) {
10564 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10565 "Unsupported version '%s'\n",
10566 version);
10567 } else {
10568 if ((version[0] == '1') && ((version[1] == '.'))) {
10569 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10570 "Unsupported version '%s'\n",
10571 version, NULL);
10572 } else {
10573 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10574 "Unsupported version '%s'\n",
10575 version);
10576 }
10577 }
Daniel Veillard19840942001-11-29 16:11:38 +000010578 }
10579 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010580 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010581 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010582 }
Owen Taylor3473f882001-02-23 17:55:21 +000010583
10584 /*
10585 * We may have the encoding declaration
10586 */
William M. Brack76e95df2003-10-18 16:20:14 +000010587 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010588 if ((RAW == '?') && (NXT(1) == '>')) {
10589 SKIP(2);
10590 return;
10591 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010592 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010593 }
10594 xmlParseEncodingDecl(ctxt);
10595 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10596 /*
10597 * The XML REC instructs us to stop parsing right here
10598 */
10599 return;
10600 }
10601
10602 /*
10603 * We may have the standalone status.
10604 */
William M. Brack76e95df2003-10-18 16:20:14 +000010605 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010606 if ((RAW == '?') && (NXT(1) == '>')) {
10607 SKIP(2);
10608 return;
10609 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010610 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010611 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010612
10613 /*
10614 * We can grow the input buffer freely at that point
10615 */
10616 GROW;
10617
Owen Taylor3473f882001-02-23 17:55:21 +000010618 SKIP_BLANKS;
10619 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10620
10621 SKIP_BLANKS;
10622 if ((RAW == '?') && (NXT(1) == '>')) {
10623 SKIP(2);
10624 } else if (RAW == '>') {
10625 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010626 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010627 NEXT;
10628 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010629 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010630 MOVETO_ENDTAG(CUR_PTR);
10631 NEXT;
10632 }
10633}
10634
10635/**
10636 * xmlParseMisc:
10637 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010638 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010639 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010640 *
10641 * [27] Misc ::= Comment | PI | S
10642 */
10643
10644void
10645xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010646 while ((ctxt->instate != XML_PARSER_EOF) &&
10647 (((RAW == '<') && (NXT(1) == '?')) ||
10648 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10649 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010650 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010651 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010652 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010653 NEXT;
10654 } else
10655 xmlParseComment(ctxt);
10656 }
10657}
10658
10659/**
10660 * xmlParseDocument:
10661 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010662 *
Owen Taylor3473f882001-02-23 17:55:21 +000010663 * parse an XML document (and build a tree if using the standard SAX
10664 * interface).
10665 *
10666 * [1] document ::= prolog element Misc*
10667 *
10668 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10669 *
10670 * Returns 0, -1 in case of error. the parser context is augmented
10671 * as a result of the parsing.
10672 */
10673
10674int
10675xmlParseDocument(xmlParserCtxtPtr ctxt) {
10676 xmlChar start[4];
10677 xmlCharEncoding enc;
10678
10679 xmlInitParser();
10680
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010681 if ((ctxt == NULL) || (ctxt->input == NULL))
10682 return(-1);
10683
Owen Taylor3473f882001-02-23 17:55:21 +000010684 GROW;
10685
10686 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010687 * SAX: detecting the level.
10688 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010689 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010690
10691 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010692 * SAX: beginning of the document processing.
10693 */
10694 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10695 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010696 if (ctxt->instate == XML_PARSER_EOF)
10697 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010698
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010699 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010700 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010701 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010702 * Get the 4 first bytes and decode the charset
10703 * if enc != XML_CHAR_ENCODING_NONE
10704 * plug some encoding conversion routines.
10705 */
10706 start[0] = RAW;
10707 start[1] = NXT(1);
10708 start[2] = NXT(2);
10709 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010710 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010711 if (enc != XML_CHAR_ENCODING_NONE) {
10712 xmlSwitchEncoding(ctxt, enc);
10713 }
Owen Taylor3473f882001-02-23 17:55:21 +000010714 }
10715
10716
10717 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010718 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010719 }
10720
10721 /*
10722 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010723 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010724 * than just the first line, unless the amount of data is really
10725 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010726 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010727 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10728 GROW;
10729 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010730 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010731
10732 /*
10733 * Note that we will switch encoding on the fly.
10734 */
10735 xmlParseXMLDecl(ctxt);
10736 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10737 /*
10738 * The XML REC instructs us to stop parsing right here
10739 */
10740 return(-1);
10741 }
10742 ctxt->standalone = ctxt->input->standalone;
10743 SKIP_BLANKS;
10744 } else {
10745 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10746 }
10747 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10748 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010749 if (ctxt->instate == XML_PARSER_EOF)
10750 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010751 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10752 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10753 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10754 }
Owen Taylor3473f882001-02-23 17:55:21 +000010755
10756 /*
10757 * The Misc part of the Prolog
10758 */
10759 GROW;
10760 xmlParseMisc(ctxt);
10761
10762 /*
10763 * Then possibly doc type declaration(s) and more Misc
10764 * (doctypedecl Misc*)?
10765 */
10766 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010767 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010768
10769 ctxt->inSubset = 1;
10770 xmlParseDocTypeDecl(ctxt);
10771 if (RAW == '[') {
10772 ctxt->instate = XML_PARSER_DTD;
10773 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010774 if (ctxt->instate == XML_PARSER_EOF)
10775 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010776 }
10777
10778 /*
10779 * Create and update the external subset.
10780 */
10781 ctxt->inSubset = 2;
10782 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10783 (!ctxt->disableSAX))
10784 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10785 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010786 if (ctxt->instate == XML_PARSER_EOF)
10787 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010788 ctxt->inSubset = 0;
10789
Daniel Veillardac4118d2008-01-11 05:27:32 +000010790 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010791
10792 ctxt->instate = XML_PARSER_PROLOG;
10793 xmlParseMisc(ctxt);
10794 }
10795
10796 /*
10797 * Time to start parsing the tree itself
10798 */
10799 GROW;
10800 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010801 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10802 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010803 } else {
10804 ctxt->instate = XML_PARSER_CONTENT;
10805 xmlParseElement(ctxt);
10806 ctxt->instate = XML_PARSER_EPILOG;
10807
10808
10809 /*
10810 * The Misc part at the end
10811 */
10812 xmlParseMisc(ctxt);
10813
Daniel Veillard561b7f82002-03-20 21:55:57 +000010814 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010815 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010816 }
10817 ctxt->instate = XML_PARSER_EOF;
10818 }
10819
10820 /*
10821 * SAX: end of the document processing.
10822 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010823 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010824 ctxt->sax->endDocument(ctxt->userData);
10825
Daniel Veillard5997aca2002-03-18 18:36:20 +000010826 /*
10827 * Remove locally kept entity definitions if the tree was not built
10828 */
10829 if ((ctxt->myDoc != NULL) &&
10830 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10831 xmlFreeDoc(ctxt->myDoc);
10832 ctxt->myDoc = NULL;
10833 }
10834
Daniel Veillardae0765b2008-07-31 19:54:59 +000010835 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10836 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10837 if (ctxt->valid)
10838 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10839 if (ctxt->nsWellFormed)
10840 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10841 if (ctxt->options & XML_PARSE_OLD10)
10842 ctxt->myDoc->properties |= XML_DOC_OLD10;
10843 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010844 if (! ctxt->wellFormed) {
10845 ctxt->valid = 0;
10846 return(-1);
10847 }
Owen Taylor3473f882001-02-23 17:55:21 +000010848 return(0);
10849}
10850
10851/**
10852 * xmlParseExtParsedEnt:
10853 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010854 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010855 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010856 * An external general parsed entity is well-formed if it matches the
10857 * production labeled extParsedEnt.
10858 *
10859 * [78] extParsedEnt ::= TextDecl? content
10860 *
10861 * Returns 0, -1 in case of error. the parser context is augmented
10862 * as a result of the parsing.
10863 */
10864
10865int
10866xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10867 xmlChar start[4];
10868 xmlCharEncoding enc;
10869
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010870 if ((ctxt == NULL) || (ctxt->input == NULL))
10871 return(-1);
10872
Owen Taylor3473f882001-02-23 17:55:21 +000010873 xmlDefaultSAXHandlerInit();
10874
Daniel Veillard309f81d2003-09-23 09:02:53 +000010875 xmlDetectSAX2(ctxt);
10876
Owen Taylor3473f882001-02-23 17:55:21 +000010877 GROW;
10878
10879 /*
10880 * SAX: beginning of the document processing.
10881 */
10882 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10883 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10884
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010885 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010886 * Get the 4 first bytes and decode the charset
10887 * if enc != XML_CHAR_ENCODING_NONE
10888 * plug some encoding conversion routines.
10889 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010890 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10891 start[0] = RAW;
10892 start[1] = NXT(1);
10893 start[2] = NXT(2);
10894 start[3] = NXT(3);
10895 enc = xmlDetectCharEncoding(start, 4);
10896 if (enc != XML_CHAR_ENCODING_NONE) {
10897 xmlSwitchEncoding(ctxt, enc);
10898 }
Owen Taylor3473f882001-02-23 17:55:21 +000010899 }
10900
10901
10902 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010903 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010904 }
10905
10906 /*
10907 * Check for the XMLDecl in the Prolog.
10908 */
10909 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010910 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010911
10912 /*
10913 * Note that we will switch encoding on the fly.
10914 */
10915 xmlParseXMLDecl(ctxt);
10916 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10917 /*
10918 * The XML REC instructs us to stop parsing right here
10919 */
10920 return(-1);
10921 }
10922 SKIP_BLANKS;
10923 } else {
10924 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10925 }
10926 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10927 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010928 if (ctxt->instate == XML_PARSER_EOF)
10929 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010930
10931 /*
10932 * Doing validity checking on chunk doesn't make sense
10933 */
10934 ctxt->instate = XML_PARSER_CONTENT;
10935 ctxt->validate = 0;
10936 ctxt->loadsubset = 0;
10937 ctxt->depth = 0;
10938
10939 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010940 if (ctxt->instate == XML_PARSER_EOF)
10941 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010942
Owen Taylor3473f882001-02-23 17:55:21 +000010943 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010944 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010945 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010946 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010947 }
10948
10949 /*
10950 * SAX: end of the document processing.
10951 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010952 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010953 ctxt->sax->endDocument(ctxt->userData);
10954
10955 if (! ctxt->wellFormed) return(-1);
10956 return(0);
10957}
10958
Daniel Veillard73b013f2003-09-30 12:36:01 +000010959#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010960/************************************************************************
10961 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010962 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010963 * *
10964 ************************************************************************/
10965
10966/**
10967 * xmlParseLookupSequence:
10968 * @ctxt: an XML parser context
10969 * @first: the first char to lookup
10970 * @next: the next char to lookup or zero
10971 * @third: the next char to lookup or zero
10972 *
10973 * Try to find if a sequence (first, next, third) or just (first next) or
10974 * (first) is available in the input stream.
10975 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10976 * to avoid rescanning sequences of bytes, it DOES change the state of the
10977 * parser, do not use liberally.
10978 *
10979 * Returns the index to the current parsing point if the full sequence
10980 * is available, -1 otherwise.
10981 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010982static int
Owen Taylor3473f882001-02-23 17:55:21 +000010983xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10984 xmlChar next, xmlChar third) {
10985 int base, len;
10986 xmlParserInputPtr in;
10987 const xmlChar *buf;
10988
10989 in = ctxt->input;
10990 if (in == NULL) return(-1);
10991 base = in->cur - in->base;
10992 if (base < 0) return(-1);
10993 if (ctxt->checkIndex > base)
10994 base = ctxt->checkIndex;
10995 if (in->buf == NULL) {
10996 buf = in->base;
10997 len = in->length;
10998 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010999 buf = xmlBufContent(in->buf->buffer);
11000 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000011001 }
11002 /* take into account the sequence length */
11003 if (third) len -= 2;
11004 else if (next) len --;
11005 for (;base < len;base++) {
11006 if (buf[base] == first) {
11007 if (third != 0) {
11008 if ((buf[base + 1] != next) ||
11009 (buf[base + 2] != third)) continue;
11010 } else if (next != 0) {
11011 if (buf[base + 1] != next) continue;
11012 }
11013 ctxt->checkIndex = 0;
11014#ifdef DEBUG_PUSH
11015 if (next == 0)
11016 xmlGenericError(xmlGenericErrorContext,
11017 "PP: lookup '%c' found at %d\n",
11018 first, base);
11019 else if (third == 0)
11020 xmlGenericError(xmlGenericErrorContext,
11021 "PP: lookup '%c%c' found at %d\n",
11022 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011023 else
Owen Taylor3473f882001-02-23 17:55:21 +000011024 xmlGenericError(xmlGenericErrorContext,
11025 "PP: lookup '%c%c%c' found at %d\n",
11026 first, next, third, base);
11027#endif
11028 return(base - (in->cur - in->base));
11029 }
11030 }
11031 ctxt->checkIndex = base;
11032#ifdef DEBUG_PUSH
11033 if (next == 0)
11034 xmlGenericError(xmlGenericErrorContext,
11035 "PP: lookup '%c' failed\n", first);
11036 else if (third == 0)
11037 xmlGenericError(xmlGenericErrorContext,
11038 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011039 else
Owen Taylor3473f882001-02-23 17:55:21 +000011040 xmlGenericError(xmlGenericErrorContext,
11041 "PP: lookup '%c%c%c' failed\n", first, next, third);
11042#endif
11043 return(-1);
11044}
11045
11046/**
Daniel Veillarda880b122003-04-21 21:36:41 +000011047 * xmlParseGetLasts:
11048 * @ctxt: an XML parser context
11049 * @lastlt: pointer to store the last '<' from the input
11050 * @lastgt: pointer to store the last '>' from the input
11051 *
11052 * Lookup the last < and > in the current chunk
11053 */
11054static void
11055xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
11056 const xmlChar **lastgt) {
11057 const xmlChar *tmp;
11058
11059 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11060 xmlGenericError(xmlGenericErrorContext,
11061 "Internal error: xmlParseGetLasts\n");
11062 return;
11063 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011064 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011065 tmp = ctxt->input->end;
11066 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011067 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011068 if (tmp < ctxt->input->base) {
11069 *lastlt = NULL;
11070 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011071 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011072 *lastlt = tmp;
11073 tmp++;
11074 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11075 if (*tmp == '\'') {
11076 tmp++;
11077 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11078 if (tmp < ctxt->input->end) tmp++;
11079 } else if (*tmp == '"') {
11080 tmp++;
11081 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11082 if (tmp < ctxt->input->end) tmp++;
11083 } else
11084 tmp++;
11085 }
11086 if (tmp < ctxt->input->end)
11087 *lastgt = tmp;
11088 else {
11089 tmp = *lastlt;
11090 tmp--;
11091 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11092 if (tmp >= ctxt->input->base)
11093 *lastgt = tmp;
11094 else
11095 *lastgt = NULL;
11096 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011097 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011098 } else {
11099 *lastlt = NULL;
11100 *lastgt = NULL;
11101 }
11102}
11103/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011104 * xmlCheckCdataPush:
11105 * @cur: pointer to the bock of characters
11106 * @len: length of the block in bytes
11107 *
11108 * Check that the block of characters is okay as SCdata content [20]
11109 *
11110 * Returns the number of bytes to pass if okay, a negative index where an
11111 * UTF-8 error occured otherwise
11112 */
11113static int
11114xmlCheckCdataPush(const xmlChar *utf, int len) {
11115 int ix;
11116 unsigned char c;
11117 int codepoint;
11118
11119 if ((utf == NULL) || (len <= 0))
11120 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011121
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011122 for (ix = 0; ix < len;) { /* string is 0-terminated */
11123 c = utf[ix];
11124 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11125 if (c >= 0x20)
11126 ix++;
11127 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11128 ix++;
11129 else
11130 return(-ix);
11131 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11132 if (ix + 2 > len) return(ix);
11133 if ((utf[ix+1] & 0xc0 ) != 0x80)
11134 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011135 codepoint = (utf[ix] & 0x1f) << 6;
11136 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011137 if (!xmlIsCharQ(codepoint))
11138 return(-ix);
11139 ix += 2;
11140 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11141 if (ix + 3 > len) return(ix);
11142 if (((utf[ix+1] & 0xc0) != 0x80) ||
11143 ((utf[ix+2] & 0xc0) != 0x80))
11144 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011145 codepoint = (utf[ix] & 0xf) << 12;
11146 codepoint |= (utf[ix+1] & 0x3f) << 6;
11147 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011148 if (!xmlIsCharQ(codepoint))
11149 return(-ix);
11150 ix += 3;
11151 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11152 if (ix + 4 > len) return(ix);
11153 if (((utf[ix+1] & 0xc0) != 0x80) ||
11154 ((utf[ix+2] & 0xc0) != 0x80) ||
11155 ((utf[ix+3] & 0xc0) != 0x80))
11156 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011157 codepoint = (utf[ix] & 0x7) << 18;
11158 codepoint |= (utf[ix+1] & 0x3f) << 12;
11159 codepoint |= (utf[ix+2] & 0x3f) << 6;
11160 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011161 if (!xmlIsCharQ(codepoint))
11162 return(-ix);
11163 ix += 4;
11164 } else /* unknown encoding */
11165 return(-ix);
11166 }
11167 return(ix);
11168}
11169
11170/**
Owen Taylor3473f882001-02-23 17:55:21 +000011171 * xmlParseTryOrFinish:
11172 * @ctxt: an XML parser context
11173 * @terminate: last chunk indicator
11174 *
11175 * Try to progress on parsing
11176 *
11177 * Returns zero if no parsing was possible
11178 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011179static int
Owen Taylor3473f882001-02-23 17:55:21 +000011180xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11181 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011182 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011183 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011184 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011185
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011186 if (ctxt->input == NULL)
11187 return(0);
11188
Owen Taylor3473f882001-02-23 17:55:21 +000011189#ifdef DEBUG_PUSH
11190 switch (ctxt->instate) {
11191 case XML_PARSER_EOF:
11192 xmlGenericError(xmlGenericErrorContext,
11193 "PP: try EOF\n"); break;
11194 case XML_PARSER_START:
11195 xmlGenericError(xmlGenericErrorContext,
11196 "PP: try START\n"); break;
11197 case XML_PARSER_MISC:
11198 xmlGenericError(xmlGenericErrorContext,
11199 "PP: try MISC\n");break;
11200 case XML_PARSER_COMMENT:
11201 xmlGenericError(xmlGenericErrorContext,
11202 "PP: try COMMENT\n");break;
11203 case XML_PARSER_PROLOG:
11204 xmlGenericError(xmlGenericErrorContext,
11205 "PP: try PROLOG\n");break;
11206 case XML_PARSER_START_TAG:
11207 xmlGenericError(xmlGenericErrorContext,
11208 "PP: try START_TAG\n");break;
11209 case XML_PARSER_CONTENT:
11210 xmlGenericError(xmlGenericErrorContext,
11211 "PP: try CONTENT\n");break;
11212 case XML_PARSER_CDATA_SECTION:
11213 xmlGenericError(xmlGenericErrorContext,
11214 "PP: try CDATA_SECTION\n");break;
11215 case XML_PARSER_END_TAG:
11216 xmlGenericError(xmlGenericErrorContext,
11217 "PP: try END_TAG\n");break;
11218 case XML_PARSER_ENTITY_DECL:
11219 xmlGenericError(xmlGenericErrorContext,
11220 "PP: try ENTITY_DECL\n");break;
11221 case XML_PARSER_ENTITY_VALUE:
11222 xmlGenericError(xmlGenericErrorContext,
11223 "PP: try ENTITY_VALUE\n");break;
11224 case XML_PARSER_ATTRIBUTE_VALUE:
11225 xmlGenericError(xmlGenericErrorContext,
11226 "PP: try ATTRIBUTE_VALUE\n");break;
11227 case XML_PARSER_DTD:
11228 xmlGenericError(xmlGenericErrorContext,
11229 "PP: try DTD\n");break;
11230 case XML_PARSER_EPILOG:
11231 xmlGenericError(xmlGenericErrorContext,
11232 "PP: try EPILOG\n");break;
11233 case XML_PARSER_PI:
11234 xmlGenericError(xmlGenericErrorContext,
11235 "PP: try PI\n");break;
11236 case XML_PARSER_IGNORE:
11237 xmlGenericError(xmlGenericErrorContext,
11238 "PP: try IGNORE\n");break;
11239 }
11240#endif
11241
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011242 if ((ctxt->input != NULL) &&
11243 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011244 xmlSHRINK(ctxt);
11245 ctxt->checkIndex = 0;
11246 }
11247 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011248
Daniel Veillarde50ba812013-04-11 15:54:51 +080011249 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011250 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011251 return(0);
11252
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011253
Owen Taylor3473f882001-02-23 17:55:21 +000011254 /*
11255 * Pop-up of finished entities.
11256 */
11257 while ((RAW == 0) && (ctxt->inputNr > 1))
11258 xmlPopInput(ctxt);
11259
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011260 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011261 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011262 avail = ctxt->input->length -
11263 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011264 else {
11265 /*
11266 * If we are operating on converted input, try to flush
11267 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011268 * buffer. But do not do this in document start where
11269 * encoding="..." may not have been read and we work on a
11270 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011271 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011272 if ((ctxt->instate != XML_PARSER_START) &&
11273 (ctxt->input->buf->raw != NULL) &&
11274 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011275 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11276 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011277 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011278
11279 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011280 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11281 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011282 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011283 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011284 (ctxt->input->cur - ctxt->input->base);
11285 }
Owen Taylor3473f882001-02-23 17:55:21 +000011286 if (avail < 1)
11287 goto done;
11288 switch (ctxt->instate) {
11289 case XML_PARSER_EOF:
11290 /*
11291 * Document parsing is done !
11292 */
11293 goto done;
11294 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011295 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11296 xmlChar start[4];
11297 xmlCharEncoding enc;
11298
11299 /*
11300 * Very first chars read from the document flow.
11301 */
11302 if (avail < 4)
11303 goto done;
11304
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011305 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011306 * Get the 4 first bytes and decode the charset
11307 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011308 * plug some encoding conversion routines,
11309 * else xmlSwitchEncoding will set to (default)
11310 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011311 */
11312 start[0] = RAW;
11313 start[1] = NXT(1);
11314 start[2] = NXT(2);
11315 start[3] = NXT(3);
11316 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011317 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011318 break;
11319 }
Owen Taylor3473f882001-02-23 17:55:21 +000011320
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011321 if (avail < 2)
11322 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011323 cur = ctxt->input->cur[0];
11324 next = ctxt->input->cur[1];
11325 if (cur == 0) {
11326 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11327 ctxt->sax->setDocumentLocator(ctxt->userData,
11328 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011329 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011330 ctxt->instate = XML_PARSER_EOF;
11331#ifdef DEBUG_PUSH
11332 xmlGenericError(xmlGenericErrorContext,
11333 "PP: entering EOF\n");
11334#endif
11335 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11336 ctxt->sax->endDocument(ctxt->userData);
11337 goto done;
11338 }
11339 if ((cur == '<') && (next == '?')) {
11340 /* PI or XML decl */
11341 if (avail < 5) return(ret);
11342 if ((!terminate) &&
11343 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11344 return(ret);
11345 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11346 ctxt->sax->setDocumentLocator(ctxt->userData,
11347 &xmlDefaultSAXLocator);
11348 if ((ctxt->input->cur[2] == 'x') &&
11349 (ctxt->input->cur[3] == 'm') &&
11350 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011351 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011352 ret += 5;
11353#ifdef DEBUG_PUSH
11354 xmlGenericError(xmlGenericErrorContext,
11355 "PP: Parsing XML Decl\n");
11356#endif
11357 xmlParseXMLDecl(ctxt);
11358 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11359 /*
11360 * The XML REC instructs us to stop parsing right
11361 * here
11362 */
11363 ctxt->instate = XML_PARSER_EOF;
11364 return(0);
11365 }
11366 ctxt->standalone = ctxt->input->standalone;
11367 if ((ctxt->encoding == NULL) &&
11368 (ctxt->input->encoding != NULL))
11369 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11370 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11371 (!ctxt->disableSAX))
11372 ctxt->sax->startDocument(ctxt->userData);
11373 ctxt->instate = XML_PARSER_MISC;
11374#ifdef DEBUG_PUSH
11375 xmlGenericError(xmlGenericErrorContext,
11376 "PP: entering MISC\n");
11377#endif
11378 } else {
11379 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11380 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11381 (!ctxt->disableSAX))
11382 ctxt->sax->startDocument(ctxt->userData);
11383 ctxt->instate = XML_PARSER_MISC;
11384#ifdef DEBUG_PUSH
11385 xmlGenericError(xmlGenericErrorContext,
11386 "PP: entering MISC\n");
11387#endif
11388 }
11389 } else {
11390 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11391 ctxt->sax->setDocumentLocator(ctxt->userData,
11392 &xmlDefaultSAXLocator);
11393 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011394 if (ctxt->version == NULL) {
11395 xmlErrMemory(ctxt, NULL);
11396 break;
11397 }
Owen Taylor3473f882001-02-23 17:55:21 +000011398 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11399 (!ctxt->disableSAX))
11400 ctxt->sax->startDocument(ctxt->userData);
11401 ctxt->instate = XML_PARSER_MISC;
11402#ifdef DEBUG_PUSH
11403 xmlGenericError(xmlGenericErrorContext,
11404 "PP: entering MISC\n");
11405#endif
11406 }
11407 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011408 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011409 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011410 const xmlChar *prefix = NULL;
11411 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011412 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011413
11414 if ((avail < 2) && (ctxt->inputNr == 1))
11415 goto done;
11416 cur = ctxt->input->cur[0];
11417 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011418 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000011419 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011420 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11421 ctxt->sax->endDocument(ctxt->userData);
11422 goto done;
11423 }
11424 if (!terminate) {
11425 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011426 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011427 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011428 goto done;
11429 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11430 goto done;
11431 }
11432 }
11433 if (ctxt->spaceNr == 0)
11434 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011435 else if (*ctxt->space == -2)
11436 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011437 else
11438 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011439#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011440 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011441#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011442 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011443#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011444 else
11445 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011446#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011447 if (ctxt->instate == XML_PARSER_EOF)
11448 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011449 if (name == NULL) {
11450 spacePop(ctxt);
11451 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011452 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11453 ctxt->sax->endDocument(ctxt->userData);
11454 goto done;
11455 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011456#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011457 /*
11458 * [ VC: Root Element Type ]
11459 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011460 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011461 */
11462 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11463 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11464 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011465#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011466
11467 /*
11468 * Check for an Empty Element.
11469 */
11470 if ((RAW == '/') && (NXT(1) == '>')) {
11471 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011472
11473 if (ctxt->sax2) {
11474 if ((ctxt->sax != NULL) &&
11475 (ctxt->sax->endElementNs != NULL) &&
11476 (!ctxt->disableSAX))
11477 ctxt->sax->endElementNs(ctxt->userData, name,
11478 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011479 if (ctxt->nsNr - nsNr > 0)
11480 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011481#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011482 } else {
11483 if ((ctxt->sax != NULL) &&
11484 (ctxt->sax->endElement != NULL) &&
11485 (!ctxt->disableSAX))
11486 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011487#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011488 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011489 if (ctxt->instate == XML_PARSER_EOF)
11490 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011491 spacePop(ctxt);
11492 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011493 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011494 } else {
11495 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011496 }
Daniel Veillard65686452012-07-19 18:25:01 +080011497 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011498 break;
11499 }
11500 if (RAW == '>') {
11501 NEXT;
11502 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011503 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011504 "Couldn't find end of Start Tag %s\n",
11505 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011506 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011507 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011508 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011509 if (ctxt->sax2)
11510 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011511#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011512 else
11513 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011514#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011515
Daniel Veillarda880b122003-04-21 21:36:41 +000011516 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011517 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011518 break;
11519 }
11520 case XML_PARSER_CONTENT: {
11521 const xmlChar *test;
11522 unsigned int cons;
11523 if ((avail < 2) && (ctxt->inputNr == 1))
11524 goto done;
11525 cur = ctxt->input->cur[0];
11526 next = ctxt->input->cur[1];
11527
11528 test = CUR_PTR;
11529 cons = ctxt->input->consumed;
11530 if ((cur == '<') && (next == '/')) {
11531 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011532 break;
11533 } else if ((cur == '<') && (next == '?')) {
11534 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011535 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11536 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011537 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011538 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011539 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011540 ctxt->instate = XML_PARSER_CONTENT;
11541 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011542 } else if ((cur == '<') && (next != '!')) {
11543 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011544 break;
11545 } else if ((cur == '<') && (next == '!') &&
11546 (ctxt->input->cur[2] == '-') &&
11547 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011548 int term;
11549
11550 if (avail < 4)
11551 goto done;
11552 ctxt->input->cur += 4;
11553 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11554 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011555 if ((!terminate) && (term < 0)) {
11556 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011557 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011558 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011559 xmlParseComment(ctxt);
11560 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011561 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011562 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11563 (ctxt->input->cur[2] == '[') &&
11564 (ctxt->input->cur[3] == 'C') &&
11565 (ctxt->input->cur[4] == 'D') &&
11566 (ctxt->input->cur[5] == 'A') &&
11567 (ctxt->input->cur[6] == 'T') &&
11568 (ctxt->input->cur[7] == 'A') &&
11569 (ctxt->input->cur[8] == '[')) {
11570 SKIP(9);
11571 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011572 break;
11573 } else if ((cur == '<') && (next == '!') &&
11574 (avail < 9)) {
11575 goto done;
11576 } else if (cur == '&') {
11577 if ((!terminate) &&
11578 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11579 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011580 xmlParseReference(ctxt);
11581 } else {
11582 /* TODO Avoid the extra copy, handle directly !!! */
11583 /*
11584 * Goal of the following test is:
11585 * - minimize calls to the SAX 'character' callback
11586 * when they are mergeable
11587 * - handle a problem for isBlank when we only parse
11588 * a sequence of blank chars and the next one is
11589 * not available to check against '<' presence.
11590 * - tries to homogenize the differences in SAX
11591 * callbacks between the push and pull versions
11592 * of the parser.
11593 */
11594 if ((ctxt->inputNr == 1) &&
11595 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11596 if (!terminate) {
11597 if (ctxt->progressive) {
11598 if ((lastlt == NULL) ||
11599 (ctxt->input->cur > lastlt))
11600 goto done;
11601 } else if (xmlParseLookupSequence(ctxt,
11602 '<', 0, 0) < 0) {
11603 goto done;
11604 }
11605 }
11606 }
11607 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011608 xmlParseCharData(ctxt, 0);
11609 }
11610 /*
11611 * Pop-up of finished entities.
11612 */
11613 while ((RAW == 0) && (ctxt->inputNr > 1))
11614 xmlPopInput(ctxt);
11615 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011616 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11617 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011618 ctxt->instate = XML_PARSER_EOF;
11619 break;
11620 }
11621 break;
11622 }
11623 case XML_PARSER_END_TAG:
11624 if (avail < 2)
11625 goto done;
11626 if (!terminate) {
11627 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011628 /* > can be found unescaped in attribute values */
11629 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011630 goto done;
11631 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11632 goto done;
11633 }
11634 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011635 if (ctxt->sax2) {
11636 xmlParseEndTag2(ctxt,
11637 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11638 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011639 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011640 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011641 }
11642#ifdef LIBXML_SAX1_ENABLED
11643 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011644 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011645#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011646 if (ctxt->instate == XML_PARSER_EOF) {
11647 /* Nothing */
11648 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011649 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011650 } else {
11651 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011652 }
11653 break;
11654 case XML_PARSER_CDATA_SECTION: {
11655 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011656 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011657 * cdataBlock merge back contiguous callbacks.
11658 */
11659 int base;
11660
11661 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11662 if (base < 0) {
11663 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011664 int tmp;
11665
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011666 tmp = xmlCheckCdataPush(ctxt->input->cur,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011667 XML_PARSER_BIG_BUFFER_SIZE);
11668 if (tmp < 0) {
11669 tmp = -tmp;
11670 ctxt->input->cur += tmp;
11671 goto encoding_error;
11672 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011673 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11674 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011675 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011676 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011677 else if (ctxt->sax->characters != NULL)
11678 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011679 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011680 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011681 if (ctxt->instate == XML_PARSER_EOF)
11682 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011683 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011684 ctxt->checkIndex = 0;
11685 }
11686 goto done;
11687 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011688 int tmp;
11689
11690 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11691 if ((tmp < 0) || (tmp != base)) {
11692 tmp = -tmp;
11693 ctxt->input->cur += tmp;
11694 goto encoding_error;
11695 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011696 if ((ctxt->sax != NULL) && (base == 0) &&
11697 (ctxt->sax->cdataBlock != NULL) &&
11698 (!ctxt->disableSAX)) {
11699 /*
11700 * Special case to provide identical behaviour
11701 * between pull and push parsers on empty CDATA
11702 * sections
11703 */
11704 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11705 (!strncmp((const char *)&ctxt->input->cur[-9],
11706 "<![CDATA[", 9)))
11707 ctxt->sax->cdataBlock(ctxt->userData,
11708 BAD_CAST "", 0);
11709 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011710 (!ctxt->disableSAX)) {
11711 if (ctxt->sax->cdataBlock != NULL)
11712 ctxt->sax->cdataBlock(ctxt->userData,
11713 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011714 else if (ctxt->sax->characters != NULL)
11715 ctxt->sax->characters(ctxt->userData,
11716 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011717 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011718 if (ctxt->instate == XML_PARSER_EOF)
11719 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011720 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011721 ctxt->checkIndex = 0;
11722 ctxt->instate = XML_PARSER_CONTENT;
11723#ifdef DEBUG_PUSH
11724 xmlGenericError(xmlGenericErrorContext,
11725 "PP: entering CONTENT\n");
11726#endif
11727 }
11728 break;
11729 }
Owen Taylor3473f882001-02-23 17:55:21 +000011730 case XML_PARSER_MISC:
11731 SKIP_BLANKS;
11732 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011733 avail = ctxt->input->length -
11734 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011735 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011736 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011737 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011738 if (avail < 2)
11739 goto done;
11740 cur = ctxt->input->cur[0];
11741 next = ctxt->input->cur[1];
11742 if ((cur == '<') && (next == '?')) {
11743 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011744 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11745 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011746 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011747 }
Owen Taylor3473f882001-02-23 17:55:21 +000011748#ifdef DEBUG_PUSH
11749 xmlGenericError(xmlGenericErrorContext,
11750 "PP: Parsing PI\n");
11751#endif
11752 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011753 if (ctxt->instate == XML_PARSER_EOF)
11754 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011755 ctxt->instate = XML_PARSER_MISC;
11756 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011757 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011758 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011759 (ctxt->input->cur[2] == '-') &&
11760 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011761 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011762 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11763 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011764 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011765 }
Owen Taylor3473f882001-02-23 17:55:21 +000011766#ifdef DEBUG_PUSH
11767 xmlGenericError(xmlGenericErrorContext,
11768 "PP: Parsing Comment\n");
11769#endif
11770 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011771 if (ctxt->instate == XML_PARSER_EOF)
11772 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011773 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011774 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011775 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011776 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011777 (ctxt->input->cur[2] == 'D') &&
11778 (ctxt->input->cur[3] == 'O') &&
11779 (ctxt->input->cur[4] == 'C') &&
11780 (ctxt->input->cur[5] == 'T') &&
11781 (ctxt->input->cur[6] == 'Y') &&
11782 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011783 (ctxt->input->cur[8] == 'E')) {
11784 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011785 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11786 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011787 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011788 }
Owen Taylor3473f882001-02-23 17:55:21 +000011789#ifdef DEBUG_PUSH
11790 xmlGenericError(xmlGenericErrorContext,
11791 "PP: Parsing internal subset\n");
11792#endif
11793 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011794 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011795 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011796 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011797 if (ctxt->instate == XML_PARSER_EOF)
11798 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011799 if (RAW == '[') {
11800 ctxt->instate = XML_PARSER_DTD;
11801#ifdef DEBUG_PUSH
11802 xmlGenericError(xmlGenericErrorContext,
11803 "PP: entering DTD\n");
11804#endif
11805 } else {
11806 /*
11807 * Create and update the external subset.
11808 */
11809 ctxt->inSubset = 2;
11810 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11811 (ctxt->sax->externalSubset != NULL))
11812 ctxt->sax->externalSubset(ctxt->userData,
11813 ctxt->intSubName, ctxt->extSubSystem,
11814 ctxt->extSubURI);
11815 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011816 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011817 ctxt->instate = XML_PARSER_PROLOG;
11818#ifdef DEBUG_PUSH
11819 xmlGenericError(xmlGenericErrorContext,
11820 "PP: entering PROLOG\n");
11821#endif
11822 }
11823 } else if ((cur == '<') && (next == '!') &&
11824 (avail < 9)) {
11825 goto done;
11826 } else {
11827 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011828 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011829 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011830#ifdef DEBUG_PUSH
11831 xmlGenericError(xmlGenericErrorContext,
11832 "PP: entering START_TAG\n");
11833#endif
11834 }
11835 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011836 case XML_PARSER_PROLOG:
11837 SKIP_BLANKS;
11838 if (ctxt->input->buf == NULL)
11839 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11840 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011841 avail = xmlBufUse(ctxt->input->buf->buffer) -
11842 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011843 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011844 goto done;
11845 cur = ctxt->input->cur[0];
11846 next = ctxt->input->cur[1];
11847 if ((cur == '<') && (next == '?')) {
11848 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011849 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11850 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011851 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011852 }
Owen Taylor3473f882001-02-23 17:55:21 +000011853#ifdef DEBUG_PUSH
11854 xmlGenericError(xmlGenericErrorContext,
11855 "PP: Parsing PI\n");
11856#endif
11857 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011858 if (ctxt->instate == XML_PARSER_EOF)
11859 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011860 ctxt->instate = XML_PARSER_PROLOG;
11861 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011862 } else if ((cur == '<') && (next == '!') &&
11863 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11864 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011865 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11866 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011867 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011868 }
Owen Taylor3473f882001-02-23 17:55:21 +000011869#ifdef DEBUG_PUSH
11870 xmlGenericError(xmlGenericErrorContext,
11871 "PP: Parsing Comment\n");
11872#endif
11873 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011874 if (ctxt->instate == XML_PARSER_EOF)
11875 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011876 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011877 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011878 } else if ((cur == '<') && (next == '!') &&
11879 (avail < 4)) {
11880 goto done;
11881 } else {
11882 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011883 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011884 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011885 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011886#ifdef DEBUG_PUSH
11887 xmlGenericError(xmlGenericErrorContext,
11888 "PP: entering START_TAG\n");
11889#endif
11890 }
11891 break;
11892 case XML_PARSER_EPILOG:
11893 SKIP_BLANKS;
11894 if (ctxt->input->buf == NULL)
11895 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11896 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011897 avail = xmlBufUse(ctxt->input->buf->buffer) -
11898 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011899 if (avail < 2)
11900 goto done;
11901 cur = ctxt->input->cur[0];
11902 next = ctxt->input->cur[1];
11903 if ((cur == '<') && (next == '?')) {
11904 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011905 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11906 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011907 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011908 }
Owen Taylor3473f882001-02-23 17:55:21 +000011909#ifdef DEBUG_PUSH
11910 xmlGenericError(xmlGenericErrorContext,
11911 "PP: Parsing PI\n");
11912#endif
11913 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011914 if (ctxt->instate == XML_PARSER_EOF)
11915 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011916 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011917 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011918 } else if ((cur == '<') && (next == '!') &&
11919 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11920 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011921 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11922 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011923 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011924 }
Owen Taylor3473f882001-02-23 17:55:21 +000011925#ifdef DEBUG_PUSH
11926 xmlGenericError(xmlGenericErrorContext,
11927 "PP: Parsing Comment\n");
11928#endif
11929 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011930 if (ctxt->instate == XML_PARSER_EOF)
11931 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011932 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011933 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011934 } else if ((cur == '<') && (next == '!') &&
11935 (avail < 4)) {
11936 goto done;
11937 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011938 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011939 ctxt->instate = XML_PARSER_EOF;
11940#ifdef DEBUG_PUSH
11941 xmlGenericError(xmlGenericErrorContext,
11942 "PP: entering EOF\n");
11943#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011944 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011945 ctxt->sax->endDocument(ctxt->userData);
11946 goto done;
11947 }
11948 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011949 case XML_PARSER_DTD: {
11950 /*
11951 * Sorry but progressive parsing of the internal subset
11952 * is not expected to be supported. We first check that
11953 * the full content of the internal subset is available and
11954 * the parsing is launched only at that point.
11955 * Internal subset ends up with "']' S? '>'" in an unescaped
11956 * section and not in a ']]>' sequence which are conditional
11957 * sections (whoever argued to keep that crap in XML deserve
11958 * a place in hell !).
11959 */
11960 int base, i;
11961 xmlChar *buf;
11962 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011963 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011964
11965 base = ctxt->input->cur - ctxt->input->base;
11966 if (base < 0) return(0);
11967 if (ctxt->checkIndex > base)
11968 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011969 buf = xmlBufContent(ctxt->input->buf->buffer);
11970 use = xmlBufUse(ctxt->input->buf->buffer);
11971 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011972 if (quote != 0) {
11973 if (buf[base] == quote)
11974 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011975 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011976 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011977 if ((quote == 0) && (buf[base] == '<')) {
11978 int found = 0;
11979 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011980 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011981 (buf[base + 1] == '!') &&
11982 (buf[base + 2] == '-') &&
11983 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011984 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011985 if ((buf[base] == '-') &&
11986 (buf[base + 1] == '-') &&
11987 (buf[base + 2] == '>')) {
11988 found = 1;
11989 base += 2;
11990 break;
11991 }
11992 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011993 if (!found) {
11994#if 0
11995 fprintf(stderr, "unfinished comment\n");
11996#endif
11997 break; /* for */
11998 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011999 continue;
12000 }
12001 }
Owen Taylor3473f882001-02-23 17:55:21 +000012002 if (buf[base] == '"') {
12003 quote = '"';
12004 continue;
12005 }
12006 if (buf[base] == '\'') {
12007 quote = '\'';
12008 continue;
12009 }
12010 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012011#if 0
12012 fprintf(stderr, "%c%c%c%c: ", buf[base],
12013 buf[base + 1], buf[base + 2], buf[base + 3]);
12014#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012015 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000012016 break;
12017 if (buf[base + 1] == ']') {
12018 /* conditional crap, skip both ']' ! */
12019 base++;
12020 continue;
12021 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012022 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012023 if (buf[base + i] == '>') {
12024#if 0
12025 fprintf(stderr, "found\n");
12026#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012027 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012028 }
12029 if (!IS_BLANK_CH(buf[base + i])) {
12030#if 0
12031 fprintf(stderr, "not found\n");
12032#endif
12033 goto not_end_of_int_subset;
12034 }
Owen Taylor3473f882001-02-23 17:55:21 +000012035 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012036#if 0
12037 fprintf(stderr, "end of stream\n");
12038#endif
Owen Taylor3473f882001-02-23 17:55:21 +000012039 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012040
Owen Taylor3473f882001-02-23 17:55:21 +000012041 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000012042not_end_of_int_subset:
12043 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000012044 }
12045 /*
12046 * We didn't find the end of the Internal subset
12047 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012048 if (quote == 0)
12049 ctxt->checkIndex = base;
12050 else
12051 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012052#ifdef DEBUG_PUSH
12053 if (next == 0)
12054 xmlGenericError(xmlGenericErrorContext,
12055 "PP: lookup of int subset end filed\n");
12056#endif
12057 goto done;
12058
12059found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012060 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012061 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012062 if (ctxt->instate == XML_PARSER_EOF)
12063 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012064 ctxt->inSubset = 2;
12065 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12066 (ctxt->sax->externalSubset != NULL))
12067 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12068 ctxt->extSubSystem, ctxt->extSubURI);
12069 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012070 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012071 if (ctxt->instate == XML_PARSER_EOF)
12072 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012073 ctxt->instate = XML_PARSER_PROLOG;
12074 ctxt->checkIndex = 0;
12075#ifdef DEBUG_PUSH
12076 xmlGenericError(xmlGenericErrorContext,
12077 "PP: entering PROLOG\n");
12078#endif
12079 break;
12080 }
12081 case XML_PARSER_COMMENT:
12082 xmlGenericError(xmlGenericErrorContext,
12083 "PP: internal error, state == COMMENT\n");
12084 ctxt->instate = XML_PARSER_CONTENT;
12085#ifdef DEBUG_PUSH
12086 xmlGenericError(xmlGenericErrorContext,
12087 "PP: entering CONTENT\n");
12088#endif
12089 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012090 case XML_PARSER_IGNORE:
12091 xmlGenericError(xmlGenericErrorContext,
12092 "PP: internal error, state == IGNORE");
12093 ctxt->instate = XML_PARSER_DTD;
12094#ifdef DEBUG_PUSH
12095 xmlGenericError(xmlGenericErrorContext,
12096 "PP: entering DTD\n");
12097#endif
12098 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012099 case XML_PARSER_PI:
12100 xmlGenericError(xmlGenericErrorContext,
12101 "PP: internal error, state == PI\n");
12102 ctxt->instate = XML_PARSER_CONTENT;
12103#ifdef DEBUG_PUSH
12104 xmlGenericError(xmlGenericErrorContext,
12105 "PP: entering CONTENT\n");
12106#endif
12107 break;
12108 case XML_PARSER_ENTITY_DECL:
12109 xmlGenericError(xmlGenericErrorContext,
12110 "PP: internal error, state == ENTITY_DECL\n");
12111 ctxt->instate = XML_PARSER_DTD;
12112#ifdef DEBUG_PUSH
12113 xmlGenericError(xmlGenericErrorContext,
12114 "PP: entering DTD\n");
12115#endif
12116 break;
12117 case XML_PARSER_ENTITY_VALUE:
12118 xmlGenericError(xmlGenericErrorContext,
12119 "PP: internal error, state == ENTITY_VALUE\n");
12120 ctxt->instate = XML_PARSER_CONTENT;
12121#ifdef DEBUG_PUSH
12122 xmlGenericError(xmlGenericErrorContext,
12123 "PP: entering DTD\n");
12124#endif
12125 break;
12126 case XML_PARSER_ATTRIBUTE_VALUE:
12127 xmlGenericError(xmlGenericErrorContext,
12128 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12129 ctxt->instate = XML_PARSER_START_TAG;
12130#ifdef DEBUG_PUSH
12131 xmlGenericError(xmlGenericErrorContext,
12132 "PP: entering START_TAG\n");
12133#endif
12134 break;
12135 case XML_PARSER_SYSTEM_LITERAL:
12136 xmlGenericError(xmlGenericErrorContext,
12137 "PP: internal error, state == SYSTEM_LITERAL\n");
12138 ctxt->instate = XML_PARSER_START_TAG;
12139#ifdef DEBUG_PUSH
12140 xmlGenericError(xmlGenericErrorContext,
12141 "PP: entering START_TAG\n");
12142#endif
12143 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012144 case XML_PARSER_PUBLIC_LITERAL:
12145 xmlGenericError(xmlGenericErrorContext,
12146 "PP: internal error, state == PUBLIC_LITERAL\n");
12147 ctxt->instate = XML_PARSER_START_TAG;
12148#ifdef DEBUG_PUSH
12149 xmlGenericError(xmlGenericErrorContext,
12150 "PP: entering START_TAG\n");
12151#endif
12152 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012153 }
12154 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012155done:
Owen Taylor3473f882001-02-23 17:55:21 +000012156#ifdef DEBUG_PUSH
12157 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12158#endif
12159 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012160encoding_error:
12161 {
12162 char buffer[150];
12163
12164 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12165 ctxt->input->cur[0], ctxt->input->cur[1],
12166 ctxt->input->cur[2], ctxt->input->cur[3]);
12167 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12168 "Input is not proper UTF-8, indicate encoding !\n%s",
12169 BAD_CAST buffer, NULL);
12170 }
12171 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012172}
12173
12174/**
Daniel Veillard65686452012-07-19 18:25:01 +080012175 * xmlParseCheckTransition:
12176 * @ctxt: an XML parser context
12177 * @chunk: a char array
12178 * @size: the size in byte of the chunk
12179 *
12180 * Check depending on the current parser state if the chunk given must be
12181 * processed immediately or one need more data to advance on parsing.
12182 *
12183 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12184 */
12185static int
12186xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12187 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12188 return(-1);
12189 if (ctxt->instate == XML_PARSER_START_TAG) {
12190 if (memchr(chunk, '>', size) != NULL)
12191 return(1);
12192 return(0);
12193 }
12194 if (ctxt->progressive == XML_PARSER_COMMENT) {
12195 if (memchr(chunk, '>', size) != NULL)
12196 return(1);
12197 return(0);
12198 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012199 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12200 if (memchr(chunk, '>', size) != NULL)
12201 return(1);
12202 return(0);
12203 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012204 if (ctxt->progressive == XML_PARSER_PI) {
12205 if (memchr(chunk, '>', size) != NULL)
12206 return(1);
12207 return(0);
12208 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012209 if (ctxt->instate == XML_PARSER_END_TAG) {
12210 if (memchr(chunk, '>', size) != NULL)
12211 return(1);
12212 return(0);
12213 }
12214 if ((ctxt->progressive == XML_PARSER_DTD) ||
12215 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012216 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012217 return(1);
12218 return(0);
12219 }
Daniel Veillard65686452012-07-19 18:25:01 +080012220 return(1);
12221}
12222
12223/**
Owen Taylor3473f882001-02-23 17:55:21 +000012224 * xmlParseChunk:
12225 * @ctxt: an XML parser context
12226 * @chunk: an char array
12227 * @size: the size in byte of the chunk
12228 * @terminate: last chunk indicator
12229 *
12230 * Parse a Chunk of memory
12231 *
12232 * Returns zero if no error, the xmlParserErrors otherwise.
12233 */
12234int
12235xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12236 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012237 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012238 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012239 size_t old_avail = 0;
12240 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012241
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012242 if (ctxt == NULL)
12243 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012244 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012245 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012246 if (ctxt->instate == XML_PARSER_EOF)
12247 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012248 if (ctxt->instate == XML_PARSER_START)
12249 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012250 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12251 (chunk[size - 1] == '\r')) {
12252 end_in_lf = 1;
12253 size--;
12254 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012255
12256xmldecl_done:
12257
Owen Taylor3473f882001-02-23 17:55:21 +000012258 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12259 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012260 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12261 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012262 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012263
Daniel Veillard65686452012-07-19 18:25:01 +080012264 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012265 /*
12266 * Specific handling if we autodetected an encoding, we should not
12267 * push more than the first line ... which depend on the encoding
12268 * And only push the rest once the final encoding was detected
12269 */
12270 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12271 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012272 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012273
12274 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12275 BAD_CAST "UTF-16")) ||
12276 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12277 BAD_CAST "UTF16")))
12278 len = 90;
12279 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12280 BAD_CAST "UCS-4")) ||
12281 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12282 BAD_CAST "UCS4")))
12283 len = 180;
12284
12285 if (ctxt->input->buf->rawconsumed < len)
12286 len -= ctxt->input->buf->rawconsumed;
12287
Raul Hudeaba9716a2010-03-15 10:13:29 +010012288 /*
12289 * Change size for reading the initial declaration only
12290 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12291 * will blindly copy extra bytes from memory.
12292 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012293 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012294 remain = size - len;
12295 size = len;
12296 } else {
12297 remain = 0;
12298 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012299 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012300 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012301 if (res < 0) {
12302 ctxt->errNo = XML_PARSER_EOF;
12303 ctxt->disableSAX = 1;
12304 return (XML_PARSER_EOF);
12305 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012306 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012307#ifdef DEBUG_PUSH
12308 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12309#endif
12310
Owen Taylor3473f882001-02-23 17:55:21 +000012311 } else if (ctxt->instate != XML_PARSER_EOF) {
12312 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12313 xmlParserInputBufferPtr in = ctxt->input->buf;
12314 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12315 (in->raw != NULL)) {
12316 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012317 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12318 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012319
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012320 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012321 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012322 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012323 xmlGenericError(xmlGenericErrorContext,
12324 "xmlParseChunk: encoder error\n");
12325 return(XML_ERR_INVALID_ENCODING);
12326 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012327 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012328 }
12329 }
12330 }
Daniel Veillard65686452012-07-19 18:25:01 +080012331 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012332 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012333 } else {
12334 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12335 avail = xmlBufUse(ctxt->input->buf->buffer);
12336 /*
12337 * Depending on the current state it may not be such
12338 * a good idea to try parsing if there is nothing in the chunk
12339 * which would be worth doing a parser state transition and we
12340 * need to wait for more data
12341 */
12342 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12343 (old_avail == 0) || (avail == 0) ||
12344 (xmlParseCheckTransition(ctxt,
12345 (const char *)&ctxt->input->base[old_avail],
12346 avail - old_avail)))
12347 xmlParseTryOrFinish(ctxt, terminate);
12348 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012349 if (ctxt->instate == XML_PARSER_EOF)
12350 return(ctxt->errNo);
12351
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012352 if ((ctxt->input != NULL) &&
12353 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12354 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12355 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12356 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12357 ctxt->instate = XML_PARSER_EOF;
12358 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012359 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12360 return(ctxt->errNo);
12361
12362 if (remain != 0) {
12363 chunk += size;
12364 size = remain;
12365 remain = 0;
12366 goto xmldecl_done;
12367 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012368 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12369 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012370 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12371 ctxt->input);
12372 size_t current = ctxt->input->cur - ctxt->input->base;
12373
Daniel Veillarda617e242006-01-09 14:38:44 +000012374 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012375
12376 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12377 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012378 }
Owen Taylor3473f882001-02-23 17:55:21 +000012379 if (terminate) {
12380 /*
12381 * Check for termination
12382 */
Daniel Veillard65686452012-07-19 18:25:01 +080012383 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012384
12385 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012386 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012387 cur_avail = ctxt->input->length -
12388 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012389 else
Daniel Veillard65686452012-07-19 18:25:01 +080012390 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12391 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012392 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012393
Owen Taylor3473f882001-02-23 17:55:21 +000012394 if ((ctxt->instate != XML_PARSER_EOF) &&
12395 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012396 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012397 }
Daniel Veillard65686452012-07-19 18:25:01 +080012398 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012399 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012400 }
Owen Taylor3473f882001-02-23 17:55:21 +000012401 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012402 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012403 ctxt->sax->endDocument(ctxt->userData);
12404 }
12405 ctxt->instate = XML_PARSER_EOF;
12406 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012407 if (ctxt->wellFormed == 0)
12408 return((xmlParserErrors) ctxt->errNo);
12409 else
12410 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012411}
12412
12413/************************************************************************
12414 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012415 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012416 * *
12417 ************************************************************************/
12418
12419/**
Owen Taylor3473f882001-02-23 17:55:21 +000012420 * xmlCreatePushParserCtxt:
12421 * @sax: a SAX handler
12422 * @user_data: The user data returned on SAX callbacks
12423 * @chunk: a pointer to an array of chars
12424 * @size: number of chars in the array
12425 * @filename: an optional file name or URI
12426 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012427 * Create a parser context for using the XML parser in push mode.
12428 * If @buffer and @size are non-NULL, the data is used to detect
12429 * the encoding. The remaining characters will be parsed so they
12430 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012431 * To allow content encoding detection, @size should be >= 4
12432 * The value of @filename is used for fetching external entities
12433 * and error/warning reports.
12434 *
12435 * Returns the new parser context or NULL
12436 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012437
Owen Taylor3473f882001-02-23 17:55:21 +000012438xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012439xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012440 const char *chunk, int size, const char *filename) {
12441 xmlParserCtxtPtr ctxt;
12442 xmlParserInputPtr inputStream;
12443 xmlParserInputBufferPtr buf;
12444 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12445
12446 /*
12447 * plug some encoding conversion routines
12448 */
12449 if ((chunk != NULL) && (size >= 4))
12450 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12451
12452 buf = xmlAllocParserInputBuffer(enc);
12453 if (buf == NULL) return(NULL);
12454
12455 ctxt = xmlNewParserCtxt();
12456 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012457 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012458 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012459 return(NULL);
12460 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012461 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012462 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12463 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012464 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012465 xmlFreeParserInputBuffer(buf);
12466 xmlFreeParserCtxt(ctxt);
12467 return(NULL);
12468 }
Owen Taylor3473f882001-02-23 17:55:21 +000012469 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012470#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012471 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012472#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012473 xmlFree(ctxt->sax);
12474 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12475 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012476 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012477 xmlFreeParserInputBuffer(buf);
12478 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012479 return(NULL);
12480 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012481 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12482 if (sax->initialized == XML_SAX2_MAGIC)
12483 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12484 else
12485 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012486 if (user_data != NULL)
12487 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012488 }
Owen Taylor3473f882001-02-23 17:55:21 +000012489 if (filename == NULL) {
12490 ctxt->directory = NULL;
12491 } else {
12492 ctxt->directory = xmlParserGetDirectory(filename);
12493 }
12494
12495 inputStream = xmlNewInputStream(ctxt);
12496 if (inputStream == NULL) {
12497 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012498 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012499 return(NULL);
12500 }
12501
12502 if (filename == NULL)
12503 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012504 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012505 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012506 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012507 if (inputStream->filename == NULL) {
12508 xmlFreeParserCtxt(ctxt);
12509 xmlFreeParserInputBuffer(buf);
12510 return(NULL);
12511 }
12512 }
Owen Taylor3473f882001-02-23 17:55:21 +000012513 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012514 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012515 inputPush(ctxt, inputStream);
12516
William M. Brack3a1cd212005-02-11 14:35:54 +000012517 /*
12518 * If the caller didn't provide an initial 'chunk' for determining
12519 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12520 * that it can be automatically determined later
12521 */
12522 if ((size == 0) || (chunk == NULL)) {
12523 ctxt->charset = XML_CHAR_ENCODING_NONE;
12524 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012525 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12526 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012527
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012528 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012529
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012530 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012531#ifdef DEBUG_PUSH
12532 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12533#endif
12534 }
12535
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012536 if (enc != XML_CHAR_ENCODING_NONE) {
12537 xmlSwitchEncoding(ctxt, enc);
12538 }
12539
Owen Taylor3473f882001-02-23 17:55:21 +000012540 return(ctxt);
12541}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012542#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012543
12544/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012545 * xmlStopParser:
12546 * @ctxt: an XML parser context
12547 *
12548 * Blocks further parser processing
12549 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012550void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012551xmlStopParser(xmlParserCtxtPtr ctxt) {
12552 if (ctxt == NULL)
12553 return;
12554 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarde50ba812013-04-11 15:54:51 +080012555 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012556 ctxt->disableSAX = 1;
12557 if (ctxt->input != NULL) {
12558 ctxt->input->cur = BAD_CAST"";
12559 ctxt->input->base = ctxt->input->cur;
12560 }
12561}
12562
12563/**
Owen Taylor3473f882001-02-23 17:55:21 +000012564 * xmlCreateIOParserCtxt:
12565 * @sax: a SAX handler
12566 * @user_data: The user data returned on SAX callbacks
12567 * @ioread: an I/O read function
12568 * @ioclose: an I/O close function
12569 * @ioctx: an I/O handler
12570 * @enc: the charset encoding if known
12571 *
12572 * Create a parser context for using the XML parser with an existing
12573 * I/O stream
12574 *
12575 * Returns the new parser context or NULL
12576 */
12577xmlParserCtxtPtr
12578xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12579 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12580 void *ioctx, xmlCharEncoding enc) {
12581 xmlParserCtxtPtr ctxt;
12582 xmlParserInputPtr inputStream;
12583 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012584
Daniel Veillard42595322004-11-08 10:52:06 +000012585 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012586
12587 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012588 if (buf == NULL) {
12589 if (ioclose != NULL)
12590 ioclose(ioctx);
12591 return (NULL);
12592 }
Owen Taylor3473f882001-02-23 17:55:21 +000012593
12594 ctxt = xmlNewParserCtxt();
12595 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012596 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012597 return(NULL);
12598 }
12599 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012600#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012601 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012602#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012603 xmlFree(ctxt->sax);
12604 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12605 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012606 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012607 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012608 return(NULL);
12609 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012610 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12611 if (sax->initialized == XML_SAX2_MAGIC)
12612 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12613 else
12614 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012615 if (user_data != NULL)
12616 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012617 }
Owen Taylor3473f882001-02-23 17:55:21 +000012618
12619 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12620 if (inputStream == NULL) {
12621 xmlFreeParserCtxt(ctxt);
12622 return(NULL);
12623 }
12624 inputPush(ctxt, inputStream);
12625
12626 return(ctxt);
12627}
12628
Daniel Veillard4432df22003-09-28 18:58:27 +000012629#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012630/************************************************************************
12631 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012632 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012633 * *
12634 ************************************************************************/
12635
12636/**
12637 * xmlIOParseDTD:
12638 * @sax: the SAX handler block or NULL
12639 * @input: an Input Buffer
12640 * @enc: the charset encoding if known
12641 *
12642 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012643 *
Owen Taylor3473f882001-02-23 17:55:21 +000012644 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012645 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012646 */
12647
12648xmlDtdPtr
12649xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12650 xmlCharEncoding enc) {
12651 xmlDtdPtr ret = NULL;
12652 xmlParserCtxtPtr ctxt;
12653 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012654 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012655
12656 if (input == NULL)
12657 return(NULL);
12658
12659 ctxt = xmlNewParserCtxt();
12660 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012661 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012662 return(NULL);
12663 }
12664
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012665 /* We are loading a DTD */
12666 ctxt->options |= XML_PARSE_DTDLOAD;
12667
Owen Taylor3473f882001-02-23 17:55:21 +000012668 /*
12669 * Set-up the SAX context
12670 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012671 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012672 if (ctxt->sax != NULL)
12673 xmlFree(ctxt->sax);
12674 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012675 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012676 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012677 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012678
12679 /*
12680 * generate a parser input from the I/O handler
12681 */
12682
Daniel Veillard43caefb2003-12-07 19:32:22 +000012683 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012684 if (pinput == NULL) {
12685 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012686 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012687 xmlFreeParserCtxt(ctxt);
12688 return(NULL);
12689 }
12690
12691 /*
12692 * plug some encoding conversion routines here.
12693 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012694 if (xmlPushInput(ctxt, pinput) < 0) {
12695 if (sax != NULL) ctxt->sax = NULL;
12696 xmlFreeParserCtxt(ctxt);
12697 return(NULL);
12698 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012699 if (enc != XML_CHAR_ENCODING_NONE) {
12700 xmlSwitchEncoding(ctxt, enc);
12701 }
Owen Taylor3473f882001-02-23 17:55:21 +000012702
12703 pinput->filename = NULL;
12704 pinput->line = 1;
12705 pinput->col = 1;
12706 pinput->base = ctxt->input->cur;
12707 pinput->cur = ctxt->input->cur;
12708 pinput->free = NULL;
12709
12710 /*
12711 * let's parse that entity knowing it's an external subset.
12712 */
12713 ctxt->inSubset = 2;
12714 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012715 if (ctxt->myDoc == NULL) {
12716 xmlErrMemory(ctxt, "New Doc failed");
12717 return(NULL);
12718 }
12719 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012720 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12721 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012722
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012723 if ((enc == XML_CHAR_ENCODING_NONE) &&
12724 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012725 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012726 * Get the 4 first bytes and decode the charset
12727 * if enc != XML_CHAR_ENCODING_NONE
12728 * plug some encoding conversion routines.
12729 */
12730 start[0] = RAW;
12731 start[1] = NXT(1);
12732 start[2] = NXT(2);
12733 start[3] = NXT(3);
12734 enc = xmlDetectCharEncoding(start, 4);
12735 if (enc != XML_CHAR_ENCODING_NONE) {
12736 xmlSwitchEncoding(ctxt, enc);
12737 }
12738 }
12739
Owen Taylor3473f882001-02-23 17:55:21 +000012740 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12741
12742 if (ctxt->myDoc != NULL) {
12743 if (ctxt->wellFormed) {
12744 ret = ctxt->myDoc->extSubset;
12745 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012746 if (ret != NULL) {
12747 xmlNodePtr tmp;
12748
12749 ret->doc = NULL;
12750 tmp = ret->children;
12751 while (tmp != NULL) {
12752 tmp->doc = NULL;
12753 tmp = tmp->next;
12754 }
12755 }
Owen Taylor3473f882001-02-23 17:55:21 +000012756 } else {
12757 ret = NULL;
12758 }
12759 xmlFreeDoc(ctxt->myDoc);
12760 ctxt->myDoc = NULL;
12761 }
12762 if (sax != NULL) ctxt->sax = NULL;
12763 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012764
Owen Taylor3473f882001-02-23 17:55:21 +000012765 return(ret);
12766}
12767
12768/**
12769 * xmlSAXParseDTD:
12770 * @sax: the SAX handler block
12771 * @ExternalID: a NAME* containing the External ID of the DTD
12772 * @SystemID: a NAME* containing the URL to the DTD
12773 *
12774 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012775 *
Owen Taylor3473f882001-02-23 17:55:21 +000012776 * Returns the resulting xmlDtdPtr or NULL in case of error.
12777 */
12778
12779xmlDtdPtr
12780xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12781 const xmlChar *SystemID) {
12782 xmlDtdPtr ret = NULL;
12783 xmlParserCtxtPtr ctxt;
12784 xmlParserInputPtr input = NULL;
12785 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012786 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012787
12788 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12789
12790 ctxt = xmlNewParserCtxt();
12791 if (ctxt == NULL) {
12792 return(NULL);
12793 }
12794
Daniel Veillarddd8367d2014-06-11 16:54:32 +080012795 /* We are loading a DTD */
12796 ctxt->options |= XML_PARSE_DTDLOAD;
12797
Owen Taylor3473f882001-02-23 17:55:21 +000012798 /*
12799 * Set-up the SAX context
12800 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012801 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012802 if (ctxt->sax != NULL)
12803 xmlFree(ctxt->sax);
12804 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012805 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012806 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012807
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012808 /*
12809 * Canonicalise the system ID
12810 */
12811 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012812 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012813 xmlFreeParserCtxt(ctxt);
12814 return(NULL);
12815 }
Owen Taylor3473f882001-02-23 17:55:21 +000012816
12817 /*
12818 * Ask the Entity resolver to load the damn thing
12819 */
12820
12821 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012822 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12823 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012824 if (input == NULL) {
12825 if (sax != NULL) ctxt->sax = NULL;
12826 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012827 if (systemIdCanonic != NULL)
12828 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012829 return(NULL);
12830 }
12831
12832 /*
12833 * plug some encoding conversion routines here.
12834 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012835 if (xmlPushInput(ctxt, input) < 0) {
12836 if (sax != NULL) ctxt->sax = NULL;
12837 xmlFreeParserCtxt(ctxt);
12838 if (systemIdCanonic != NULL)
12839 xmlFree(systemIdCanonic);
12840 return(NULL);
12841 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012842 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12843 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12844 xmlSwitchEncoding(ctxt, enc);
12845 }
Owen Taylor3473f882001-02-23 17:55:21 +000012846
12847 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012848 input->filename = (char *) systemIdCanonic;
12849 else
12850 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012851 input->line = 1;
12852 input->col = 1;
12853 input->base = ctxt->input->cur;
12854 input->cur = ctxt->input->cur;
12855 input->free = NULL;
12856
12857 /*
12858 * let's parse that entity knowing it's an external subset.
12859 */
12860 ctxt->inSubset = 2;
12861 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012862 if (ctxt->myDoc == NULL) {
12863 xmlErrMemory(ctxt, "New Doc failed");
12864 if (sax != NULL) ctxt->sax = NULL;
12865 xmlFreeParserCtxt(ctxt);
12866 return(NULL);
12867 }
12868 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012869 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12870 ExternalID, SystemID);
12871 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12872
12873 if (ctxt->myDoc != NULL) {
12874 if (ctxt->wellFormed) {
12875 ret = ctxt->myDoc->extSubset;
12876 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012877 if (ret != NULL) {
12878 xmlNodePtr tmp;
12879
12880 ret->doc = NULL;
12881 tmp = ret->children;
12882 while (tmp != NULL) {
12883 tmp->doc = NULL;
12884 tmp = tmp->next;
12885 }
12886 }
Owen Taylor3473f882001-02-23 17:55:21 +000012887 } else {
12888 ret = NULL;
12889 }
12890 xmlFreeDoc(ctxt->myDoc);
12891 ctxt->myDoc = NULL;
12892 }
12893 if (sax != NULL) ctxt->sax = NULL;
12894 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012895
Owen Taylor3473f882001-02-23 17:55:21 +000012896 return(ret);
12897}
12898
Daniel Veillard4432df22003-09-28 18:58:27 +000012899
Owen Taylor3473f882001-02-23 17:55:21 +000012900/**
12901 * xmlParseDTD:
12902 * @ExternalID: a NAME* containing the External ID of the DTD
12903 * @SystemID: a NAME* containing the URL to the DTD
12904 *
12905 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012906 *
Owen Taylor3473f882001-02-23 17:55:21 +000012907 * Returns the resulting xmlDtdPtr or NULL in case of error.
12908 */
12909
12910xmlDtdPtr
12911xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12912 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12913}
Daniel Veillard4432df22003-09-28 18:58:27 +000012914#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012915
12916/************************************************************************
12917 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012918 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012919 * *
12920 ************************************************************************/
12921
12922/**
Owen Taylor3473f882001-02-23 17:55:21 +000012923 * xmlParseCtxtExternalEntity:
12924 * @ctx: the existing parsing context
12925 * @URL: the URL for the entity to load
12926 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012927 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012928 *
12929 * Parse an external general entity within an existing parsing context
12930 * An external general parsed entity is well-formed if it matches the
12931 * production labeled extParsedEnt.
12932 *
12933 * [78] extParsedEnt ::= TextDecl? content
12934 *
12935 * Returns 0 if the entity is well formed, -1 in case of args problem and
12936 * the parser error code otherwise
12937 */
12938
12939int
12940xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012941 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012942 xmlParserCtxtPtr ctxt;
12943 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012944 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012945 xmlSAXHandlerPtr oldsax = NULL;
12946 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012947 xmlChar start[4];
12948 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012949
Daniel Veillardce682bc2004-11-05 17:22:25 +000012950 if (ctx == NULL) return(-1);
12951
Daniel Veillard0161e632008-08-28 15:36:32 +000012952 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12953 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012954 return(XML_ERR_ENTITY_LOOP);
12955 }
12956
Daniel Veillardcda96922001-08-21 10:56:31 +000012957 if (lst != NULL)
12958 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012959 if ((URL == NULL) && (ID == NULL))
12960 return(-1);
12961 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12962 return(-1);
12963
Rob Richards798743a2009-06-19 13:54:25 -040012964 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012965 if (ctxt == NULL) {
12966 return(-1);
12967 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012968
Owen Taylor3473f882001-02-23 17:55:21 +000012969 oldsax = ctxt->sax;
12970 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012971 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012972 newDoc = xmlNewDoc(BAD_CAST "1.0");
12973 if (newDoc == NULL) {
12974 xmlFreeParserCtxt(ctxt);
12975 return(-1);
12976 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012977 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012978 if (ctx->myDoc->dict) {
12979 newDoc->dict = ctx->myDoc->dict;
12980 xmlDictReference(newDoc->dict);
12981 }
Owen Taylor3473f882001-02-23 17:55:21 +000012982 if (ctx->myDoc != NULL) {
12983 newDoc->intSubset = ctx->myDoc->intSubset;
12984 newDoc->extSubset = ctx->myDoc->extSubset;
12985 }
12986 if (ctx->myDoc->URL != NULL) {
12987 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12988 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012989 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12990 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012991 ctxt->sax = oldsax;
12992 xmlFreeParserCtxt(ctxt);
12993 newDoc->intSubset = NULL;
12994 newDoc->extSubset = NULL;
12995 xmlFreeDoc(newDoc);
12996 return(-1);
12997 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012998 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012999 nodePush(ctxt, newDoc->children);
13000 if (ctx->myDoc == NULL) {
13001 ctxt->myDoc = newDoc;
13002 } else {
13003 ctxt->myDoc = ctx->myDoc;
13004 newDoc->children->doc = ctx->myDoc;
13005 }
13006
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013007 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013008 * Get the 4 first bytes and decode the charset
13009 * if enc != XML_CHAR_ENCODING_NONE
13010 * plug some encoding conversion routines.
13011 */
13012 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013013 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13014 start[0] = RAW;
13015 start[1] = NXT(1);
13016 start[2] = NXT(2);
13017 start[3] = NXT(3);
13018 enc = xmlDetectCharEncoding(start, 4);
13019 if (enc != XML_CHAR_ENCODING_NONE) {
13020 xmlSwitchEncoding(ctxt, enc);
13021 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013022 }
13023
Owen Taylor3473f882001-02-23 17:55:21 +000013024 /*
13025 * Parse a possible text declaration first
13026 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013027 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013028 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013029 /*
13030 * An XML-1.0 document can't reference an entity not XML-1.0
13031 */
13032 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
13033 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013034 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013035 "Version mismatch between document and entity\n");
13036 }
Owen Taylor3473f882001-02-23 17:55:21 +000013037 }
13038
13039 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080013040 * If the user provided its own SAX callbacks then reuse the
13041 * useData callback field, otherwise the expected setup in a
13042 * DOM builder is to have userData == ctxt
13043 */
13044 if (ctx->userData == ctx)
13045 ctxt->userData = ctxt;
13046 else
13047 ctxt->userData = ctx->userData;
13048
13049 /*
Owen Taylor3473f882001-02-23 17:55:21 +000013050 * Doing validity checking on chunk doesn't make sense
13051 */
13052 ctxt->instate = XML_PARSER_CONTENT;
13053 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013054 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013055 ctxt->loadsubset = ctx->loadsubset;
13056 ctxt->depth = ctx->depth + 1;
13057 ctxt->replaceEntities = ctx->replaceEntities;
13058 if (ctxt->validate) {
13059 ctxt->vctxt.error = ctx->vctxt.error;
13060 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000013061 } else {
13062 ctxt->vctxt.error = NULL;
13063 ctxt->vctxt.warning = NULL;
13064 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000013065 ctxt->vctxt.nodeTab = NULL;
13066 ctxt->vctxt.nodeNr = 0;
13067 ctxt->vctxt.nodeMax = 0;
13068 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013069 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13070 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013071 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13072 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13073 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013074 ctxt->dictNames = ctx->dictNames;
13075 ctxt->attsDefault = ctx->attsDefault;
13076 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013077 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013078
13079 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013080
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013081 ctx->validate = ctxt->validate;
13082 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013083 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013084 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013085 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013086 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013087 }
13088 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013089 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013090 }
13091
13092 if (!ctxt->wellFormed) {
13093 if (ctxt->errNo == 0)
13094 ret = 1;
13095 else
13096 ret = ctxt->errNo;
13097 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013098 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013099 xmlNodePtr cur;
13100
13101 /*
13102 * Return the newly created nodeset after unlinking it from
13103 * they pseudo parent.
13104 */
13105 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013106 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013107 while (cur != NULL) {
13108 cur->parent = NULL;
13109 cur = cur->next;
13110 }
13111 newDoc->children->children = NULL;
13112 }
13113 ret = 0;
13114 }
13115 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013116 ctxt->dict = NULL;
13117 ctxt->attsDefault = NULL;
13118 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013119 xmlFreeParserCtxt(ctxt);
13120 newDoc->intSubset = NULL;
13121 newDoc->extSubset = NULL;
13122 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013123
Owen Taylor3473f882001-02-23 17:55:21 +000013124 return(ret);
13125}
13126
13127/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013128 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013129 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013130 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013131 * @sax: the SAX handler bloc (possibly NULL)
13132 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13133 * @depth: Used for loop detection, use 0
13134 * @URL: the URL for the entity to load
13135 * @ID: the System ID for the entity to load
13136 * @list: the return value for the set of parsed nodes
13137 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013138 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013139 *
13140 * Returns 0 if the entity is well formed, -1 in case of args problem and
13141 * the parser error code otherwise
13142 */
13143
Daniel Veillard7d515752003-09-26 19:12:37 +000013144static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013145xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13146 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013147 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013148 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013149 xmlParserCtxtPtr ctxt;
13150 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013151 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013152 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013153 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013154 xmlChar start[4];
13155 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013156
Daniel Veillard0161e632008-08-28 15:36:32 +000013157 if (((depth > 40) &&
13158 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13159 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013160 return(XML_ERR_ENTITY_LOOP);
13161 }
13162
Owen Taylor3473f882001-02-23 17:55:21 +000013163 if (list != NULL)
13164 *list = NULL;
13165 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013166 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013167 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013168 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013169
13170
Rob Richards9c0aa472009-03-26 18:10:19 +000013171 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013172 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013173 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013174 if (oldctxt != NULL) {
13175 ctxt->_private = oldctxt->_private;
13176 ctxt->loadsubset = oldctxt->loadsubset;
13177 ctxt->validate = oldctxt->validate;
13178 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013179 ctxt->record_info = oldctxt->record_info;
13180 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13181 ctxt->node_seq.length = oldctxt->node_seq.length;
13182 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013183 } else {
13184 /*
13185 * Doing validity checking on chunk without context
13186 * doesn't make sense
13187 */
13188 ctxt->_private = NULL;
13189 ctxt->validate = 0;
13190 ctxt->external = 2;
13191 ctxt->loadsubset = 0;
13192 }
Owen Taylor3473f882001-02-23 17:55:21 +000013193 if (sax != NULL) {
13194 oldsax = ctxt->sax;
13195 ctxt->sax = sax;
13196 if (user_data != NULL)
13197 ctxt->userData = user_data;
13198 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013199 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013200 newDoc = xmlNewDoc(BAD_CAST "1.0");
13201 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013202 ctxt->node_seq.maximum = 0;
13203 ctxt->node_seq.length = 0;
13204 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013205 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013206 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013207 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013208 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013209 newDoc->intSubset = doc->intSubset;
13210 newDoc->extSubset = doc->extSubset;
13211 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013212 xmlDictReference(newDoc->dict);
13213
Owen Taylor3473f882001-02-23 17:55:21 +000013214 if (doc->URL != NULL) {
13215 newDoc->URL = xmlStrdup(doc->URL);
13216 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013217 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13218 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013219 if (sax != NULL)
13220 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013221 ctxt->node_seq.maximum = 0;
13222 ctxt->node_seq.length = 0;
13223 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013224 xmlFreeParserCtxt(ctxt);
13225 newDoc->intSubset = NULL;
13226 newDoc->extSubset = NULL;
13227 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013228 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013229 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013230 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013231 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013232 ctxt->myDoc = doc;
13233 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013234
Daniel Veillard0161e632008-08-28 15:36:32 +000013235 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013236 * Get the 4 first bytes and decode the charset
13237 * if enc != XML_CHAR_ENCODING_NONE
13238 * plug some encoding conversion routines.
13239 */
13240 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013241 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13242 start[0] = RAW;
13243 start[1] = NXT(1);
13244 start[2] = NXT(2);
13245 start[3] = NXT(3);
13246 enc = xmlDetectCharEncoding(start, 4);
13247 if (enc != XML_CHAR_ENCODING_NONE) {
13248 xmlSwitchEncoding(ctxt, enc);
13249 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013250 }
13251
Owen Taylor3473f882001-02-23 17:55:21 +000013252 /*
13253 * Parse a possible text declaration first
13254 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013255 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013256 xmlParseTextDecl(ctxt);
13257 }
13258
Owen Taylor3473f882001-02-23 17:55:21 +000013259 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013260 ctxt->depth = depth;
13261
13262 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013263
Daniel Veillard561b7f82002-03-20 21:55:57 +000013264 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013265 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013266 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013267 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013268 }
13269 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013270 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013271 }
13272
13273 if (!ctxt->wellFormed) {
13274 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013275 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013276 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013277 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013278 } else {
13279 if (list != NULL) {
13280 xmlNodePtr cur;
13281
13282 /*
13283 * Return the newly created nodeset after unlinking it from
13284 * they pseudo parent.
13285 */
13286 cur = newDoc->children->children;
13287 *list = cur;
13288 while (cur != NULL) {
13289 cur->parent = NULL;
13290 cur = cur->next;
13291 }
13292 newDoc->children->children = NULL;
13293 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013294 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013295 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013296
13297 /*
13298 * Record in the parent context the number of entities replacement
13299 * done when parsing that reference.
13300 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013301 if (oldctxt != NULL)
13302 oldctxt->nbentities += ctxt->nbentities;
13303
Daniel Veillard0161e632008-08-28 15:36:32 +000013304 /*
13305 * Also record the size of the entity parsed
13306 */
13307 if (ctxt->input != NULL) {
13308 oldctxt->sizeentities += ctxt->input->consumed;
13309 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13310 }
13311 /*
13312 * And record the last error if any
13313 */
13314 if (ctxt->lastError.code != XML_ERR_OK)
13315 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13316
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013317 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013318 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000013319 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13320 oldctxt->node_seq.length = ctxt->node_seq.length;
13321 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013322 ctxt->node_seq.maximum = 0;
13323 ctxt->node_seq.length = 0;
13324 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013325 xmlFreeParserCtxt(ctxt);
13326 newDoc->intSubset = NULL;
13327 newDoc->extSubset = NULL;
13328 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013329
Owen Taylor3473f882001-02-23 17:55:21 +000013330 return(ret);
13331}
13332
Daniel Veillard81273902003-09-30 00:43:48 +000013333#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013334/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013335 * xmlParseExternalEntity:
13336 * @doc: the document the chunk pertains to
13337 * @sax: the SAX handler bloc (possibly NULL)
13338 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13339 * @depth: Used for loop detection, use 0
13340 * @URL: the URL for the entity to load
13341 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013342 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013343 *
13344 * Parse an external general entity
13345 * An external general parsed entity is well-formed if it matches the
13346 * production labeled extParsedEnt.
13347 *
13348 * [78] extParsedEnt ::= TextDecl? content
13349 *
13350 * Returns 0 if the entity is well formed, -1 in case of args problem and
13351 * the parser error code otherwise
13352 */
13353
13354int
13355xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013356 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013357 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013358 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013359}
13360
13361/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013362 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013363 * @doc: the document the chunk pertains to
13364 * @sax: the SAX handler bloc (possibly NULL)
13365 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13366 * @depth: Used for loop detection, use 0
13367 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013368 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013369 *
13370 * Parse a well-balanced chunk of an XML document
13371 * called by the parser
13372 * The allowed sequence for the Well Balanced Chunk is the one defined by
13373 * the content production in the XML grammar:
13374 *
13375 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13376 *
13377 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13378 * the parser error code otherwise
13379 */
13380
13381int
13382xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013383 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013384 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13385 depth, string, lst, 0 );
13386}
Daniel Veillard81273902003-09-30 00:43:48 +000013387#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013388
13389/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013390 * xmlParseBalancedChunkMemoryInternal:
13391 * @oldctxt: the existing parsing context
13392 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13393 * @user_data: the user data field for the parser context
13394 * @lst: the return value for the set of parsed nodes
13395 *
13396 *
13397 * Parse a well-balanced chunk of an XML document
13398 * called by the parser
13399 * The allowed sequence for the Well Balanced Chunk is the one defined by
13400 * the content production in the XML grammar:
13401 *
13402 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13403 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013404 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13405 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013406 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013407 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013408 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013409 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013410static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013411xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13412 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13413 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013414 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013415 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013416 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013417 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013418 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013419 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013420 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013421#ifdef SAX2
13422 int i;
13423#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013424
Daniel Veillard0161e632008-08-28 15:36:32 +000013425 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13426 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013427 return(XML_ERR_ENTITY_LOOP);
13428 }
13429
13430
13431 if (lst != NULL)
13432 *lst = NULL;
13433 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013434 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013435
13436 size = xmlStrlen(string);
13437
13438 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013439 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013440 if (user_data != NULL)
13441 ctxt->userData = user_data;
13442 else
13443 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013444 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13445 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013446 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13447 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13448 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013449
Daniel Veillard74eaec12009-08-26 15:57:20 +020013450#ifdef SAX2
13451 /* propagate namespaces down the entity */
13452 for (i = 0;i < oldctxt->nsNr;i += 2) {
13453 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13454 }
13455#endif
13456
Daniel Veillard328f48c2002-11-15 15:24:34 +000013457 oldsax = ctxt->sax;
13458 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013459 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013460 ctxt->replaceEntities = oldctxt->replaceEntities;
13461 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013462
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013463 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013464 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013465 newDoc = xmlNewDoc(BAD_CAST "1.0");
13466 if (newDoc == NULL) {
13467 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013468 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013469 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013470 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013471 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013472 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013473 newDoc->dict = ctxt->dict;
13474 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013475 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013476 } else {
13477 ctxt->myDoc = oldctxt->myDoc;
13478 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013479 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013480 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013481 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13482 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013483 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013484 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013485 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013486 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013487 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013488 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013489 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013490 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013491 ctxt->myDoc->children = NULL;
13492 ctxt->myDoc->last = NULL;
13493 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013494 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013495 ctxt->instate = XML_PARSER_CONTENT;
13496 ctxt->depth = oldctxt->depth + 1;
13497
Daniel Veillard328f48c2002-11-15 15:24:34 +000013498 ctxt->validate = 0;
13499 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013500 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13501 /*
13502 * ID/IDREF registration will be done in xmlValidateElement below
13503 */
13504 ctxt->loadsubset |= XML_SKIP_IDS;
13505 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013506 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013507 ctxt->attsDefault = oldctxt->attsDefault;
13508 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013509
Daniel Veillard68e9e742002-11-16 15:35:11 +000013510 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013511 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013512 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013513 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013514 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013515 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013516 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013517 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013518 }
13519
13520 if (!ctxt->wellFormed) {
13521 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013522 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013523 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013524 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013525 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013526 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013527 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013528
William M. Brack7b9154b2003-09-27 19:23:50 +000013529 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013530 xmlNodePtr cur;
13531
13532 /*
13533 * Return the newly created nodeset after unlinking it from
13534 * they pseudo parent.
13535 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013536 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013537 *lst = cur;
13538 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013539#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013540 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13541 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13542 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013543 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13544 oldctxt->myDoc, cur);
13545 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013546#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013547 cur->parent = NULL;
13548 cur = cur->next;
13549 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013550 ctxt->myDoc->children->children = NULL;
13551 }
13552 if (ctxt->myDoc != NULL) {
13553 xmlFreeNode(ctxt->myDoc->children);
13554 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013555 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013556 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013557
13558 /*
13559 * Record in the parent context the number of entities replacement
13560 * done when parsing that reference.
13561 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013562 if (oldctxt != NULL)
13563 oldctxt->nbentities += ctxt->nbentities;
13564
Daniel Veillard0161e632008-08-28 15:36:32 +000013565 /*
13566 * Also record the last error if any
13567 */
13568 if (ctxt->lastError.code != XML_ERR_OK)
13569 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13570
Daniel Veillard328f48c2002-11-15 15:24:34 +000013571 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013572 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013573 ctxt->attsDefault = NULL;
13574 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013575 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013576 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013577 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013578 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013579
Daniel Veillard328f48c2002-11-15 15:24:34 +000013580 return(ret);
13581}
13582
Daniel Veillard29b17482004-08-16 00:39:03 +000013583/**
13584 * xmlParseInNodeContext:
13585 * @node: the context node
13586 * @data: the input string
13587 * @datalen: the input string length in bytes
13588 * @options: a combination of xmlParserOption
13589 * @lst: the return value for the set of parsed nodes
13590 *
13591 * Parse a well-balanced chunk of an XML document
13592 * within the context (DTD, namespaces, etc ...) of the given node.
13593 *
13594 * The allowed sequence for the data is a Well Balanced Chunk defined by
13595 * the content production in the XML grammar:
13596 *
13597 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13598 *
13599 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13600 * error code otherwise
13601 */
13602xmlParserErrors
13603xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13604 int options, xmlNodePtr *lst) {
13605#ifdef SAX2
13606 xmlParserCtxtPtr ctxt;
13607 xmlDocPtr doc = NULL;
13608 xmlNodePtr fake, cur;
13609 int nsnr = 0;
13610
13611 xmlParserErrors ret = XML_ERR_OK;
13612
13613 /*
13614 * check all input parameters, grab the document
13615 */
13616 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13617 return(XML_ERR_INTERNAL_ERROR);
13618 switch (node->type) {
13619 case XML_ELEMENT_NODE:
13620 case XML_ATTRIBUTE_NODE:
13621 case XML_TEXT_NODE:
13622 case XML_CDATA_SECTION_NODE:
13623 case XML_ENTITY_REF_NODE:
13624 case XML_PI_NODE:
13625 case XML_COMMENT_NODE:
13626 case XML_DOCUMENT_NODE:
13627 case XML_HTML_DOCUMENT_NODE:
13628 break;
13629 default:
13630 return(XML_ERR_INTERNAL_ERROR);
13631
13632 }
13633 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13634 (node->type != XML_DOCUMENT_NODE) &&
13635 (node->type != XML_HTML_DOCUMENT_NODE))
13636 node = node->parent;
13637 if (node == NULL)
13638 return(XML_ERR_INTERNAL_ERROR);
13639 if (node->type == XML_ELEMENT_NODE)
13640 doc = node->doc;
13641 else
13642 doc = (xmlDocPtr) node;
13643 if (doc == NULL)
13644 return(XML_ERR_INTERNAL_ERROR);
13645
13646 /*
13647 * allocate a context and set-up everything not related to the
13648 * node position in the tree
13649 */
13650 if (doc->type == XML_DOCUMENT_NODE)
13651 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13652#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013653 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013654 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013655 /*
13656 * When parsing in context, it makes no sense to add implied
13657 * elements like html/body/etc...
13658 */
13659 options |= HTML_PARSE_NOIMPLIED;
13660 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013661#endif
13662 else
13663 return(XML_ERR_INTERNAL_ERROR);
13664
13665 if (ctxt == NULL)
13666 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013667
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013668 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013669 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13670 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13671 * we must wait until the last moment to free the original one.
13672 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013673 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013674 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013675 xmlDictFree(ctxt->dict);
13676 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013677 } else
13678 options |= XML_PARSE_NODICT;
13679
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013680 if (doc->encoding != NULL) {
13681 xmlCharEncodingHandlerPtr hdlr;
13682
13683 if (ctxt->encoding != NULL)
13684 xmlFree((xmlChar *) ctxt->encoding);
13685 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13686
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013687 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013688 if (hdlr != NULL) {
13689 xmlSwitchToEncoding(ctxt, hdlr);
13690 } else {
13691 return(XML_ERR_UNSUPPORTED_ENCODING);
13692 }
13693 }
13694
Daniel Veillard37334572008-07-31 08:20:02 +000013695 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013696 xmlDetectSAX2(ctxt);
13697 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013698 /* parsing in context, i.e. as within existing content */
13699 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013700
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013701 fake = xmlNewComment(NULL);
13702 if (fake == NULL) {
13703 xmlFreeParserCtxt(ctxt);
13704 return(XML_ERR_NO_MEMORY);
13705 }
13706 xmlAddChild(node, fake);
13707
Daniel Veillard29b17482004-08-16 00:39:03 +000013708 if (node->type == XML_ELEMENT_NODE) {
13709 nodePush(ctxt, node);
13710 /*
13711 * initialize the SAX2 namespaces stack
13712 */
13713 cur = node;
13714 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13715 xmlNsPtr ns = cur->nsDef;
13716 const xmlChar *iprefix, *ihref;
13717
13718 while (ns != NULL) {
13719 if (ctxt->dict) {
13720 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13721 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13722 } else {
13723 iprefix = ns->prefix;
13724 ihref = ns->href;
13725 }
13726
13727 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13728 nsPush(ctxt, iprefix, ihref);
13729 nsnr++;
13730 }
13731 ns = ns->next;
13732 }
13733 cur = cur->parent;
13734 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013735 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013736
13737 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13738 /*
13739 * ID/IDREF registration will be done in xmlValidateElement below
13740 */
13741 ctxt->loadsubset |= XML_SKIP_IDS;
13742 }
13743
Daniel Veillard499cc922006-01-18 17:22:35 +000013744#ifdef LIBXML_HTML_ENABLED
13745 if (doc->type == XML_HTML_DOCUMENT_NODE)
13746 __htmlParseContent(ctxt);
13747 else
13748#endif
13749 xmlParseContent(ctxt);
13750
Daniel Veillard29b17482004-08-16 00:39:03 +000013751 nsPop(ctxt, nsnr);
13752 if ((RAW == '<') && (NXT(1) == '/')) {
13753 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13754 } else if (RAW != 0) {
13755 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13756 }
13757 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13758 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13759 ctxt->wellFormed = 0;
13760 }
13761
13762 if (!ctxt->wellFormed) {
13763 if (ctxt->errNo == 0)
13764 ret = XML_ERR_INTERNAL_ERROR;
13765 else
13766 ret = (xmlParserErrors)ctxt->errNo;
13767 } else {
13768 ret = XML_ERR_OK;
13769 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013770
Daniel Veillard29b17482004-08-16 00:39:03 +000013771 /*
13772 * Return the newly created nodeset after unlinking it from
13773 * the pseudo sibling.
13774 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013775
Daniel Veillard29b17482004-08-16 00:39:03 +000013776 cur = fake->next;
13777 fake->next = NULL;
13778 node->last = fake;
13779
13780 if (cur != NULL) {
13781 cur->prev = NULL;
13782 }
13783
13784 *lst = cur;
13785
13786 while (cur != NULL) {
13787 cur->parent = NULL;
13788 cur = cur->next;
13789 }
13790
13791 xmlUnlinkNode(fake);
13792 xmlFreeNode(fake);
13793
13794
13795 if (ret != XML_ERR_OK) {
13796 xmlFreeNodeList(*lst);
13797 *lst = NULL;
13798 }
William M. Brackc3f81342004-10-03 01:22:44 +000013799
William M. Brackb7b54de2004-10-06 16:38:01 +000013800 if (doc->dict != NULL)
13801 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013802 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013803
Daniel Veillard29b17482004-08-16 00:39:03 +000013804 return(ret);
13805#else /* !SAX2 */
13806 return(XML_ERR_INTERNAL_ERROR);
13807#endif
13808}
13809
Daniel Veillard81273902003-09-30 00:43:48 +000013810#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013811/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013812 * xmlParseBalancedChunkMemoryRecover:
13813 * @doc: the document the chunk pertains to
13814 * @sax: the SAX handler bloc (possibly NULL)
13815 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13816 * @depth: Used for loop detection, use 0
13817 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13818 * @lst: the return value for the set of parsed nodes
13819 * @recover: return nodes even if the data is broken (use 0)
13820 *
13821 *
13822 * Parse a well-balanced chunk of an XML document
13823 * called by the parser
13824 * The allowed sequence for the Well Balanced Chunk is the one defined by
13825 * the content production in the XML grammar:
13826 *
13827 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13828 *
13829 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13830 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013831 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013832 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013833 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13834 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013835 */
13836int
13837xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013838 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013839 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013840 xmlParserCtxtPtr ctxt;
13841 xmlDocPtr newDoc;
13842 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013843 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013844 int size;
13845 int ret = 0;
13846
Daniel Veillard0161e632008-08-28 15:36:32 +000013847 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013848 return(XML_ERR_ENTITY_LOOP);
13849 }
13850
13851
Daniel Veillardcda96922001-08-21 10:56:31 +000013852 if (lst != NULL)
13853 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013854 if (string == NULL)
13855 return(-1);
13856
13857 size = xmlStrlen(string);
13858
13859 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13860 if (ctxt == NULL) return(-1);
13861 ctxt->userData = ctxt;
13862 if (sax != NULL) {
13863 oldsax = ctxt->sax;
13864 ctxt->sax = sax;
13865 if (user_data != NULL)
13866 ctxt->userData = user_data;
13867 }
13868 newDoc = xmlNewDoc(BAD_CAST "1.0");
13869 if (newDoc == NULL) {
13870 xmlFreeParserCtxt(ctxt);
13871 return(-1);
13872 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013873 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013874 if ((doc != NULL) && (doc->dict != NULL)) {
13875 xmlDictFree(ctxt->dict);
13876 ctxt->dict = doc->dict;
13877 xmlDictReference(ctxt->dict);
13878 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13879 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13880 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13881 ctxt->dictNames = 1;
13882 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013883 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013884 }
Owen Taylor3473f882001-02-23 17:55:21 +000013885 if (doc != NULL) {
13886 newDoc->intSubset = doc->intSubset;
13887 newDoc->extSubset = doc->extSubset;
13888 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013889 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13890 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013891 if (sax != NULL)
13892 ctxt->sax = oldsax;
13893 xmlFreeParserCtxt(ctxt);
13894 newDoc->intSubset = NULL;
13895 newDoc->extSubset = NULL;
13896 xmlFreeDoc(newDoc);
13897 return(-1);
13898 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013899 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13900 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013901 if (doc == NULL) {
13902 ctxt->myDoc = newDoc;
13903 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013904 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013905 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013906 /* Ensure that doc has XML spec namespace */
13907 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13908 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013909 }
13910 ctxt->instate = XML_PARSER_CONTENT;
13911 ctxt->depth = depth;
13912
13913 /*
13914 * Doing validity checking on chunk doesn't make sense
13915 */
13916 ctxt->validate = 0;
13917 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013918 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013919
Daniel Veillardb39bc392002-10-26 19:29:51 +000013920 if ( doc != NULL ){
13921 content = doc->children;
13922 doc->children = NULL;
13923 xmlParseContent(ctxt);
13924 doc->children = content;
13925 }
13926 else {
13927 xmlParseContent(ctxt);
13928 }
Owen Taylor3473f882001-02-23 17:55:21 +000013929 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013930 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013931 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013932 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013933 }
13934 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013935 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013936 }
13937
13938 if (!ctxt->wellFormed) {
13939 if (ctxt->errNo == 0)
13940 ret = 1;
13941 else
13942 ret = ctxt->errNo;
13943 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013944 ret = 0;
13945 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013946
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013947 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13948 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013949
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013950 /*
13951 * Return the newly created nodeset after unlinking it from
13952 * they pseudo parent.
13953 */
13954 cur = newDoc->children->children;
13955 *lst = cur;
13956 while (cur != NULL) {
13957 xmlSetTreeDoc(cur, doc);
13958 cur->parent = NULL;
13959 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013960 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013961 newDoc->children->children = NULL;
13962 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013963
13964 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013965 ctxt->sax = oldsax;
13966 xmlFreeParserCtxt(ctxt);
13967 newDoc->intSubset = NULL;
13968 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013969 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013970 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013971
Owen Taylor3473f882001-02-23 17:55:21 +000013972 return(ret);
13973}
13974
13975/**
13976 * xmlSAXParseEntity:
13977 * @sax: the SAX handler block
13978 * @filename: the filename
13979 *
13980 * parse an XML external entity out of context and build a tree.
13981 * It use the given SAX function block to handle the parsing callback.
13982 * If sax is NULL, fallback to the default DOM tree building routines.
13983 *
13984 * [78] extParsedEnt ::= TextDecl? content
13985 *
13986 * This correspond to a "Well Balanced" chunk
13987 *
13988 * Returns the resulting document tree
13989 */
13990
13991xmlDocPtr
13992xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13993 xmlDocPtr ret;
13994 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013995
13996 ctxt = xmlCreateFileParserCtxt(filename);
13997 if (ctxt == NULL) {
13998 return(NULL);
13999 }
14000 if (sax != NULL) {
14001 if (ctxt->sax != NULL)
14002 xmlFree(ctxt->sax);
14003 ctxt->sax = sax;
14004 ctxt->userData = NULL;
14005 }
14006
Owen Taylor3473f882001-02-23 17:55:21 +000014007 xmlParseExtParsedEnt(ctxt);
14008
14009 if (ctxt->wellFormed)
14010 ret = ctxt->myDoc;
14011 else {
14012 ret = NULL;
14013 xmlFreeDoc(ctxt->myDoc);
14014 ctxt->myDoc = NULL;
14015 }
14016 if (sax != NULL)
14017 ctxt->sax = NULL;
14018 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000014019
Owen Taylor3473f882001-02-23 17:55:21 +000014020 return(ret);
14021}
14022
14023/**
14024 * xmlParseEntity:
14025 * @filename: the filename
14026 *
14027 * parse an XML external entity out of context and build a tree.
14028 *
14029 * [78] extParsedEnt ::= TextDecl? content
14030 *
14031 * This correspond to a "Well Balanced" chunk
14032 *
14033 * Returns the resulting document tree
14034 */
14035
14036xmlDocPtr
14037xmlParseEntity(const char *filename) {
14038 return(xmlSAXParseEntity(NULL, filename));
14039}
Daniel Veillard81273902003-09-30 00:43:48 +000014040#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014041
14042/**
Rob Richards9c0aa472009-03-26 18:10:19 +000014043 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000014044 * @URL: the entity URL
14045 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000014046 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000014047 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000014048 *
14049 * Create a parser context for an external entity
14050 * Automatic support for ZLIB/Compress compressed document is provided
14051 * by default if found at compile-time.
14052 *
14053 * Returns the new parser context or NULL
14054 */
Rob Richards9c0aa472009-03-26 18:10:19 +000014055static xmlParserCtxtPtr
14056xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
14057 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000014058 xmlParserCtxtPtr ctxt;
14059 xmlParserInputPtr inputStream;
14060 char *directory = NULL;
14061 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000014062
Owen Taylor3473f882001-02-23 17:55:21 +000014063 ctxt = xmlNewParserCtxt();
14064 if (ctxt == NULL) {
14065 return(NULL);
14066 }
14067
Daniel Veillard48247b42009-07-10 16:12:46 +020014068 if (pctx != NULL) {
14069 ctxt->options = pctx->options;
14070 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000014071 }
14072
Owen Taylor3473f882001-02-23 17:55:21 +000014073 uri = xmlBuildURI(URL, base);
14074
14075 if (uri == NULL) {
14076 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14077 if (inputStream == NULL) {
14078 xmlFreeParserCtxt(ctxt);
14079 return(NULL);
14080 }
14081
14082 inputPush(ctxt, inputStream);
14083
14084 if ((ctxt->directory == NULL) && (directory == NULL))
14085 directory = xmlParserGetDirectory((char *)URL);
14086 if ((ctxt->directory == NULL) && (directory != NULL))
14087 ctxt->directory = directory;
14088 } else {
14089 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14090 if (inputStream == NULL) {
14091 xmlFree(uri);
14092 xmlFreeParserCtxt(ctxt);
14093 return(NULL);
14094 }
14095
14096 inputPush(ctxt, inputStream);
14097
14098 if ((ctxt->directory == NULL) && (directory == NULL))
14099 directory = xmlParserGetDirectory((char *)uri);
14100 if ((ctxt->directory == NULL) && (directory != NULL))
14101 ctxt->directory = directory;
14102 xmlFree(uri);
14103 }
Owen Taylor3473f882001-02-23 17:55:21 +000014104 return(ctxt);
14105}
14106
Rob Richards9c0aa472009-03-26 18:10:19 +000014107/**
14108 * xmlCreateEntityParserCtxt:
14109 * @URL: the entity URL
14110 * @ID: the entity PUBLIC ID
14111 * @base: a possible base for the target URI
14112 *
14113 * Create a parser context for an external entity
14114 * Automatic support for ZLIB/Compress compressed document is provided
14115 * by default if found at compile-time.
14116 *
14117 * Returns the new parser context or NULL
14118 */
14119xmlParserCtxtPtr
14120xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14121 const xmlChar *base) {
14122 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14123
14124}
14125
Owen Taylor3473f882001-02-23 17:55:21 +000014126/************************************************************************
14127 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014128 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014129 * *
14130 ************************************************************************/
14131
14132/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014133 * xmlCreateURLParserCtxt:
14134 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014135 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014136 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014137 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014138 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014139 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014140 *
14141 * Returns the new parser context or NULL
14142 */
14143xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014144xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014145{
14146 xmlParserCtxtPtr ctxt;
14147 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014148 char *directory = NULL;
14149
Owen Taylor3473f882001-02-23 17:55:21 +000014150 ctxt = xmlNewParserCtxt();
14151 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014152 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014153 return(NULL);
14154 }
14155
Daniel Veillarddf292f72005-01-16 19:00:15 +000014156 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014157 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014158 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014159
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014160 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014161 if (inputStream == NULL) {
14162 xmlFreeParserCtxt(ctxt);
14163 return(NULL);
14164 }
14165
Owen Taylor3473f882001-02-23 17:55:21 +000014166 inputPush(ctxt, inputStream);
14167 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014168 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014169 if ((ctxt->directory == NULL) && (directory != NULL))
14170 ctxt->directory = directory;
14171
14172 return(ctxt);
14173}
14174
Daniel Veillard61b93382003-11-03 14:28:31 +000014175/**
14176 * xmlCreateFileParserCtxt:
14177 * @filename: the filename
14178 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014179 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014180 * Automatic support for ZLIB/Compress compressed document is provided
14181 * by default if found at compile-time.
14182 *
14183 * Returns the new parser context or NULL
14184 */
14185xmlParserCtxtPtr
14186xmlCreateFileParserCtxt(const char *filename)
14187{
14188 return(xmlCreateURLParserCtxt(filename, 0));
14189}
14190
Daniel Veillard81273902003-09-30 00:43:48 +000014191#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014192/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014193 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014194 * @sax: the SAX handler block
14195 * @filename: the filename
14196 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14197 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014198 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014199 *
14200 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14201 * compressed document is provided by default if found at compile-time.
14202 * It use the given SAX function block to handle the parsing callback.
14203 * If sax is NULL, fallback to the default DOM tree building routines.
14204 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014205 * User data (void *) is stored within the parser context in the
14206 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014207 *
Owen Taylor3473f882001-02-23 17:55:21 +000014208 * Returns the resulting document tree
14209 */
14210
14211xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014212xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14213 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014214 xmlDocPtr ret;
14215 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014216
Daniel Veillard635ef722001-10-29 11:48:19 +000014217 xmlInitParser();
14218
Owen Taylor3473f882001-02-23 17:55:21 +000014219 ctxt = xmlCreateFileParserCtxt(filename);
14220 if (ctxt == NULL) {
14221 return(NULL);
14222 }
14223 if (sax != NULL) {
14224 if (ctxt->sax != NULL)
14225 xmlFree(ctxt->sax);
14226 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014227 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014228 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014229 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014230 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014231 }
Owen Taylor3473f882001-02-23 17:55:21 +000014232
Daniel Veillard37d2d162008-03-14 10:54:00 +000014233 if (ctxt->directory == NULL)
14234 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014235
Daniel Veillarddad3f682002-11-17 16:47:27 +000014236 ctxt->recovery = recovery;
14237
Owen Taylor3473f882001-02-23 17:55:21 +000014238 xmlParseDocument(ctxt);
14239
William M. Brackc07329e2003-09-08 01:57:30 +000014240 if ((ctxt->wellFormed) || recovery) {
14241 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014242 if (ret != NULL) {
14243 if (ctxt->input->buf->compressed > 0)
14244 ret->compression = 9;
14245 else
14246 ret->compression = ctxt->input->buf->compressed;
14247 }
William M. Brackc07329e2003-09-08 01:57:30 +000014248 }
Owen Taylor3473f882001-02-23 17:55:21 +000014249 else {
14250 ret = NULL;
14251 xmlFreeDoc(ctxt->myDoc);
14252 ctxt->myDoc = NULL;
14253 }
14254 if (sax != NULL)
14255 ctxt->sax = NULL;
14256 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014257
Owen Taylor3473f882001-02-23 17:55:21 +000014258 return(ret);
14259}
14260
14261/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014262 * xmlSAXParseFile:
14263 * @sax: the SAX handler block
14264 * @filename: the filename
14265 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14266 * documents
14267 *
14268 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14269 * compressed document is provided by default if found at compile-time.
14270 * It use the given SAX function block to handle the parsing callback.
14271 * If sax is NULL, fallback to the default DOM tree building routines.
14272 *
14273 * Returns the resulting document tree
14274 */
14275
14276xmlDocPtr
14277xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14278 int recovery) {
14279 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14280}
14281
14282/**
Owen Taylor3473f882001-02-23 17:55:21 +000014283 * xmlRecoverDoc:
14284 * @cur: a pointer to an array of xmlChar
14285 *
14286 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014287 * In the case the document is not Well Formed, a attempt to build a
14288 * tree is tried anyway
14289 *
14290 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014291 */
14292
14293xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014294xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014295 return(xmlSAXParseDoc(NULL, cur, 1));
14296}
14297
14298/**
14299 * xmlParseFile:
14300 * @filename: the filename
14301 *
14302 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14303 * compressed document is provided by default if found at compile-time.
14304 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014305 * Returns the resulting document tree if the file was wellformed,
14306 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014307 */
14308
14309xmlDocPtr
14310xmlParseFile(const char *filename) {
14311 return(xmlSAXParseFile(NULL, filename, 0));
14312}
14313
14314/**
14315 * xmlRecoverFile:
14316 * @filename: the filename
14317 *
14318 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14319 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014320 * In the case the document is not Well Formed, it attempts to build
14321 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014322 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014323 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014324 */
14325
14326xmlDocPtr
14327xmlRecoverFile(const char *filename) {
14328 return(xmlSAXParseFile(NULL, filename, 1));
14329}
14330
14331
14332/**
14333 * xmlSetupParserForBuffer:
14334 * @ctxt: an XML parser context
14335 * @buffer: a xmlChar * buffer
14336 * @filename: a file name
14337 *
14338 * Setup the parser context to parse a new buffer; Clears any prior
14339 * contents from the parser context. The buffer parameter must not be
14340 * NULL, but the filename parameter can be
14341 */
14342void
14343xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14344 const char* filename)
14345{
14346 xmlParserInputPtr input;
14347
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014348 if ((ctxt == NULL) || (buffer == NULL))
14349 return;
14350
Owen Taylor3473f882001-02-23 17:55:21 +000014351 input = xmlNewInputStream(ctxt);
14352 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014353 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014354 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014355 return;
14356 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014357
Owen Taylor3473f882001-02-23 17:55:21 +000014358 xmlClearParserCtxt(ctxt);
14359 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014360 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014361 input->base = buffer;
14362 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014363 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014364 inputPush(ctxt, input);
14365}
14366
14367/**
14368 * xmlSAXUserParseFile:
14369 * @sax: a SAX handler
14370 * @user_data: The user data returned on SAX callbacks
14371 * @filename: a file name
14372 *
14373 * parse an XML file and call the given SAX handler routines.
14374 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014375 *
Owen Taylor3473f882001-02-23 17:55:21 +000014376 * Returns 0 in case of success or a error number otherwise
14377 */
14378int
14379xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14380 const char *filename) {
14381 int ret = 0;
14382 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014383
Owen Taylor3473f882001-02-23 17:55:21 +000014384 ctxt = xmlCreateFileParserCtxt(filename);
14385 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014386 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014387 xmlFree(ctxt->sax);
14388 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014389 xmlDetectSAX2(ctxt);
14390
Owen Taylor3473f882001-02-23 17:55:21 +000014391 if (user_data != NULL)
14392 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014393
Owen Taylor3473f882001-02-23 17:55:21 +000014394 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014395
Owen Taylor3473f882001-02-23 17:55:21 +000014396 if (ctxt->wellFormed)
14397 ret = 0;
14398 else {
14399 if (ctxt->errNo != 0)
14400 ret = ctxt->errNo;
14401 else
14402 ret = -1;
14403 }
14404 if (sax != NULL)
14405 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014406 if (ctxt->myDoc != NULL) {
14407 xmlFreeDoc(ctxt->myDoc);
14408 ctxt->myDoc = NULL;
14409 }
Owen Taylor3473f882001-02-23 17:55:21 +000014410 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014411
Owen Taylor3473f882001-02-23 17:55:21 +000014412 return ret;
14413}
Daniel Veillard81273902003-09-30 00:43:48 +000014414#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014415
14416/************************************************************************
14417 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014418 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014419 * *
14420 ************************************************************************/
14421
14422/**
14423 * xmlCreateMemoryParserCtxt:
14424 * @buffer: a pointer to a char array
14425 * @size: the size of the array
14426 *
14427 * Create a parser context for an XML in-memory document.
14428 *
14429 * Returns the new parser context or NULL
14430 */
14431xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014432xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014433 xmlParserCtxtPtr ctxt;
14434 xmlParserInputPtr input;
14435 xmlParserInputBufferPtr buf;
14436
14437 if (buffer == NULL)
14438 return(NULL);
14439 if (size <= 0)
14440 return(NULL);
14441
14442 ctxt = xmlNewParserCtxt();
14443 if (ctxt == NULL)
14444 return(NULL);
14445
Daniel Veillard53350552003-09-18 13:35:51 +000014446 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014447 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014448 if (buf == NULL) {
14449 xmlFreeParserCtxt(ctxt);
14450 return(NULL);
14451 }
Owen Taylor3473f882001-02-23 17:55:21 +000014452
14453 input = xmlNewInputStream(ctxt);
14454 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014455 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014456 xmlFreeParserCtxt(ctxt);
14457 return(NULL);
14458 }
14459
14460 input->filename = NULL;
14461 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014462 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014463
14464 inputPush(ctxt, input);
14465 return(ctxt);
14466}
14467
Daniel Veillard81273902003-09-30 00:43:48 +000014468#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014469/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014470 * xmlSAXParseMemoryWithData:
14471 * @sax: the SAX handler block
14472 * @buffer: an pointer to a char array
14473 * @size: the size of the array
14474 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14475 * documents
14476 * @data: the userdata
14477 *
14478 * parse an XML in-memory block and use the given SAX function block
14479 * to handle the parsing callback. If sax is NULL, fallback to the default
14480 * DOM tree building routines.
14481 *
14482 * User data (void *) is stored within the parser context in the
14483 * context's _private member, so it is available nearly everywhere in libxml
14484 *
14485 * Returns the resulting document tree
14486 */
14487
14488xmlDocPtr
14489xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14490 int size, int recovery, void *data) {
14491 xmlDocPtr ret;
14492 xmlParserCtxtPtr ctxt;
14493
Daniel Veillardab2a7632009-07-09 08:45:03 +020014494 xmlInitParser();
14495
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014496 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14497 if (ctxt == NULL) return(NULL);
14498 if (sax != NULL) {
14499 if (ctxt->sax != NULL)
14500 xmlFree(ctxt->sax);
14501 ctxt->sax = sax;
14502 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014503 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014504 if (data!=NULL) {
14505 ctxt->_private=data;
14506 }
14507
Daniel Veillardadba5f12003-04-04 16:09:01 +000014508 ctxt->recovery = recovery;
14509
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014510 xmlParseDocument(ctxt);
14511
14512 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14513 else {
14514 ret = NULL;
14515 xmlFreeDoc(ctxt->myDoc);
14516 ctxt->myDoc = NULL;
14517 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014518 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014519 ctxt->sax = NULL;
14520 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014521
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014522 return(ret);
14523}
14524
14525/**
Owen Taylor3473f882001-02-23 17:55:21 +000014526 * xmlSAXParseMemory:
14527 * @sax: the SAX handler block
14528 * @buffer: an pointer to a char array
14529 * @size: the size of the array
14530 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14531 * documents
14532 *
14533 * parse an XML in-memory block and use the given SAX function block
14534 * to handle the parsing callback. If sax is NULL, fallback to the default
14535 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014536 *
Owen Taylor3473f882001-02-23 17:55:21 +000014537 * Returns the resulting document tree
14538 */
14539xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014540xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14541 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014542 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014543}
14544
14545/**
14546 * xmlParseMemory:
14547 * @buffer: an pointer to a char array
14548 * @size: the size of the array
14549 *
14550 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014551 *
Owen Taylor3473f882001-02-23 17:55:21 +000014552 * Returns the resulting document tree
14553 */
14554
Daniel Veillard50822cb2001-07-26 20:05:51 +000014555xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014556 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14557}
14558
14559/**
14560 * xmlRecoverMemory:
14561 * @buffer: an pointer to a char array
14562 * @size: the size of the array
14563 *
14564 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014565 * In the case the document is not Well Formed, an attempt to
14566 * build a tree is tried anyway
14567 *
14568 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014569 */
14570
Daniel Veillard50822cb2001-07-26 20:05:51 +000014571xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014572 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14573}
14574
14575/**
14576 * xmlSAXUserParseMemory:
14577 * @sax: a SAX handler
14578 * @user_data: The user data returned on SAX callbacks
14579 * @buffer: an in-memory XML document input
14580 * @size: the length of the XML document in bytes
14581 *
14582 * A better SAX parsing routine.
14583 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014584 *
Owen Taylor3473f882001-02-23 17:55:21 +000014585 * Returns 0 in case of success or a error number otherwise
14586 */
14587int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014588 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014589 int ret = 0;
14590 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014591
14592 xmlInitParser();
14593
Owen Taylor3473f882001-02-23 17:55:21 +000014594 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14595 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014596 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14597 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014598 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014599 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014600
Daniel Veillard30211a02001-04-26 09:33:18 +000014601 if (user_data != NULL)
14602 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014603
Owen Taylor3473f882001-02-23 17:55:21 +000014604 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014605
Owen Taylor3473f882001-02-23 17:55:21 +000014606 if (ctxt->wellFormed)
14607 ret = 0;
14608 else {
14609 if (ctxt->errNo != 0)
14610 ret = ctxt->errNo;
14611 else
14612 ret = -1;
14613 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014614 if (sax != NULL)
14615 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014616 if (ctxt->myDoc != NULL) {
14617 xmlFreeDoc(ctxt->myDoc);
14618 ctxt->myDoc = NULL;
14619 }
Owen Taylor3473f882001-02-23 17:55:21 +000014620 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014621
Owen Taylor3473f882001-02-23 17:55:21 +000014622 return ret;
14623}
Daniel Veillard81273902003-09-30 00:43:48 +000014624#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014625
14626/**
14627 * xmlCreateDocParserCtxt:
14628 * @cur: a pointer to an array of xmlChar
14629 *
14630 * Creates a parser context for an XML in-memory document.
14631 *
14632 * Returns the new parser context or NULL
14633 */
14634xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014635xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014636 int len;
14637
14638 if (cur == NULL)
14639 return(NULL);
14640 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014641 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014642}
14643
Daniel Veillard81273902003-09-30 00:43:48 +000014644#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014645/**
14646 * xmlSAXParseDoc:
14647 * @sax: the SAX handler block
14648 * @cur: a pointer to an array of xmlChar
14649 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14650 * documents
14651 *
14652 * parse an XML in-memory document and build a tree.
14653 * It use the given SAX function block to handle the parsing callback.
14654 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014655 *
Owen Taylor3473f882001-02-23 17:55:21 +000014656 * Returns the resulting document tree
14657 */
14658
14659xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014660xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014661 xmlDocPtr ret;
14662 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014663 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014664
Daniel Veillard38936062004-11-04 17:45:11 +000014665 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014666
14667
14668 ctxt = xmlCreateDocParserCtxt(cur);
14669 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014670 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014671 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014672 ctxt->sax = sax;
14673 ctxt->userData = NULL;
14674 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014675 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014676
14677 xmlParseDocument(ctxt);
14678 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14679 else {
14680 ret = NULL;
14681 xmlFreeDoc(ctxt->myDoc);
14682 ctxt->myDoc = NULL;
14683 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014684 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014685 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014686 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014687
Owen Taylor3473f882001-02-23 17:55:21 +000014688 return(ret);
14689}
14690
14691/**
14692 * xmlParseDoc:
14693 * @cur: a pointer to an array of xmlChar
14694 *
14695 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014696 *
Owen Taylor3473f882001-02-23 17:55:21 +000014697 * Returns the resulting document tree
14698 */
14699
14700xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014701xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014702 return(xmlSAXParseDoc(NULL, cur, 0));
14703}
Daniel Veillard81273902003-09-30 00:43:48 +000014704#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014705
Daniel Veillard81273902003-09-30 00:43:48 +000014706#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014707/************************************************************************
14708 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014709 * Specific function to keep track of entities references *
14710 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014711 * *
14712 ************************************************************************/
14713
14714static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14715
14716/**
14717 * xmlAddEntityReference:
14718 * @ent : A valid entity
14719 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014720 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014721 *
14722 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14723 */
14724static void
14725xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14726 xmlNodePtr lastNode)
14727{
14728 if (xmlEntityRefFunc != NULL) {
14729 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14730 }
14731}
14732
14733
14734/**
14735 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014736 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014737 *
14738 * Set the function to call call back when a xml reference has been made
14739 */
14740void
14741xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14742{
14743 xmlEntityRefFunc = func;
14744}
Daniel Veillard81273902003-09-30 00:43:48 +000014745#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014746
14747/************************************************************************
14748 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014749 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014750 * *
14751 ************************************************************************/
14752
14753#ifdef LIBXML_XPATH_ENABLED
14754#include <libxml/xpath.h>
14755#endif
14756
Daniel Veillardffa3c742005-07-21 13:24:09 +000014757extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014758static int xmlParserInitialized = 0;
14759
14760/**
14761 * xmlInitParser:
14762 *
14763 * Initialization function for the XML parser.
14764 * This is not reentrant. Call once before processing in case of
14765 * use in multithreaded programs.
14766 */
14767
14768void
14769xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014770 if (xmlParserInitialized != 0)
14771 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014772
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014773#ifdef LIBXML_THREAD_ENABLED
14774 __xmlGlobalInitMutexLock();
14775 if (xmlParserInitialized == 0) {
14776#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014777 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014778 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014779 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14780 (xmlGenericError == NULL))
14781 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014782 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014783 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014784 xmlInitCharEncodingHandlers();
14785 xmlDefaultSAXHandlerInit();
14786 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014787#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014788 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014789#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014790#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014791 htmlInitAutoClose();
14792 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014793#endif
14794#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014795 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014796#endif
Daniel Veillard054c7162014-01-26 15:02:25 +010014797#ifdef LIBXML_CATALOG_ENABLED
14798 xmlInitializeCatalog();
14799#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014800 xmlParserInitialized = 1;
14801#ifdef LIBXML_THREAD_ENABLED
14802 }
14803 __xmlGlobalInitMutexUnlock();
14804#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014805}
14806
14807/**
14808 * xmlCleanupParser:
14809 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014810 * This function name is somewhat misleading. It does not clean up
14811 * parser state, it cleans up memory allocated by the library itself.
14812 * It is a cleanup function for the XML library. It tries to reclaim all
14813 * related global memory allocated for the library processing.
14814 * It doesn't deallocate any document related memory. One should
14815 * call xmlCleanupParser() only when the process has finished using
14816 * the library and all XML/HTML documents built with it.
14817 * See also xmlInitParser() which has the opposite function of preparing
14818 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014819 *
14820 * WARNING: if your application is multithreaded or has plugin support
14821 * calling this may crash the application if another thread or
14822 * a plugin is still using libxml2. It's sometimes very hard to
14823 * guess if libxml2 is in use in the application, some libraries
14824 * or plugins may use it without notice. In case of doubt abstain
14825 * from calling this function or do it just before calling exit()
14826 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014827 */
14828
14829void
14830xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014831 if (!xmlParserInitialized)
14832 return;
14833
Owen Taylor3473f882001-02-23 17:55:21 +000014834 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014835#ifdef LIBXML_CATALOG_ENABLED
14836 xmlCatalogCleanup();
14837#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014838 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014839 xmlCleanupInputCallbacks();
14840#ifdef LIBXML_OUTPUT_ENABLED
14841 xmlCleanupOutputCallbacks();
14842#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014843#ifdef LIBXML_SCHEMAS_ENABLED
14844 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014845 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014846#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014847 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014848 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014849 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014850 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014851 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014852}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014853
14854/************************************************************************
14855 * *
14856 * New set (2.6.0) of simpler and more flexible APIs *
14857 * *
14858 ************************************************************************/
14859
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and a NULL "dict" means the
 * string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else; the
 * caller supplies the terminating semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14871
14872/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014873 * xmlCtxtReset:
14874 * @ctxt: an XML parser context
14875 *
14876 * Reset a parser context
14877 */
14878void
14879xmlCtxtReset(xmlParserCtxtPtr ctxt)
14880{
14881 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014882 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014883
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014884 if (ctxt == NULL)
14885 return;
14886
14887 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014888
14889 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14890 xmlFreeInputStream(input);
14891 }
14892 ctxt->inputNr = 0;
14893 ctxt->input = NULL;
14894
14895 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014896 if (ctxt->spaceTab != NULL) {
14897 ctxt->spaceTab[0] = -1;
14898 ctxt->space = &ctxt->spaceTab[0];
14899 } else {
14900 ctxt->space = NULL;
14901 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014902
14903
14904 ctxt->nodeNr = 0;
14905 ctxt->node = NULL;
14906
14907 ctxt->nameNr = 0;
14908 ctxt->name = NULL;
14909
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014910 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014911 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014912 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014913 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014914 DICT_FREE(ctxt->directory);
14915 ctxt->directory = NULL;
14916 DICT_FREE(ctxt->extSubURI);
14917 ctxt->extSubURI = NULL;
14918 DICT_FREE(ctxt->extSubSystem);
14919 ctxt->extSubSystem = NULL;
14920 if (ctxt->myDoc != NULL)
14921 xmlFreeDoc(ctxt->myDoc);
14922 ctxt->myDoc = NULL;
14923
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014924 ctxt->standalone = -1;
14925 ctxt->hasExternalSubset = 0;
14926 ctxt->hasPErefs = 0;
14927 ctxt->html = 0;
14928 ctxt->external = 0;
14929 ctxt->instate = XML_PARSER_START;
14930 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014931
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014932 ctxt->wellFormed = 1;
14933 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014934 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014935 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014936#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014937 ctxt->vctxt.userData = ctxt;
14938 ctxt->vctxt.error = xmlParserValidityError;
14939 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014940#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014941 ctxt->record_info = 0;
14942 ctxt->nbChars = 0;
14943 ctxt->checkIndex = 0;
14944 ctxt->inSubset = 0;
14945 ctxt->errNo = XML_ERR_OK;
14946 ctxt->depth = 0;
14947 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14948 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014949 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014950 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014951 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014952 xmlInitNodeInfoSeq(&ctxt->node_seq);
14953
14954 if (ctxt->attsDefault != NULL) {
14955 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14956 ctxt->attsDefault = NULL;
14957 }
14958 if (ctxt->attsSpecial != NULL) {
14959 xmlHashFree(ctxt->attsSpecial, NULL);
14960 ctxt->attsSpecial = NULL;
14961 }
14962
Daniel Veillard4432df22003-09-28 18:58:27 +000014963#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014964 if (ctxt->catalogs != NULL)
14965 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014966#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014967 if (ctxt->lastError.code != XML_ERR_OK)
14968 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014969}
14970
14971/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014972 * xmlCtxtResetPush:
14973 * @ctxt: an XML parser context
14974 * @chunk: a pointer to an array of chars
14975 * @size: number of chars in the array
14976 * @filename: an optional file name or URI
14977 * @encoding: the document encoding, or NULL
14978 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014979 * Reset a push parser context
14980 *
14981 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014982 */
14983int
14984xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14985 int size, const char *filename, const char *encoding)
14986{
14987 xmlParserInputPtr inputStream;
14988 xmlParserInputBufferPtr buf;
14989 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14990
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014991 if (ctxt == NULL)
14992 return(1);
14993
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014994 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14995 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14996
14997 buf = xmlAllocParserInputBuffer(enc);
14998 if (buf == NULL)
14999 return(1);
15000
15001 if (ctxt == NULL) {
15002 xmlFreeParserInputBuffer(buf);
15003 return(1);
15004 }
15005
15006 xmlCtxtReset(ctxt);
15007
15008 if (ctxt->pushTab == NULL) {
15009 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
15010 sizeof(xmlChar *));
15011 if (ctxt->pushTab == NULL) {
15012 xmlErrMemory(ctxt, NULL);
15013 xmlFreeParserInputBuffer(buf);
15014 return(1);
15015 }
15016 }
15017
15018 if (filename == NULL) {
15019 ctxt->directory = NULL;
15020 } else {
15021 ctxt->directory = xmlParserGetDirectory(filename);
15022 }
15023
15024 inputStream = xmlNewInputStream(ctxt);
15025 if (inputStream == NULL) {
15026 xmlFreeParserInputBuffer(buf);
15027 return(1);
15028 }
15029
15030 if (filename == NULL)
15031 inputStream->filename = NULL;
15032 else
15033 inputStream->filename = (char *)
15034 xmlCanonicPath((const xmlChar *) filename);
15035 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080015036 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015037
15038 inputPush(ctxt, inputStream);
15039
15040 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
15041 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015042 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
15043 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015044
15045 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
15046
Daniel Veillard00ac0d32012-07-16 18:03:01 +080015047 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015048#ifdef DEBUG_PUSH
15049 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
15050#endif
15051 }
15052
15053 if (encoding != NULL) {
15054 xmlCharEncodingHandlerPtr hdlr;
15055
Daniel Veillard37334572008-07-31 08:20:02 +000015056 if (ctxt->encoding != NULL)
15057 xmlFree((xmlChar *) ctxt->encoding);
15058 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15059
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015060 hdlr = xmlFindCharEncodingHandler(encoding);
15061 if (hdlr != NULL) {
15062 xmlSwitchToEncoding(ctxt, hdlr);
15063 } else {
15064 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
15065 "Unsupported encoding %s\n", BAD_CAST encoding);
15066 }
15067 } else if (enc != XML_CHAR_ENCODING_NONE) {
15068 xmlSwitchEncoding(ctxt, enc);
15069 }
15070
15071 return(0);
15072}
15073
Daniel Veillard37334572008-07-31 08:20:02 +000015074
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015075/**
Daniel Veillard37334572008-07-31 08:20:02 +000015076 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015077 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015078 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015079 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015080 *
15081 * Applies the options to the parser context
15082 *
15083 * Returns 0 in case of success, the set of unknown or unimplemented options
15084 * in case of error.
15085 */
Daniel Veillard37334572008-07-31 08:20:02 +000015086static int
15087xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015088{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015089 if (ctxt == NULL)
15090 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015091 if (encoding != NULL) {
15092 if (ctxt->encoding != NULL)
15093 xmlFree((xmlChar *) ctxt->encoding);
15094 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15095 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015096 if (options & XML_PARSE_RECOVER) {
15097 ctxt->recovery = 1;
15098 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015099 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015100 } else
15101 ctxt->recovery = 0;
15102 if (options & XML_PARSE_DTDLOAD) {
15103 ctxt->loadsubset = XML_DETECT_IDS;
15104 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015105 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015106 } else
15107 ctxt->loadsubset = 0;
15108 if (options & XML_PARSE_DTDATTR) {
15109 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15110 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015111 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015112 }
15113 if (options & XML_PARSE_NOENT) {
15114 ctxt->replaceEntities = 1;
15115 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15116 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015117 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015118 } else
15119 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015120 if (options & XML_PARSE_PEDANTIC) {
15121 ctxt->pedantic = 1;
15122 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015123 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015124 } else
15125 ctxt->pedantic = 0;
15126 if (options & XML_PARSE_NOBLANKS) {
15127 ctxt->keepBlanks = 0;
15128 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15129 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015130 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015131 } else
15132 ctxt->keepBlanks = 1;
15133 if (options & XML_PARSE_DTDVALID) {
15134 ctxt->validate = 1;
15135 if (options & XML_PARSE_NOWARNING)
15136 ctxt->vctxt.warning = NULL;
15137 if (options & XML_PARSE_NOERROR)
15138 ctxt->vctxt.error = NULL;
15139 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015140 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015141 } else
15142 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015143 if (options & XML_PARSE_NOWARNING) {
15144 ctxt->sax->warning = NULL;
15145 options -= XML_PARSE_NOWARNING;
15146 }
15147 if (options & XML_PARSE_NOERROR) {
15148 ctxt->sax->error = NULL;
15149 ctxt->sax->fatalError = NULL;
15150 options -= XML_PARSE_NOERROR;
15151 }
Daniel Veillard81273902003-09-30 00:43:48 +000015152#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015153 if (options & XML_PARSE_SAX1) {
15154 ctxt->sax->startElement = xmlSAX2StartElement;
15155 ctxt->sax->endElement = xmlSAX2EndElement;
15156 ctxt->sax->startElementNs = NULL;
15157 ctxt->sax->endElementNs = NULL;
15158 ctxt->sax->initialized = 1;
15159 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015160 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015161 }
Daniel Veillard81273902003-09-30 00:43:48 +000015162#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015163 if (options & XML_PARSE_NODICT) {
15164 ctxt->dictNames = 0;
15165 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015166 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015167 } else {
15168 ctxt->dictNames = 1;
15169 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015170 if (options & XML_PARSE_NOCDATA) {
15171 ctxt->sax->cdataBlock = NULL;
15172 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015173 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015174 }
15175 if (options & XML_PARSE_NSCLEAN) {
15176 ctxt->options |= XML_PARSE_NSCLEAN;
15177 options -= XML_PARSE_NSCLEAN;
15178 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015179 if (options & XML_PARSE_NONET) {
15180 ctxt->options |= XML_PARSE_NONET;
15181 options -= XML_PARSE_NONET;
15182 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015183 if (options & XML_PARSE_COMPACT) {
15184 ctxt->options |= XML_PARSE_COMPACT;
15185 options -= XML_PARSE_COMPACT;
15186 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015187 if (options & XML_PARSE_OLD10) {
15188 ctxt->options |= XML_PARSE_OLD10;
15189 options -= XML_PARSE_OLD10;
15190 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015191 if (options & XML_PARSE_NOBASEFIX) {
15192 ctxt->options |= XML_PARSE_NOBASEFIX;
15193 options -= XML_PARSE_NOBASEFIX;
15194 }
15195 if (options & XML_PARSE_HUGE) {
15196 ctxt->options |= XML_PARSE_HUGE;
15197 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015198 if (ctxt->dict != NULL)
15199 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015200 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015201 if (options & XML_PARSE_OLDSAX) {
15202 ctxt->options |= XML_PARSE_OLDSAX;
15203 options -= XML_PARSE_OLDSAX;
15204 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015205 if (options & XML_PARSE_IGNORE_ENC) {
15206 ctxt->options |= XML_PARSE_IGNORE_ENC;
15207 options -= XML_PARSE_IGNORE_ENC;
15208 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015209 if (options & XML_PARSE_BIG_LINES) {
15210 ctxt->options |= XML_PARSE_BIG_LINES;
15211 options -= XML_PARSE_BIG_LINES;
15212 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015213 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015214 return (options);
15215}
15216
15217/**
Daniel Veillard37334572008-07-31 08:20:02 +000015218 * xmlCtxtUseOptions:
15219 * @ctxt: an XML parser context
15220 * @options: a combination of xmlParserOption
15221 *
15222 * Applies the options to the parser context
15223 *
15224 * Returns 0 in case of success, the set of unknown or unimplemented options
15225 * in case of error.
15226 */
15227int
15228xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15229{
15230 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15231}
15232
15233/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015234 * xmlDoRead:
15235 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015236 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015237 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015238 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015239 * @reuse: keep the context for reuse
15240 *
15241 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015242 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015243 * Returns the resulting document tree or NULL
15244 */
15245static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015246xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15247 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015248{
15249 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015250
15251 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015252 if (encoding != NULL) {
15253 xmlCharEncodingHandlerPtr hdlr;
15254
15255 hdlr = xmlFindCharEncodingHandler(encoding);
15256 if (hdlr != NULL)
15257 xmlSwitchToEncoding(ctxt, hdlr);
15258 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015259 if ((URL != NULL) && (ctxt->input != NULL) &&
15260 (ctxt->input->filename == NULL))
15261 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015262 xmlParseDocument(ctxt);
15263 if ((ctxt->wellFormed) || ctxt->recovery)
15264 ret = ctxt->myDoc;
15265 else {
15266 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015267 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015268 xmlFreeDoc(ctxt->myDoc);
15269 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015270 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015271 ctxt->myDoc = NULL;
15272 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015273 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015274 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015275
15276 return (ret);
15277}
15278
15279/**
15280 * xmlReadDoc:
15281 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015282 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015283 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015284 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015285 *
15286 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015287 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015288 * Returns the resulting document tree
15289 */
15290xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015291xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015292{
15293 xmlParserCtxtPtr ctxt;
15294
15295 if (cur == NULL)
15296 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015297 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015298
15299 ctxt = xmlCreateDocParserCtxt(cur);
15300 if (ctxt == NULL)
15301 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015302 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015303}
15304
15305/**
15306 * xmlReadFile:
15307 * @filename: a file or URL
15308 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015309 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015310 *
15311 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015312 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015313 * Returns the resulting document tree
15314 */
15315xmlDocPtr
15316xmlReadFile(const char *filename, const char *encoding, int options)
15317{
15318 xmlParserCtxtPtr ctxt;
15319
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015320 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015321 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015322 if (ctxt == NULL)
15323 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015324 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015325}
15326
15327/**
15328 * xmlReadMemory:
15329 * @buffer: a pointer to a char array
15330 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015331 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015332 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015333 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015334 *
15335 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015336 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015337 * Returns the resulting document tree
15338 */
15339xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015340xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015341{
15342 xmlParserCtxtPtr ctxt;
15343
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015344 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015345 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15346 if (ctxt == NULL)
15347 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015348 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015349}
15350
15351/**
15352 * xmlReadFd:
15353 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015354 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015355 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015356 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015357 *
15358 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015359 * NOTE that the file descriptor will not be closed when the
15360 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015361 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015362 * Returns the resulting document tree
15363 */
15364xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015365xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015366{
15367 xmlParserCtxtPtr ctxt;
15368 xmlParserInputBufferPtr input;
15369 xmlParserInputPtr stream;
15370
15371 if (fd < 0)
15372 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015373 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015374
15375 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15376 if (input == NULL)
15377 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015378 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015379 ctxt = xmlNewParserCtxt();
15380 if (ctxt == NULL) {
15381 xmlFreeParserInputBuffer(input);
15382 return (NULL);
15383 }
15384 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15385 if (stream == NULL) {
15386 xmlFreeParserInputBuffer(input);
15387 xmlFreeParserCtxt(ctxt);
15388 return (NULL);
15389 }
15390 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015391 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015392}
15393
15394/**
15395 * xmlReadIO:
15396 * @ioread: an I/O read function
15397 * @ioclose: an I/O close function
15398 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015399 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015400 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015401 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015402 *
15403 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015404 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015405 * Returns the resulting document tree
15406 */
15407xmlDocPtr
15408xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015409 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015410{
15411 xmlParserCtxtPtr ctxt;
15412 xmlParserInputBufferPtr input;
15413 xmlParserInputPtr stream;
15414
15415 if (ioread == NULL)
15416 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015417 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015418
15419 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15420 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015421 if (input == NULL) {
15422 if (ioclose != NULL)
15423 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015424 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015425 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015426 ctxt = xmlNewParserCtxt();
15427 if (ctxt == NULL) {
15428 xmlFreeParserInputBuffer(input);
15429 return (NULL);
15430 }
15431 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15432 if (stream == NULL) {
15433 xmlFreeParserInputBuffer(input);
15434 xmlFreeParserCtxt(ctxt);
15435 return (NULL);
15436 }
15437 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015438 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015439}
15440
15441/**
15442 * xmlCtxtReadDoc:
15443 * @ctxt: an XML parser context
15444 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015445 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015446 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015447 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015448 *
15449 * parse an XML in-memory document and build a tree.
15450 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015451 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015452 * Returns the resulting document tree
15453 */
15454xmlDocPtr
15455xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015456 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015457{
15458 xmlParserInputPtr stream;
15459
15460 if (cur == NULL)
15461 return (NULL);
15462 if (ctxt == NULL)
15463 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015464 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015465
15466 xmlCtxtReset(ctxt);
15467
15468 stream = xmlNewStringInputStream(ctxt, cur);
15469 if (stream == NULL) {
15470 return (NULL);
15471 }
15472 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015473 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015474}
15475
15476/**
15477 * xmlCtxtReadFile:
15478 * @ctxt: an XML parser context
15479 * @filename: a file or URL
15480 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015481 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015482 *
15483 * parse an XML file from the filesystem or the network.
15484 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015485 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015486 * Returns the resulting document tree
15487 */
15488xmlDocPtr
15489xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15490 const char *encoding, int options)
15491{
15492 xmlParserInputPtr stream;
15493
15494 if (filename == NULL)
15495 return (NULL);
15496 if (ctxt == NULL)
15497 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015498 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015499
15500 xmlCtxtReset(ctxt);
15501
Daniel Veillard29614c72004-11-26 10:47:26 +000015502 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015503 if (stream == NULL) {
15504 return (NULL);
15505 }
15506 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015507 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015508}
15509
15510/**
15511 * xmlCtxtReadMemory:
15512 * @ctxt: an XML parser context
15513 * @buffer: a pointer to a char array
15514 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015515 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015516 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015517 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015518 *
15519 * parse an XML in-memory document and build a tree.
15520 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015521 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015522 * Returns the resulting document tree
15523 */
15524xmlDocPtr
15525xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015526 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015527{
15528 xmlParserInputBufferPtr input;
15529 xmlParserInputPtr stream;
15530
15531 if (ctxt == NULL)
15532 return (NULL);
15533 if (buffer == NULL)
15534 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015535 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015536
15537 xmlCtxtReset(ctxt);
15538
15539 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15540 if (input == NULL) {
15541 return(NULL);
15542 }
15543
15544 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15545 if (stream == NULL) {
15546 xmlFreeParserInputBuffer(input);
15547 return(NULL);
15548 }
15549
15550 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015551 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015552}
15553
15554/**
15555 * xmlCtxtReadFd:
15556 * @ctxt: an XML parser context
15557 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015558 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015559 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015560 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015561 *
15562 * parse an XML from a file descriptor and build a tree.
15563 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015564 * NOTE that the file descriptor will not be closed when the
15565 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015566 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015567 * Returns the resulting document tree
15568 */
15569xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015570xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15571 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015572{
15573 xmlParserInputBufferPtr input;
15574 xmlParserInputPtr stream;
15575
15576 if (fd < 0)
15577 return (NULL);
15578 if (ctxt == NULL)
15579 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015580 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015581
15582 xmlCtxtReset(ctxt);
15583
15584
15585 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15586 if (input == NULL)
15587 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015588 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015589 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15590 if (stream == NULL) {
15591 xmlFreeParserInputBuffer(input);
15592 return (NULL);
15593 }
15594 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015595 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015596}
15597
15598/**
15599 * xmlCtxtReadIO:
15600 * @ctxt: an XML parser context
15601 * @ioread: an I/O read function
15602 * @ioclose: an I/O close function
15603 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015604 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015605 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015606 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015607 *
15608 * parse an XML document from I/O functions and source and build a tree.
15609 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015610 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015611 * Returns the resulting document tree
15612 */
15613xmlDocPtr
15614xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15615 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015616 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015617 const char *encoding, int options)
15618{
15619 xmlParserInputBufferPtr input;
15620 xmlParserInputPtr stream;
15621
15622 if (ioread == NULL)
15623 return (NULL);
15624 if (ctxt == NULL)
15625 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015626 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015627
15628 xmlCtxtReset(ctxt);
15629
15630 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15631 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015632 if (input == NULL) {
15633 if (ioclose != NULL)
15634 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015635 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015636 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015637 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15638 if (stream == NULL) {
15639 xmlFreeParserInputBuffer(input);
15640 return (NULL);
15641 }
15642 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015643 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015644}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015645
15646#define bottom_parser
15647#include "elfgcchack.h"