blob: 9347ac98f4b741e7cd0a598a55730d248ef47d1c [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
Daniel Veillardf8e3db02012-09-11 13:26:36 +080020 * different ranges of characters are actually implemented either in
Owen Taylor3473f882001-02-23 17:55:21 +000021 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard0161e632008-08-28 15:36:32 +000097/************************************************************************
98 * *
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
100 * *
101 ************************************************************************/
102
103#define XML_PARSER_BIG_ENTITY 1000
104#define XML_PARSER_LOT_ENTITY 5000
105
106/*
107 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
108 * replacement over the size in bytes of the input indicates that you have
109 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
110 * replacements per byte of input.
111 */
112#define XML_PARSER_NON_LINEAR 10
113
114/*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard23f05e02013-02-19 10:21:49 +0800125 xmlEntityPtr ent, size_t replacement)
Daniel Veillard0161e632008-08-28 15:36:32 +0000126{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
Daniel Veillard23f05e02013-02-19 10:21:49 +0800133 if (replacement != 0) {
134 if (replacement < XML_MAX_TEXT_LENGTH)
135 return(0);
136
137 /*
138 * If the volume of entity copy reaches 10 times the
139 * amount of parsed data and over the large text threshold
140 * then that's very likely to be an abuse.
141 */
142 if (ctxt->input != NULL) {
143 consumed = ctxt->input->consumed +
144 (ctxt->input->cur - ctxt->input->base);
145 }
146 consumed += ctxt->sizeentities;
147
148 if (replacement < XML_PARSER_NON_LINEAR * consumed)
149 return(0);
150 } else if (size != 0) {
Daniel Veillard0161e632008-08-28 15:36:32 +0000151 /*
152 * Do the check based on the replacement size of the entity
153 */
154 if (size < XML_PARSER_BIG_ENTITY)
155 return(0);
156
157 /*
158 * A limit on the amount of text data reasonably used
159 */
160 if (ctxt->input != NULL) {
161 consumed = ctxt->input->consumed +
162 (ctxt->input->cur - ctxt->input->base);
163 }
164 consumed += ctxt->sizeentities;
165
166 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
167 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
168 return (0);
169 } else if (ent != NULL) {
170 /*
171 * use the number of parsed entities in the replacement
172 */
Daniel Veillardcff25462013-03-11 15:57:55 +0800173 size = ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +0000174
175 /*
176 * The amount of data parsed counting entities size only once
177 */
178 if (ctxt->input != NULL) {
179 consumed = ctxt->input->consumed +
180 (ctxt->input->cur - ctxt->input->base);
181 }
182 consumed += ctxt->sizeentities;
183
184 /*
185 * Check the density of entities for the amount of data
186 * knowing an entity reference will take at least 3 bytes
187 */
188 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
189 return (0);
190 } else {
191 /*
192 * strange we got no data for checking just return
193 */
194 return (0);
195 }
Daniel Veillard0161e632008-08-28 15:36:32 +0000196 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
197 return (1);
198}
199
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000200/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000201 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000202 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000203 * arbitrary depth limit for the XML documents that we allow to
204 * process. This is not a limitation of the parser but a safety
205 * boundary feature. It can be disabled with the XML_PARSE_HUGE
206 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000207 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000208unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000209
Daniel Veillard0fb18932003-09-07 09:14:37 +0000210
Daniel Veillard0161e632008-08-28 15:36:32 +0000211
212#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000213#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000214#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000215#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
216
Daniel Veillard1f972e92012-08-15 10:16:37 +0800217/**
218 * XML_PARSER_CHUNK_SIZE
219 *
220 * When calling GROW that's the minimal amount of data
221 * the parser expects to have received. It is not a hard
222 * limit but an optimization when reading strings like Names.
223 * It is not strictly needed as long as the input's available characters
224 * are followed by 0, which should be provided by the I/O level
225 */
226#define XML_PARSER_CHUNK_SIZE 100
227
Owen Taylor3473f882001-02-23 17:55:21 +0000228/*
Owen Taylor3473f882001-02-23 17:55:21 +0000229 * List of XML prefixed PI allowed by W3C specs
230 */
231
Daniel Veillardb44025c2001-10-11 22:55:55 +0000232static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000233 "xml-stylesheet",
Daniel Veillard4c4653e2011-06-05 11:29:29 +0800234 "xml-model",
Owen Taylor3473f882001-02-23 17:55:21 +0000235 NULL
236};
237
Daniel Veillarda07050d2003-10-19 14:46:32 +0000238
Owen Taylor3473f882001-02-23 17:55:21 +0000239/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200240static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
241 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000242
Daniel Veillard7d515752003-09-26 19:12:37 +0000243static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000244xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
245 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000246 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000247 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000248
Daniel Veillard37334572008-07-31 08:20:02 +0000249static int
250xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
251 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000252#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000253static void
254xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
255 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000256#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000257
Daniel Veillard7d515752003-09-26 19:12:37 +0000258static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000259xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
260 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000261
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000262static int
263xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
264
Daniel Veillarde57ec792003-09-10 10:50:59 +0000265/************************************************************************
266 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800267 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000268 * *
269 ************************************************************************/
270
271/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000272 * xmlErrAttributeDup:
273 * @ctxt: an XML parser context
274 * @prefix: the attribute prefix
275 * @localname: the attribute localname
276 *
277 * Handle a redefinition of attribute error
278 */
279static void
280xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
281 const xmlChar * localname)
282{
Daniel Veillard157fee02003-10-31 10:36:03 +0000283 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
284 (ctxt->instate == XML_PARSER_EOF))
285 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000286 if (ctxt != NULL)
287 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200288
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000289 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000290 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200291 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 (const char *) localname, NULL, NULL, 0, 0,
293 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000294 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000295 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200296 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 (const char *) prefix, (const char *) localname,
298 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
299 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000300 if (ctxt != NULL) {
301 ctxt->wellFormed = 0;
302 if (ctxt->recovery == 0)
303 ctxt->disableSAX = 1;
304 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000305}
306
307/**
308 * xmlFatalErr:
309 * @ctxt: an XML parser context
310 * @error: the error number
311 * @extra: extra information string
312 *
313 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
314 */
315static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000317{
318 const char *errmsg;
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800319 char errstr[129] = "";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000320
Daniel Veillard157fee02003-10-31 10:36:03 +0000321 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
322 (ctxt->instate == XML_PARSER_EOF))
323 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 switch (error) {
325 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800326 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800329 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800332 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "internal error";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800338 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800341 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800344 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800347 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800350 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800353 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800356 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800359 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000360 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000361 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800362 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800365 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000366 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000367 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800368 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000370 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800371 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000372 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000373 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800374 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000376 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800377 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000378 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000379 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800380 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000381 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800383 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000384 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000385 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800386 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000387 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000388 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800389 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000390 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000391 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800392 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800395 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800398 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800401 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "Fragment not allowed";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800407 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800410 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000412 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000424 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800425 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000427 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000428 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800432 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800438 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 break;
440 case XML_ERR_CONDSEC_INVALID_KEYWORD:
441 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800442 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000443 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000444 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800445 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000446 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000447 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800448 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000449 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000450 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800451 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000452 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000453 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800454 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000455 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000456 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800457 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000458 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000459 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800460 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800463 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000464 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800466 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000467 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000468 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800469 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000470 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800472 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000473 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000474 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800475 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000476 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000477 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800478 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000479 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000480 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800481 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000482 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000483 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800487 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000488 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000491 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000492 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800493 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800495 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800496 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800497 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000498#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000499 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800500 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000501 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000502#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000503 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800504 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000505 }
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800506 if (info == NULL)
507 snprintf(errstr, 128, "%s\n", errmsg);
508 else
509 snprintf(errstr, 128, "%s: %%s\n", errmsg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000510 if (ctxt != NULL)
511 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000512 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800513 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000514 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000515 if (ctxt != NULL) {
516 ctxt->wellFormed = 0;
517 if (ctxt->recovery == 0)
518 ctxt->disableSAX = 1;
519 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000520}
521
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000522/**
523 * xmlFatalErrMsg:
524 * @ctxt: an XML parser context
525 * @error: the error number
526 * @msg: the error message
527 *
528 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
529 */
530static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000531xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
532 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000533{
Daniel Veillard157fee02003-10-31 10:36:03 +0000534 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
535 (ctxt->instate == XML_PARSER_EOF))
536 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000537 if (ctxt != NULL)
538 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000539 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200540 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000541 if (ctxt != NULL) {
542 ctxt->wellFormed = 0;
543 if (ctxt->recovery == 0)
544 ctxt->disableSAX = 1;
545 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000546}
547
548/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000549 * xmlWarningMsg:
550 * @ctxt: an XML parser context
551 * @error: the error number
552 * @msg: the error message
553 * @str1: extra data
554 * @str2: extra data
555 *
556 * Handle a warning.
557 */
558static void
559xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
560 const char *msg, const xmlChar *str1, const xmlChar *str2)
561{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000562 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000563
Daniel Veillard157fee02003-10-31 10:36:03 +0000564 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565 (ctxt->instate == XML_PARSER_EOF))
566 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000567 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
568 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000569 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200570 if (ctxt != NULL) {
571 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000572 (ctxt->sax) ? ctxt->sax->warning : NULL,
573 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000574 ctxt, NULL, XML_FROM_PARSER, error,
575 XML_ERR_WARNING, NULL, 0,
576 (const char *) str1, (const char *) str2, NULL, 0, 0,
577 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200578 } else {
579 __xmlRaiseError(schannel, NULL, NULL,
580 ctxt, NULL, XML_FROM_PARSER, error,
581 XML_ERR_WARNING, NULL, 0,
582 (const char *) str1, (const char *) str2, NULL, 0, 0,
583 msg, (const char *) str1, (const char *) str2);
584 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000585}
586
587/**
588 * xmlValidityError:
589 * @ctxt: an XML parser context
590 * @error: the error number
591 * @msg: the error message
592 * @str1: extra data
593 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000594 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000595 */
596static void
597xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000598 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000599{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000600 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000601
602 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603 (ctxt->instate == XML_PARSER_EOF))
604 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000605 if (ctxt != NULL) {
606 ctxt->errNo = error;
607 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
608 schannel = ctxt->sax->serror;
609 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200610 if (ctxt != NULL) {
611 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000612 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000613 ctxt, NULL, XML_FROM_DTD, error,
614 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000615 (const char *) str2, NULL, 0, 0,
616 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000617 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200618 } else {
619 __xmlRaiseError(schannel, NULL, NULL,
620 ctxt, NULL, XML_FROM_DTD, error,
621 XML_ERR_ERROR, NULL, 0, (const char *) str1,
622 (const char *) str2, NULL, 0, 0,
623 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000624 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000625}
626
627/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000628 * xmlFatalErrMsgInt:
629 * @ctxt: an XML parser context
630 * @error: the error number
631 * @msg: the error message
632 * @val: an integer value
633 *
634 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
635 */
636static void
637xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000638 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000639{
Daniel Veillard157fee02003-10-31 10:36:03 +0000640 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
641 (ctxt->instate == XML_PARSER_EOF))
642 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000643 if (ctxt != NULL)
644 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000645 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000646 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
647 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000648 if (ctxt != NULL) {
649 ctxt->wellFormed = 0;
650 if (ctxt->recovery == 0)
651 ctxt->disableSAX = 1;
652 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000653}
654
655/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000656 * xmlFatalErrMsgStrIntStr:
657 * @ctxt: an XML parser context
658 * @error: the error number
659 * @msg: the error message
660 * @str1: an string info
661 * @val: an integer value
662 * @str2: an string info
663 *
664 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
665 */
666static void
667xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800668 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000669 const xmlChar *str2)
670{
Daniel Veillard157fee02003-10-31 10:36:03 +0000671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL)
675 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000676 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000677 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
678 NULL, 0, (const char *) str1, (const char *) str2,
679 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000680 if (ctxt != NULL) {
681 ctxt->wellFormed = 0;
682 if (ctxt->recovery == 0)
683 ctxt->disableSAX = 1;
684 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000685}
686
687/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000688 * xmlFatalErrMsgStr:
689 * @ctxt: an XML parser context
690 * @error: the error number
691 * @msg: the error message
692 * @val: a string value
693 *
694 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
695 */
696static void
697xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000698 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000699{
Daniel Veillard157fee02003-10-31 10:36:03 +0000700 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
701 (ctxt->instate == XML_PARSER_EOF))
702 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000703 if (ctxt != NULL)
704 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000705 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000706 XML_FROM_PARSER, error, XML_ERR_FATAL,
707 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
708 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000709 if (ctxt != NULL) {
710 ctxt->wellFormed = 0;
711 if (ctxt->recovery == 0)
712 ctxt->disableSAX = 1;
713 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000714}
715
716/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000717 * xmlErrMsgStr:
718 * @ctxt: an XML parser context
719 * @error: the error number
720 * @msg: the error message
721 * @val: a string value
722 *
723 * Handle a non fatal parser error
724 */
725static void
726xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
727 const char *msg, const xmlChar * val)
728{
Daniel Veillard157fee02003-10-31 10:36:03 +0000729 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
730 (ctxt->instate == XML_PARSER_EOF))
731 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000732 if (ctxt != NULL)
733 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000734 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000735 XML_FROM_PARSER, error, XML_ERR_ERROR,
736 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
737 val);
738}
739
740/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000741 * xmlNsErr:
742 * @ctxt: an XML parser context
743 * @error: the error number
744 * @msg: the message
745 * @info1: extra information string
746 * @info2: extra information string
747 *
748 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
749 */
750static void
751xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
752 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000753 const xmlChar * info1, const xmlChar * info2,
754 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000755{
Daniel Veillard157fee02003-10-31 10:36:03 +0000756 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
757 (ctxt->instate == XML_PARSER_EOF))
758 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000759 if (ctxt != NULL)
760 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000761 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000762 XML_ERR_ERROR, NULL, 0, (const char *) info1,
763 (const char *) info2, (const char *) info3, 0, 0, msg,
764 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000765 if (ctxt != NULL)
766 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000767}
768
Daniel Veillard37334572008-07-31 08:20:02 +0000769/**
770 * xmlNsWarn
771 * @ctxt: an XML parser context
772 * @error: the error number
773 * @msg: the message
774 * @info1: extra information string
775 * @info2: extra information string
776 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800777 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000778 */
779static void
780xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
781 const char *msg,
782 const xmlChar * info1, const xmlChar * info2,
783 const xmlChar * info3)
784{
785 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
786 (ctxt->instate == XML_PARSER_EOF))
787 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000788 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
789 XML_ERR_WARNING, NULL, 0, (const char *) info1,
790 (const char *) info2, (const char *) info3, 0, 0, msg,
791 info1, info2, info3);
792}
793
/************************************************************************
 *                                                                      *
 *              Library wide options                                    *
 *                                                                      *
 ************************************************************************/
799
800/**
801 * xmlHasFeature:
802 * @feature: the feature to be examined
803 *
804 * Examines if the library has been compiled with a given feature.
805 *
806 * Returns a non-zero value if the feature exist, otherwise zero.
807 * Returns zero (0) if the feature does not exist or an unknown
808 * unknown feature is requested, non-zero otherwise.
809 */
810int
811xmlHasFeature(xmlFeature feature)
812{
813 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000814 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000815#ifdef LIBXML_THREAD_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000820 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000821#ifdef LIBXML_TREE_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000826 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000827#ifdef LIBXML_OUTPUT_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000832 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833#ifdef LIBXML_PUSH_ENABLED
834 return(1);
835#else
836 return(0);
837#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000838 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000839#ifdef LIBXML_READER_ENABLED
840 return(1);
841#else
842 return(0);
843#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000844 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000845#ifdef LIBXML_PATTERN_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000850 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000851#ifdef LIBXML_WRITER_ENABLED
852 return(1);
853#else
854 return(0);
855#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000856 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000857#ifdef LIBXML_SAX1_ENABLED
858 return(1);
859#else
860 return(0);
861#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000862 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000863#ifdef LIBXML_FTP_ENABLED
864 return(1);
865#else
866 return(0);
867#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000868 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000869#ifdef LIBXML_HTTP_ENABLED
870 return(1);
871#else
872 return(0);
873#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000874 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000875#ifdef LIBXML_VALID_ENABLED
876 return(1);
877#else
878 return(0);
879#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000880 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000881#ifdef LIBXML_HTML_ENABLED
882 return(1);
883#else
884 return(0);
885#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000886 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000887#ifdef LIBXML_LEGACY_ENABLED
888 return(1);
889#else
890 return(0);
891#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000892 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000893#ifdef LIBXML_C14N_ENABLED
894 return(1);
895#else
896 return(0);
897#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000898 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000899#ifdef LIBXML_CATALOG_ENABLED
900 return(1);
901#else
902 return(0);
903#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000904 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000905#ifdef LIBXML_XPATH_ENABLED
906 return(1);
907#else
908 return(0);
909#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000910 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000911#ifdef LIBXML_XPTR_ENABLED
912 return(1);
913#else
914 return(0);
915#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000916 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000917#ifdef LIBXML_XINCLUDE_ENABLED
918 return(1);
919#else
920 return(0);
921#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000922 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000923#ifdef LIBXML_ICONV_ENABLED
924 return(1);
925#else
926 return(0);
927#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000928 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000929#ifdef LIBXML_ISO8859X_ENABLED
930 return(1);
931#else
932 return(0);
933#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000934 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000935#ifdef LIBXML_UNICODE_ENABLED
936 return(1);
937#else
938 return(0);
939#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000940 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000941#ifdef LIBXML_REGEXP_ENABLED
942 return(1);
943#else
944 return(0);
945#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000946 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000947#ifdef LIBXML_AUTOMATA_ENABLED
948 return(1);
949#else
950 return(0);
951#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000952 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000953#ifdef LIBXML_EXPR_ENABLED
954 return(1);
955#else
956 return(0);
957#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000958 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000959#ifdef LIBXML_SCHEMAS_ENABLED
960 return(1);
961#else
962 return(0);
963#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000964 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000965#ifdef LIBXML_SCHEMATRON_ENABLED
966 return(1);
967#else
968 return(0);
969#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000970 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000971#ifdef LIBXML_MODULES_ENABLED
972 return(1);
973#else
974 return(0);
975#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000976 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000977#ifdef LIBXML_DEBUG_ENABLED
978 return(1);
979#else
980 return(0);
981#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000982 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000983#ifdef DEBUG_MEMORY_LOCATION
984 return(1);
985#else
986 return(0);
987#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000988 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000989#ifdef LIBXML_DEBUG_RUNTIME
990 return(1);
991#else
992 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000993#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000994 case XML_WITH_ZLIB:
995#ifdef LIBXML_ZLIB_ENABLED
996 return(1);
997#else
998 return(0);
999#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +02001000 case XML_WITH_LZMA:
1001#ifdef LIBXML_LZMA_ENABLED
1002 return(1);
1003#else
1004 return(0);
1005#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +01001006 case XML_WITH_ICU:
1007#ifdef LIBXML_ICU_ENABLED
1008 return(1);
1009#else
1010 return(0);
1011#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +00001012 default:
1013 break;
1014 }
1015 return(0);
1016}
1017
/************************************************************************
 *                                                                      *
 *              SAX2 defaulted attributes handling                      *
 *                                                                      *
 ************************************************************************/
1023
1024/**
1025 * xmlDetectSAX2:
1026 * @ctxt: an XML parser context
1027 *
1028 * Do the SAX2 detection and specific intialization
1029 */
1030static void
1031xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1032 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001033#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001034 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1035 ((ctxt->sax->startElementNs != NULL) ||
1036 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001037#else
1038 ctxt->sax2 = 1;
1039#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001040
1041 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1042 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1043 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001044 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1045 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001046 xmlErrMemory(ctxt, NULL);
1047 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001048}
1049
Daniel Veillarde57ec792003-09-10 10:50:59 +00001050typedef struct _xmlDefAttrs xmlDefAttrs;
1051typedef xmlDefAttrs *xmlDefAttrsPtr;
1052struct _xmlDefAttrs {
1053 int nbAttrs; /* number of defaulted attributes on that element */
1054 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001055 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001057
1058/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001059 * xmlAttrNormalizeSpace:
1060 * @src: the source string
1061 * @dst: the target string
1062 *
1063 * Normalize the space in non CDATA attribute values:
1064 * If the attribute type is not CDATA, then the XML processor MUST further
1065 * process the normalized attribute value by discarding any leading and
1066 * trailing space (#x20) characters, and by replacing sequences of space
1067 * (#x20) characters by a single space (#x20) character.
1068 * Note that the size of dst need to be at least src, and if one doesn't need
1069 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1070 * passing src as dst is just fine.
1071 *
1072 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1073 * is needed.
1074 */
1075static xmlChar *
1076xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1077{
1078 if ((src == NULL) || (dst == NULL))
1079 return(NULL);
1080
1081 while (*src == 0x20) src++;
1082 while (*src != 0) {
1083 if (*src == 0x20) {
1084 while (*src == 0x20) src++;
1085 if (*src != 0)
1086 *dst++ = 0x20;
1087 } else {
1088 *dst++ = *src++;
1089 }
1090 }
1091 *dst = 0;
1092 if (dst == src)
1093 return(NULL);
1094 return(dst);
1095}
1096
1097/**
1098 * xmlAttrNormalizeSpace2:
1099 * @src: the source string
1100 *
1101 * Normalize the space in non CDATA attribute values, a slightly more complex
1102 * front end to avoid allocation problems when running on attribute values
1103 * coming from the input.
1104 *
1105 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1106 * is needed.
1107 */
1108static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001109xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001110{
1111 int i;
1112 int remove_head = 0;
1113 int need_realloc = 0;
1114 const xmlChar *cur;
1115
1116 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1117 return(NULL);
1118 i = *len;
1119 if (i <= 0)
1120 return(NULL);
1121
1122 cur = src;
1123 while (*cur == 0x20) {
1124 cur++;
1125 remove_head++;
1126 }
1127 while (*cur != 0) {
1128 if (*cur == 0x20) {
1129 cur++;
1130 if ((*cur == 0x20) || (*cur == 0)) {
1131 need_realloc = 1;
1132 break;
1133 }
1134 } else
1135 cur++;
1136 }
1137 if (need_realloc) {
1138 xmlChar *ret;
1139
1140 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1141 if (ret == NULL) {
1142 xmlErrMemory(ctxt, NULL);
1143 return(NULL);
1144 }
1145 xmlAttrNormalizeSpace(ret, ret);
1146 *len = (int) strlen((const char *)ret);
1147 return(ret);
1148 } else if (remove_head) {
1149 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001150 memmove(src, src + remove_head, 1 + *len);
1151 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001152 }
1153 return(NULL);
1154}
1155
1156/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 * xmlAddDefAttrs:
1158 * @ctxt: an XML parser context
1159 * @fullname: the element fullname
1160 * @fullattr: the attribute fullname
1161 * @value: the attribute value
1162 *
1163 * Add a defaulted attribute for an element
1164 */
1165static void
1166xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1167 const xmlChar *fullname,
1168 const xmlChar *fullattr,
1169 const xmlChar *value) {
1170 xmlDefAttrsPtr defaults;
1171 int len;
1172 const xmlChar *name;
1173 const xmlChar *prefix;
1174
Daniel Veillard6a31b832008-03-26 14:06:44 +00001175 /*
1176 * Allows to detect attribute redefinitions
1177 */
1178 if (ctxt->attsSpecial != NULL) {
1179 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1180 return;
1181 }
1182
Daniel Veillarde57ec792003-09-10 10:50:59 +00001183 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001184 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001185 if (ctxt->attsDefault == NULL)
1186 goto mem_error;
1187 }
1188
1189 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001190 * split the element name into prefix:localname , the string found
1191 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001192 */
1193 name = xmlSplitQName3(fullname, &len);
1194 if (name == NULL) {
1195 name = xmlDictLookup(ctxt->dict, fullname, -1);
1196 prefix = NULL;
1197 } else {
1198 name = xmlDictLookup(ctxt->dict, name, -1);
1199 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1200 }
1201
1202 /*
1203 * make sure there is some storage
1204 */
1205 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1206 if (defaults == NULL) {
1207 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001208 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001209 if (defaults == NULL)
1210 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001211 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001212 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001213 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1214 defaults, NULL) < 0) {
1215 xmlFree(defaults);
1216 goto mem_error;
1217 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001218 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001219 xmlDefAttrsPtr temp;
1220
1221 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001222 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001223 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001224 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001225 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001226 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001227 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1228 defaults, NULL) < 0) {
1229 xmlFree(defaults);
1230 goto mem_error;
1231 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001232 }
1233
1234 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001235 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001236 * are within the DTD and hen not associated to namespace names.
1237 */
1238 name = xmlSplitQName3(fullattr, &len);
1239 if (name == NULL) {
1240 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1241 prefix = NULL;
1242 } else {
1243 name = xmlDictLookup(ctxt->dict, name, -1);
1244 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1245 }
1246
Daniel Veillardae0765b2008-07-31 19:54:59 +00001247 defaults->values[5 * defaults->nbAttrs] = name;
1248 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001249 /* intern the string and precompute the end */
1250 len = xmlStrlen(value);
1251 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001252 defaults->values[5 * defaults->nbAttrs + 2] = value;
1253 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1254 if (ctxt->external)
1255 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1256 else
1257 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001258 defaults->nbAttrs++;
1259
1260 return;
1261
1262mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001263 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001264 return;
1265}
1266
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001267/**
1268 * xmlAddSpecialAttr:
1269 * @ctxt: an XML parser context
1270 * @fullname: the element fullname
1271 * @fullattr: the attribute fullname
1272 * @type: the attribute type
1273 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001274 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001275 */
1276static void
1277xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1278 const xmlChar *fullname,
1279 const xmlChar *fullattr,
1280 int type)
1281{
1282 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001283 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001284 if (ctxt->attsSpecial == NULL)
1285 goto mem_error;
1286 }
1287
Daniel Veillardac4118d2008-01-11 05:27:32 +00001288 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1289 return;
1290
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001291 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1292 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001293 return;
1294
1295mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001296 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001297 return;
1298}
1299
Daniel Veillard4432df22003-09-28 18:58:27 +00001300/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001301 * xmlCleanSpecialAttrCallback:
1302 *
1303 * Removes CDATA attributes from the special attribute table
1304 */
1305static void
1306xmlCleanSpecialAttrCallback(void *payload, void *data,
1307 const xmlChar *fullname, const xmlChar *fullattr,
1308 const xmlChar *unused ATTRIBUTE_UNUSED) {
1309 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1310
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001311 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001312 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1313 }
1314}
1315
1316/**
1317 * xmlCleanSpecialAttr:
1318 * @ctxt: an XML parser context
1319 *
1320 * Trim the list of attributes defined to remove all those of type
1321 * CDATA as they are not special. This call should be done when finishing
1322 * to parse the DTD and before starting to parse the document root.
1323 */
1324static void
1325xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1326{
1327 if (ctxt->attsSpecial == NULL)
1328 return;
1329
1330 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1331
1332 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1333 xmlHashFree(ctxt->attsSpecial, NULL);
1334 ctxt->attsSpecial = NULL;
1335 }
1336 return;
1337}
1338
1339/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001340 * xmlCheckLanguageID:
1341 * @lang: pointer to the string value
1342 *
1343 * Checks that the value conforms to the LanguageID production:
1344 *
1345 * NOTE: this is somewhat deprecated, those productions were removed from
1346 * the XML Second edition.
1347 *
1348 * [33] LanguageID ::= Langcode ('-' Subcode)*
1349 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1350 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1351 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1352 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1353 * [38] Subcode ::= ([a-z] | [A-Z])+
1354 *
 * The current REC references the successors of RFC 1766, currently RFC 5646
1356 *
1357 * http://www.rfc-editor.org/rfc/rfc5646.txt
1358 * langtag = language
1359 * ["-" script]
1360 * ["-" region]
1361 * *("-" variant)
1362 * *("-" extension)
1363 * ["-" privateuse]
1364 * language = 2*3ALPHA ; shortest ISO 639 code
1365 * ["-" extlang] ; sometimes followed by
1366 * ; extended language subtags
1367 * / 4ALPHA ; or reserved for future use
1368 * / 5*8ALPHA ; or registered language subtag
1369 *
1370 * extlang = 3ALPHA ; selected ISO 639 codes
1371 * *2("-" 3ALPHA) ; permanently reserved
1372 *
1373 * script = 4ALPHA ; ISO 15924 code
1374 *
1375 * region = 2ALPHA ; ISO 3166-1 code
1376 * / 3DIGIT ; UN M.49 code
1377 *
1378 * variant = 5*8alphanum ; registered variants
1379 * / (DIGIT 3alphanum)
1380 *
1381 * extension = singleton 1*("-" (2*8alphanum))
1382 *
1383 * ; Single alphanumerics
1384 * ; "x" reserved for private use
1385 * singleton = DIGIT ; 0 - 9
1386 * / %x41-57 ; A - W
1387 * / %x59-5A ; Y - Z
1388 * / %x61-77 ; a - w
1389 * / %x79-7A ; y - z
1390 *
1391 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1392 * The parser below doesn't try to cope with extension or privateuse
1393 * that could be added but that's not interoperable anyway
1394 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001395 * Returns 1 if correct 0 otherwise
1396 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    /*
     * cur marks the start of the subtag being examined, nxt is advanced
     * past it; (nxt - cur) is therefore the length of the current subtag.
     */
    const xmlChar *cur = lang, *nxt;

    if (cur == NULL)
        return (0);
    if (((cur[0] == 'i') && (cur[1] == '-')) ||
        ((cur[0] == 'I') && (cur[1] == '-')) ||
        ((cur[0] == 'x') && (cur[1] == '-')) ||
        ((cur[0] == 'X') && (cur[1] == '-'))) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        nxt = cur;
        while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
               ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
            nxt++;
        /*
         * The code after the prefix must hold at least one letter and
         * run up to the end of the string: a bare "i-" or "x-" is not
         * a language identifier.
         */
        return((nxt > cur) && (nxt[0] == 0));
    }

    /* primary language subtag: a run of ASCII letters */
    nxt = cur;
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
        nxt++;
    if (nxt - cur >= 4) {
        /*
         * Reserved: 4 to 8 letters, and nothing may follow
         */
        if ((nxt - cur > 8) || (nxt[0] != 0))
            return(0);
        return(1);
    }
    if (nxt - cur < 2)
        return(0);
    /* we got an ISO 639 code */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have extlang or script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
        nxt++;
    if (nxt - cur == 4)
        goto script;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 3)
        return(0);
    /* we parsed an extlang */
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have script or region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
        nxt++;
    if (nxt - cur == 2)
        goto region;
    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 4)
        return(0);
    /* we parsed a script */
script:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can have region or variant */
    if ((nxt[0] >= '0') && (nxt[0] <= '9'))
        goto region_m49;

    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
        nxt++;

    if ((nxt - cur >= 5) && (nxt - cur <= 8))
        goto variant;
    if (nxt - cur != 2)
        return(0);
    /* we parsed a region */
region:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);

    nxt++;
    cur = nxt;
    /* now we can just have a variant */
    while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
           ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
        nxt++;

    if ((nxt - cur < 5) || (nxt - cur > 8))
        return(0);

    /* we parsed a variant */
variant:
    if (nxt[0] == 0)
        return(1);
    if (nxt[0] != '-')
        return(0);
    /* extensions and private use subtags not checked */
    return (1);

region_m49:
    /*
     * A numeric region is exactly three digits; nxt[0] was already
     * checked by the caller of this label. The && chain stops at the
     * first non-digit so the terminating 0 is never read past.
     */
    if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
        ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
        nxt += 3;
        goto region;
    }
    return(0);
}
1532
Owen Taylor3473f882001-02-23 17:55:21 +00001533/************************************************************************
1534 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001535 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001536 * *
1537 ************************************************************************/
1538
Daniel Veillard8ed10722009-08-20 19:17:36 +02001539static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1540 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001541
Daniel Veillard0fb18932003-09-07 09:14:37 +00001542#ifdef SAX2
1543/**
1544 * nsPush:
1545 * @ctxt: an XML parser context
1546 * @prefix: the namespace prefix or NULL
1547 * @URL: the namespace name
1548 *
1549 * Pushes a new parser namespace on top of the ns stack
1550 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001551 * Returns -1 in case of error, -2 if the namespace should be discarded
1552 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001553 */
1554static int
1555nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1556{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001557 if (ctxt->options & XML_PARSE_NSCLEAN) {
1558 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001559 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001560 if (ctxt->nsTab[i] == prefix) {
1561 /* in scope */
1562 if (ctxt->nsTab[i + 1] == URL)
1563 return(-2);
1564 /* out of scope keep it */
1565 break;
1566 }
1567 }
1568 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001569 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1570 ctxt->nsMax = 10;
1571 ctxt->nsNr = 0;
1572 ctxt->nsTab = (const xmlChar **)
1573 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1574 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001575 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001576 ctxt->nsMax = 0;
1577 return (-1);
1578 }
1579 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001580 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001581 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001582 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1583 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1584 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001585 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001586 ctxt->nsMax /= 2;
1587 return (-1);
1588 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001589 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001590 }
1591 ctxt->nsTab[ctxt->nsNr++] = prefix;
1592 ctxt->nsTab[ctxt->nsNr++] = URL;
1593 return (ctxt->nsNr);
1594}
1595/**
1596 * nsPop:
1597 * @ctxt: an XML parser context
1598 * @nr: the number to pop
1599 *
1600 * Pops the top @nr parser prefix/namespace from the ns stack
1601 *
1602 * Returns the number of namespaces removed
1603 */
1604static int
1605nsPop(xmlParserCtxtPtr ctxt, int nr)
1606{
1607 int i;
1608
1609 if (ctxt->nsTab == NULL) return(0);
1610 if (ctxt->nsNr < nr) {
1611 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1612 nr = ctxt->nsNr;
1613 }
1614 if (ctxt->nsNr <= 0)
1615 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001616
Daniel Veillard0fb18932003-09-07 09:14:37 +00001617 for (i = 0;i < nr;i++) {
1618 ctxt->nsNr--;
1619 ctxt->nsTab[ctxt->nsNr] = NULL;
1620 }
1621 return(nr);
1622}
1623#endif
1624
1625static int
1626xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1627 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001628 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001629 int maxatts;
1630
1631 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001632 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001633 atts = (const xmlChar **)
1634 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001635 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001636 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001637 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1638 if (attallocs == NULL) goto mem_error;
1639 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001640 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001641 } else if (nr + 5 > ctxt->maxatts) {
1642 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001643 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1644 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001645 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001646 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001647 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1648 (maxatts / 5) * sizeof(int));
1649 if (attallocs == NULL) goto mem_error;
1650 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001651 ctxt->maxatts = maxatts;
1652 }
1653 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001654mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001655 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001656 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001657}
1658
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001659/**
1660 * inputPush:
1661 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001662 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001663 *
1664 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001665 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001666 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001667 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001668int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1670{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001671 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001672 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001673 if (ctxt->inputNr >= ctxt->inputMax) {
1674 ctxt->inputMax *= 2;
1675 ctxt->inputTab =
1676 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1677 ctxt->inputMax *
1678 sizeof(ctxt->inputTab[0]));
1679 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001680 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001681 xmlFreeInputStream(value);
1682 ctxt->inputMax /= 2;
1683 value = NULL;
1684 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001685 }
1686 }
1687 ctxt->inputTab[ctxt->inputNr] = value;
1688 ctxt->input = value;
1689 return (ctxt->inputNr++);
1690}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001691/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001693 * @ctxt: an XML parser context
1694 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001696 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001697 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001698 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001699xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001700inputPop(xmlParserCtxtPtr ctxt)
1701{
1702 xmlParserInputPtr ret;
1703
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001704 if (ctxt == NULL)
1705 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001706 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001707 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708 ctxt->inputNr--;
1709 if (ctxt->inputNr > 0)
1710 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1711 else
1712 ctxt->input = NULL;
1713 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001714 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001715 return (ret);
1716}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001717/**
1718 * nodePush:
1719 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001720 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001721 *
1722 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001723 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001724 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001725 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001726int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001727nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1728{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001729 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001730 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001731 xmlNodePtr *tmp;
1732
1733 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1734 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001736 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001737 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001738 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001739 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001740 ctxt->nodeTab = tmp;
1741 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001742 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001743 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1744 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001745 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001746 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001747 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001748 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001749 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001750 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001751 ctxt->nodeTab[ctxt->nodeNr] = value;
1752 ctxt->node = value;
1753 return (ctxt->nodeNr++);
1754}
Daniel Veillard8915c152008-08-26 13:05:34 +00001755
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756/**
1757 * nodePop:
1758 * @ctxt: an XML parser context
1759 *
1760 * Pops the top element node from the node stack
1761 *
1762 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001763 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001764xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001765nodePop(xmlParserCtxtPtr ctxt)
1766{
1767 xmlNodePtr ret;
1768
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001769 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001770 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001771 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001772 ctxt->nodeNr--;
1773 if (ctxt->nodeNr > 0)
1774 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1775 else
1776 ctxt->node = NULL;
1777 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001778 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001779 return (ret);
1780}
Daniel Veillarda2351322004-06-27 12:08:10 +00001781
1782#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001783/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001784 * nameNsPush:
1785 * @ctxt: an XML parser context
1786 * @value: the element name
1787 * @prefix: the element prefix
1788 * @URI: the element namespace name
1789 *
1790 * Pushes a new element name/prefix/URL on top of the name stack
1791 *
1792 * Returns -1 in case of error, the index in the stack otherwise
1793 */
1794static int
1795nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1796 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1797{
1798 if (ctxt->nameNr >= ctxt->nameMax) {
1799 const xmlChar * *tmp;
1800 void **tmp2;
1801 ctxt->nameMax *= 2;
1802 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1803 ctxt->nameMax *
1804 sizeof(ctxt->nameTab[0]));
1805 if (tmp == NULL) {
1806 ctxt->nameMax /= 2;
1807 goto mem_error;
1808 }
1809 ctxt->nameTab = tmp;
1810 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1811 ctxt->nameMax * 3 *
1812 sizeof(ctxt->pushTab[0]));
1813 if (tmp2 == NULL) {
1814 ctxt->nameMax /= 2;
1815 goto mem_error;
1816 }
1817 ctxt->pushTab = tmp2;
1818 }
1819 ctxt->nameTab[ctxt->nameNr] = value;
1820 ctxt->name = value;
1821 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1822 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001823 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001824 return (ctxt->nameNr++);
1825mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001826 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001827 return (-1);
1828}
1829/**
1830 * nameNsPop:
1831 * @ctxt: an XML parser context
1832 *
1833 * Pops the top element/prefix/URI name from the name stack
1834 *
1835 * Returns the name just removed
1836 */
1837static const xmlChar *
1838nameNsPop(xmlParserCtxtPtr ctxt)
1839{
1840 const xmlChar *ret;
1841
1842 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001843 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001844 ctxt->nameNr--;
1845 if (ctxt->nameNr > 0)
1846 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1847 else
1848 ctxt->name = NULL;
1849 ret = ctxt->nameTab[ctxt->nameNr];
1850 ctxt->nameTab[ctxt->nameNr] = NULL;
1851 return (ret);
1852}
Daniel Veillarda2351322004-06-27 12:08:10 +00001853#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001854
1855/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001856 * namePush:
1857 * @ctxt: an XML parser context
1858 * @value: the element name
1859 *
1860 * Pushes a new element name on top of the name stack
1861 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001862 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001863 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001864int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001865namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001866{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001867 if (ctxt == NULL) return (-1);
1868
Daniel Veillard1c732d22002-11-30 11:22:59 +00001869 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001870 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001871 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001872 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001873 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001874 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001875 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001876 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001877 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001878 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001879 }
1880 ctxt->nameTab[ctxt->nameNr] = value;
1881 ctxt->name = value;
1882 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001883mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001884 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001885 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001886}
1887/**
1888 * namePop:
1889 * @ctxt: an XML parser context
1890 *
1891 * Pops the top element name from the name stack
1892 *
1893 * Returns the name just removed
1894 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001895const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001896namePop(xmlParserCtxtPtr ctxt)
1897{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001898 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001899
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001900 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1901 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001902 ctxt->nameNr--;
1903 if (ctxt->nameNr > 0)
1904 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1905 else
1906 ctxt->name = NULL;
1907 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001908 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001909 return (ret);
1910}
Owen Taylor3473f882001-02-23 17:55:21 +00001911
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001912static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001913 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001914 int *tmp;
1915
Owen Taylor3473f882001-02-23 17:55:21 +00001916 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001917 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1918 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1919 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001920 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001921 ctxt->spaceMax /=2;
1922 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001923 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001924 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001925 }
1926 ctxt->spaceTab[ctxt->spaceNr] = val;
1927 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1928 return(ctxt->spaceNr++);
1929}
1930
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001931static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001932 int ret;
1933 if (ctxt->spaceNr <= 0) return(0);
1934 ctxt->spaceNr--;
1935 if (ctxt->spaceNr > 0)
1936 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1937 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001938 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001939 ret = ctxt->spaceTab[ctxt->spaceNr];
1940 ctxt->spaceTab[ctxt->spaceNr] = -1;
1941 return(ret);
1942}
1943
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1978
/*
 * Direct access to the current input. All of these expect a variable
 * named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are c1..cn.
 * Evaluation stops at the first mismatch. s is evaluated several
 * times and must therefore be free of side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance val bytes that are known to contain no newline,
 * updating the column and character counters. A '%' found at the new
 * position is handed to xmlParserHandlePEReference(); if the input is
 * exhausted and cannot be refilled the current input is popped.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks byte by byte so that newlines
 * update the line counter and reset the column.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
        ctxt->input->line++; ctxt->input->col = 1;                      \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
2024
Daniel Veillarda880b122003-04-21 21:36:41 +00002025#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002026 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2027 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002028 xmlSHRINK (ctxt);
2029
2030static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2031 xmlParserInputShrink(ctxt->input);
2032 if ((*ctxt->input->cur == 0) &&
2033 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2034 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002035 }
Owen Taylor3473f882001-02-23 17:55:21 +00002036
Daniel Veillarda880b122003-04-21 21:36:41 +00002037#define GROW if ((ctxt->progressive == 0) && \
2038 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002039 xmlGROW (ctxt);
2040
2041static void xmlGROW (xmlParserCtxtPtr ctxt) {
Longstreth Jon190a0b82014-02-06 10:58:17 +01002042 unsigned long curEnd = ctxt->input->end - ctxt->input->cur;
2043 unsigned long curBase = ctxt->input->cur - ctxt->input->base;
2044
2045 if (((curEnd > (unsigned long) XML_MAX_LOOKUP_LIMIT) ||
2046 (curBase > (unsigned long) XML_MAX_LOOKUP_LIMIT)) &&
Patrick Gansterer9c8eaab2013-01-04 12:41:53 +01002047 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != (xmlInputReadCallback) xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002048 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2049 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard0df83ca2012-07-30 15:41:10 +08002050 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002051 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002052 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002053 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002054 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2055 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002056}
Owen Taylor3473f882001-02-23 17:55:21 +00002057
/* Shorthands; all of them expect a variable named ctxt in scope. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over one byte known not to be a newline and try to
 * read more input when a 0 byte is reached. Expands to a braced block.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): step over the current character, which is l bytes long,
 * keeping line/col in sync; a '%' found at the new position is handed
 * to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* l must be an lvalue: its address is passed to receive the length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a length l of 1 is stored directly as a single byte.
 * Expands to an unbraced if/else: keep it as a statement of its own.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) b[i++] = (xmlChar) (v);                               \
    else i += xmlCopyCharMultiByte(&b[i],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002084
2085/**
2086 * xmlSkipBlankChars:
2087 * @ctxt: the XML parser context
2088 *
2089 * skip all blanks character found at that point in the input streams.
2090 * It pops up finished entities in the process if allowable at that point.
2091 *
2092 * Returns the number of space chars skipped
2093 */
2094
2095int
2096xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002097 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002098
2099 /*
2100 * It's Okay to use CUR/NEXT here since all the blanks are on
2101 * the ASCII range.
2102 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002103 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2104 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002105 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002106 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002107 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002108 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002109 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002110 if (*cur == '\n') {
2111 ctxt->input->line++; ctxt->input->col = 1;
2112 }
2113 cur++;
2114 res++;
2115 if (*cur == 0) {
2116 ctxt->input->cur = cur;
2117 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2118 cur = ctxt->input->cur;
2119 }
2120 }
2121 ctxt->input->cur = cur;
2122 } else {
2123 int cur;
2124 do {
2125 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002126 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002127 NEXT;
2128 cur = CUR;
2129 res++;
2130 }
2131 while ((cur == 0) && (ctxt->inputNr > 1) &&
2132 (ctxt->instate != XML_PARSER_COMMENT)) {
2133 xmlPopInput(ctxt);
2134 cur = CUR;
2135 }
2136 /*
2137 * Need to handle support of entities branching here
2138 */
2139 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2140 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2141 }
Owen Taylor3473f882001-02-23 17:55:21 +00002142 return(res);
2143}
2144
2145/************************************************************************
2146 * *
2147 * Commodity functions to handle entities *
2148 * *
2149 ************************************************************************/
2150
2151/**
2152 * xmlPopInput:
2153 * @ctxt: an XML parser context
2154 *
2155 * xmlPopInput: the current input pointed by ctxt->input came to an end
2156 * pop it and return the next char.
2157 *
2158 * Returns the current xmlChar in the parser context
2159 */
2160xmlChar
2161xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002162 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002163 if (xmlParserDebugEntities)
2164 xmlGenericError(xmlGenericErrorContext,
2165 "Popping input %d\n", ctxt->inputNr);
2166 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002167 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002168 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2169 return(xmlPopInput(ctxt));
2170 return(CUR);
2171}
2172
2173/**
2174 * xmlPushInput:
2175 * @ctxt: an XML parser context
2176 * @input: an XML parser input fragment (entity, XML fragment ...).
2177 *
2178 * xmlPushInput: switch to a new input stream which is stacked on top
2179 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002180 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002181 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002182int
Owen Taylor3473f882001-02-23 17:55:21 +00002183xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002184 int ret;
2185 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002186
2187 if (xmlParserDebugEntities) {
2188 if ((ctxt->input != NULL) && (ctxt->input->filename))
2189 xmlGenericError(xmlGenericErrorContext,
2190 "%s(%d): ", ctxt->input->filename,
2191 ctxt->input->line);
2192 xmlGenericError(xmlGenericErrorContext,
2193 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2194 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002195 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002196 if (ctxt->instate == XML_PARSER_EOF)
2197 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002198 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002199 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002200}
2201
2202/**
2203 * xmlParseCharRef:
2204 * @ctxt: an XML parser context
2205 *
2206 * parse Reference declarations
2207 *
2208 * [66] CharRef ::= '&#' [0-9]+ ';' |
2209 * '&#x' [0-9a-fA-F]+ ';'
2210 *
2211 * [ WFC: Legal Character ]
2212 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002213 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002214 *
2215 * Returns the value parsed (as an int), 0 in case of error
2216 */
2217int
2218xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002219 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002220 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002221 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002222
Owen Taylor3473f882001-02-23 17:55:21 +00002223 /*
2224 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2225 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002226 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002227 (NXT(2) == 'x')) {
2228 SKIP(3);
2229 GROW;
2230 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002231 if (count++ > 20) {
2232 count = 0;
2233 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002234 if (ctxt->instate == XML_PARSER_EOF)
2235 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002236 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002237 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002238 val = val * 16 + (CUR - '0');
2239 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2240 val = val * 16 + (CUR - 'a') + 10;
2241 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2242 val = val * 16 + (CUR - 'A') + 10;
2243 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002244 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002245 val = 0;
2246 break;
2247 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002248 if (val > 0x10FFFF)
2249 outofrange = val;
2250
Owen Taylor3473f882001-02-23 17:55:21 +00002251 NEXT;
2252 count++;
2253 }
2254 if (RAW == ';') {
2255 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002256 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002257 ctxt->nbChars ++;
2258 ctxt->input->cur++;
2259 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002260 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002261 SKIP(2);
2262 GROW;
2263 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002264 if (count++ > 20) {
2265 count = 0;
2266 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002267 if (ctxt->instate == XML_PARSER_EOF)
2268 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002269 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002270 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002271 val = val * 10 + (CUR - '0');
2272 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002273 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002274 val = 0;
2275 break;
2276 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002277 if (val > 0x10FFFF)
2278 outofrange = val;
2279
Owen Taylor3473f882001-02-23 17:55:21 +00002280 NEXT;
2281 count++;
2282 }
2283 if (RAW == ';') {
2284 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002285 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002286 ctxt->nbChars ++;
2287 ctxt->input->cur++;
2288 }
2289 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002290 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002291 }
2292
2293 /*
2294 * [ WFC: Legal Character ]
2295 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002296 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002297 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002298 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002299 return(val);
2300 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002301 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2302 "xmlParseCharRef: invalid xmlChar value %d\n",
2303 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002304 }
2305 return(0);
2306}
2307
2308/**
2309 * xmlParseStringCharRef:
2310 * @ctxt: an XML parser context
2311 * @str: a pointer to an index in the string
2312 *
2313 * parse Reference declarations, variant parsing from a string rather
2314 * than an an input flow.
2315 *
2316 * [66] CharRef ::= '&#' [0-9]+ ';' |
2317 * '&#x' [0-9a-fA-F]+ ';'
2318 *
2319 * [ WFC: Legal Character ]
2320 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002321 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002322 *
2323 * Returns the value parsed (as an int), 0 in case of error, str will be
2324 * updated to the current value of the index
2325 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002326static int
Owen Taylor3473f882001-02-23 17:55:21 +00002327xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2328 const xmlChar *ptr;
2329 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002330 unsigned int val = 0;
2331 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002332
2333 if ((str == NULL) || (*str == NULL)) return(0);
2334 ptr = *str;
2335 cur = *ptr;
2336 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2337 ptr += 3;
2338 cur = *ptr;
2339 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002340 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002341 val = val * 16 + (cur - '0');
2342 else if ((cur >= 'a') && (cur <= 'f'))
2343 val = val * 16 + (cur - 'a') + 10;
2344 else if ((cur >= 'A') && (cur <= 'F'))
2345 val = val * 16 + (cur - 'A') + 10;
2346 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002347 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002348 val = 0;
2349 break;
2350 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002351 if (val > 0x10FFFF)
2352 outofrange = val;
2353
Owen Taylor3473f882001-02-23 17:55:21 +00002354 ptr++;
2355 cur = *ptr;
2356 }
2357 if (cur == ';')
2358 ptr++;
2359 } else if ((cur == '&') && (ptr[1] == '#')){
2360 ptr += 2;
2361 cur = *ptr;
2362 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002363 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002364 val = val * 10 + (cur - '0');
2365 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002366 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002367 val = 0;
2368 break;
2369 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002370 if (val > 0x10FFFF)
2371 outofrange = val;
2372
Owen Taylor3473f882001-02-23 17:55:21 +00002373 ptr++;
2374 cur = *ptr;
2375 }
2376 if (cur == ';')
2377 ptr++;
2378 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002379 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002380 return(0);
2381 }
2382 *str = ptr;
2383
2384 /*
2385 * [ WFC: Legal Character ]
2386 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002387 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002388 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002389 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002390 return(val);
2391 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002392 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2393 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2394 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002395 }
2396 return(0);
2397}
2398
2399/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002400 * xmlNewBlanksWrapperInputStream:
2401 * @ctxt: an XML parser context
2402 * @entity: an Entity pointer
2403 *
2404 * Create a new input stream for wrapping
2405 * blanks around a PEReference
2406 *
2407 * Returns the new input stream or NULL
2408 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002409
Daniel Veillardf5582f12002-06-11 10:08:16 +00002410static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002411
Daniel Veillardf4862f02002-09-10 11:13:43 +00002412static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002413xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2414 xmlParserInputPtr input;
2415 xmlChar *buffer;
2416 size_t length;
2417 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002418 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2419 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002420 return(NULL);
2421 }
2422 if (xmlParserDebugEntities)
2423 xmlGenericError(xmlGenericErrorContext,
2424 "new blanks wrapper for entity: %s\n", entity->name);
2425 input = xmlNewInputStream(ctxt);
2426 if (input == NULL) {
2427 return(NULL);
2428 }
2429 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002430 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002431 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002432 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002433 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002434 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002435 }
2436 buffer [0] = ' ';
2437 buffer [1] = '%';
2438 buffer [length-3] = ';';
2439 buffer [length-2] = ' ';
2440 buffer [length-1] = 0;
2441 memcpy(buffer + 2, entity->name, length - 5);
2442 input->free = deallocblankswrapper;
2443 input->base = buffer;
2444 input->cur = buffer;
2445 input->length = length;
2446 input->end = &buffer[length];
2447 return(input);
2448}
2449
2450/**
Owen Taylor3473f882001-02-23 17:55:21 +00002451 * xmlParserHandlePEReference:
2452 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002453 *
Owen Taylor3473f882001-02-23 17:55:21 +00002454 * [69] PEReference ::= '%' Name ';'
2455 *
2456 * [ WFC: No Recursion ]
2457 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002458 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002459 *
2460 * [ WFC: Entity Declared ]
2461 * In a document without any DTD, a document with only an internal DTD
2462 * subset which contains no parameter entity references, or a document
2463 * with "standalone='yes'", ... ... The declaration of a parameter
2464 * entity must precede any reference to it...
2465 *
2466 * [ VC: Entity Declared ]
2467 * In a document with an external subset or external parameter entities
2468 * with "standalone='no'", ... ... The declaration of a parameter entity
2469 * must precede any reference to it...
2470 *
2471 * [ WFC: In DTD ]
2472 * Parameter-entity references may only appear in the DTD.
2473 * NOTE: misleading but this is handled.
2474 *
2475 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002476 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002477 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002478 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002479 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002480 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002481 */
2482void
2483xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002484 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002485 xmlEntityPtr entity = NULL;
2486 xmlParserInputPtr input;
2487
Owen Taylor3473f882001-02-23 17:55:21 +00002488 if (RAW != '%') return;
2489 switch(ctxt->instate) {
2490 case XML_PARSER_CDATA_SECTION:
2491 return;
2492 case XML_PARSER_COMMENT:
2493 return;
2494 case XML_PARSER_START_TAG:
2495 return;
2496 case XML_PARSER_END_TAG:
2497 return;
2498 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002499 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002500 return;
2501 case XML_PARSER_PROLOG:
2502 case XML_PARSER_START:
2503 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002504 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002505 return;
2506 case XML_PARSER_ENTITY_DECL:
2507 case XML_PARSER_CONTENT:
2508 case XML_PARSER_ATTRIBUTE_VALUE:
2509 case XML_PARSER_PI:
2510 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002511 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002512 /* we just ignore it there */
2513 return;
2514 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002515 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002516 return;
2517 case XML_PARSER_ENTITY_VALUE:
2518 /*
2519 * NOTE: in the case of entity values, we don't do the
2520 * substitution here since we need the literal
2521 * entity value to be able to save the internal
2522 * subset of the document.
2523 * This will be handled by xmlStringDecodeEntities
2524 */
2525 return;
2526 case XML_PARSER_DTD:
2527 /*
2528 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2529 * In the internal DTD subset, parameter-entity references
2530 * can occur only where markup declarations can occur, not
2531 * within markup declarations.
2532 * In that case this is handled in xmlParseMarkupDecl
2533 */
2534 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2535 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002536 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002537 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002538 break;
2539 case XML_PARSER_IGNORE:
2540 return;
2541 }
2542
2543 NEXT;
2544 name = xmlParseName(ctxt);
2545 if (xmlParserDebugEntities)
2546 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002547 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002548 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002549 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002550 } else {
2551 if (RAW == ';') {
2552 NEXT;
2553 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2554 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillarde50ba812013-04-11 15:54:51 +08002555 if (ctxt->instate == XML_PARSER_EOF)
2556 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002557 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002558
Owen Taylor3473f882001-02-23 17:55:21 +00002559 /*
2560 * [ WFC: Entity Declared ]
2561 * In a document without any DTD, a document with only an
2562 * internal DTD subset which contains no parameter entity
2563 * references, or a document with "standalone='yes'", ...
2564 * ... The declaration of a parameter entity must precede
2565 * any reference to it...
2566 */
2567 if ((ctxt->standalone == 1) ||
2568 ((ctxt->hasExternalSubset == 0) &&
2569 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002570 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002571 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002572 } else {
2573 /*
2574 * [ VC: Entity Declared ]
2575 * In a document with an external subset or external
2576 * parameter entities with "standalone='no'", ...
2577 * ... The declaration of a parameter entity must precede
2578 * any reference to it...
2579 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002580 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2581 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2582 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002583 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002584 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002585 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2586 "PEReference: %%%s; not found\n",
2587 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002588 ctxt->valid = 0;
2589 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002590 } else if (ctxt->input->free != deallocblankswrapper) {
2591 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002592 if (xmlPushInput(ctxt, input) < 0)
2593 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002594 } else {
2595 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2596 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002597 xmlChar start[4];
2598 xmlCharEncoding enc;
2599
Owen Taylor3473f882001-02-23 17:55:21 +00002600 /*
2601 * handle the extra spaces added before and after
2602 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002603 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002604 */
2605 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002606 if (xmlPushInput(ctxt, input) < 0)
2607 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002608
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002609 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002610 * Get the 4 first bytes and decode the charset
2611 * if enc != XML_CHAR_ENCODING_NONE
2612 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002613 * Note that, since we may have some non-UTF8
2614 * encoding (like UTF16, bug 135229), the 'length'
2615 * is not known, but we can calculate based upon
2616 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002617 */
2618 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002619 if (ctxt->instate == XML_PARSER_EOF)
2620 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002621 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002622 start[0] = RAW;
2623 start[1] = NXT(1);
2624 start[2] = NXT(2);
2625 start[3] = NXT(3);
2626 enc = xmlDetectCharEncoding(start, 4);
2627 if (enc != XML_CHAR_ENCODING_NONE) {
2628 xmlSwitchEncoding(ctxt, enc);
2629 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002630 }
2631
Owen Taylor3473f882001-02-23 17:55:21 +00002632 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002633 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2634 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002635 xmlParseTextDecl(ctxt);
2636 }
Owen Taylor3473f882001-02-23 17:55:21 +00002637 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002638 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2639 "PEReference: %s is not a parameter entity\n",
2640 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002641 }
2642 }
2643 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002644 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002645 }
Owen Taylor3473f882001-02-23 17:55:21 +00002646 }
2647}
2648
/*
 * Macro used to grow the current buffer.
 * @buffer must be a plain identifier naming an xmlChar pointer, and a
 * variable called buffer##_size, expected to be a size_t, must hold its
 * current size. The new size is twice the old one plus @n; @n may be any
 * expression, it is evaluated once.
 * mem_error: is expected to handle memory allocation failures, it is
 * jumped to when the new size wraps around below the old one or when the
 * reallocation fails; the buffer and its size are left untouched then.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2663
2664/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002665 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002666 * @ctxt: the parser context
2667 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002668 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002669 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2670 * @end: an end marker xmlChar, 0 if none
2671 * @end2: an end marker xmlChar, 0 if none
2672 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002673 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002674 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002675 *
2676 * [67] Reference ::= EntityRef | CharRef
2677 *
2678 * [69] PEReference ::= '%' Name ';'
2679 *
2680 * Returns A newly allocated string with the substitution done. The caller
2681 * must deallocate it !
2682 */
2683xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002684xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2685 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002686 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002687 size_t buffer_size = 0;
2688 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002689
2690 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002691 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002692 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002693 xmlEntityPtr ent;
2694 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002695
Daniel Veillarda82b1822004-11-08 16:24:57 +00002696 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002697 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002698 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002699
Daniel Veillard0161e632008-08-28 15:36:32 +00002700 if (((ctxt->depth > 40) &&
2701 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2702 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002703 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002704 return(NULL);
2705 }
2706
2707 /*
2708 * allocate a translation buffer.
2709 */
2710 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002711 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002712 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002713
2714 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002715 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002716 * we are operating on already parsed values.
2717 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002718 if (str < last)
2719 c = CUR_SCHAR(str, l);
2720 else
2721 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002722 while ((c != 0) && (c != end) && /* non input consuming loop */
2723 (c != end2) && (c != end3)) {
2724
2725 if (c == 0) break;
2726 if ((c == '&') && (str[1] == '#')) {
2727 int val = xmlParseStringCharRef(ctxt, &str);
2728 if (val != 0) {
2729 COPY_BUF(0,buffer,nbchars,val);
2730 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002731 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002732 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002733 }
Owen Taylor3473f882001-02-23 17:55:21 +00002734 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2735 if (xmlParserDebugEntities)
2736 xmlGenericError(xmlGenericErrorContext,
2737 "String decoding Entity Reference: %.30s\n",
2738 str);
2739 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002740 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2741 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002742 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002743 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002744 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002745 if ((ent != NULL) &&
2746 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2747 if (ent->content != NULL) {
2748 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002749 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002750 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002751 }
Owen Taylor3473f882001-02-23 17:55:21 +00002752 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002753 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2754 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002755 }
2756 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002757 ctxt->depth++;
2758 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2759 0, 0, 0);
2760 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002761
Owen Taylor3473f882001-02-23 17:55:21 +00002762 if (rep != NULL) {
2763 current = rep;
2764 while (*current != 0) { /* non input consuming loop */
2765 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002766 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002767 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002768 goto int_error;
2769 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002770 }
2771 }
2772 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002773 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002774 }
2775 } else if (ent != NULL) {
2776 int i = xmlStrlen(ent->name);
2777 const xmlChar *cur = ent->name;
2778
2779 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002780 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002781 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002782 }
2783 for (;i > 0;i--)
2784 buffer[nbchars++] = *cur++;
2785 buffer[nbchars++] = ';';
2786 }
2787 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2788 if (xmlParserDebugEntities)
2789 xmlGenericError(xmlGenericErrorContext,
2790 "String decoding PE Reference: %.30s\n", str);
2791 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002792 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2793 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002794 if (ent != NULL)
Daniel Veillardcff25462013-03-11 15:57:55 +08002795 ctxt->nbentities += ent->checked / 2;
Owen Taylor3473f882001-02-23 17:55:21 +00002796 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002797 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002798 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002799 }
Owen Taylor3473f882001-02-23 17:55:21 +00002800 ctxt->depth++;
2801 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2802 0, 0, 0);
2803 ctxt->depth--;
2804 if (rep != NULL) {
2805 current = rep;
2806 while (*current != 0) { /* non input consuming loop */
2807 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002808 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard23f05e02013-02-19 10:21:49 +08002809 if (xmlParserEntityCheck(ctxt, nbchars, ent, 0))
Daniel Veillard0161e632008-08-28 15:36:32 +00002810 goto int_error;
2811 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002812 }
2813 }
2814 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002815 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002816 }
2817 }
2818 } else {
2819 COPY_BUF(l,buffer,nbchars,c);
2820 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002821 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2822 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002823 }
2824 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002825 if (str < last)
2826 c = CUR_SCHAR(str, l);
2827 else
2828 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002829 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002830 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002831 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002832
2833mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002834 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002835int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002836 if (rep != NULL)
2837 xmlFree(rep);
2838 if (buffer != NULL)
2839 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002840 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002841}
2842
Daniel Veillarde57ec792003-09-10 10:50:59 +00002843/**
2844 * xmlStringDecodeEntities:
2845 * @ctxt: the parser context
2846 * @str: the input string
2847 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2848 * @end: an end marker xmlChar, 0 if none
2849 * @end2: an end marker xmlChar, 0 if none
2850 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002851 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002852 * Takes a entity string content and process to do the adequate substitutions.
2853 *
2854 * [67] Reference ::= EntityRef | CharRef
2855 *
2856 * [69] PEReference ::= '%' Name ';'
2857 *
2858 * Returns A newly allocated string with the substitution done. The caller
2859 * must deallocate it !
2860 */
2861xmlChar *
2862xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2863 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002864 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002865 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2866 end, end2, end3));
2867}
Owen Taylor3473f882001-02-23 17:55:21 +00002868
2869/************************************************************************
2870 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002871 * Commodity functions, cleanup needed ? *
2872 * *
2873 ************************************************************************/
2874
2875/**
2876 * areBlanks:
2877 * @ctxt: an XML parser context
2878 * @str: a xmlChar *
2879 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002880 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002881 *
2882 * Is this a sequence of blank chars that one can ignore ?
2883 *
2884 * Returns 1 if ignorable 0 otherwise.
2885 */
2886
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002887static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2888 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002889 int i, ret;
2890 xmlNodePtr lastChild;
2891
Daniel Veillard05c13a22001-09-09 08:38:09 +00002892 /*
2893 * Don't spend time trying to differentiate them, the same callback is
2894 * used !
2895 */
2896 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002897 return(0);
2898
Owen Taylor3473f882001-02-23 17:55:21 +00002899 /*
2900 * Check for xml:space value.
2901 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002902 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2903 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002904 return(0);
2905
2906 /*
2907 * Check that the string is made of blanks
2908 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002909 if (blank_chars == 0) {
2910 for (i = 0;i < len;i++)
2911 if (!(IS_BLANK_CH(str[i]))) return(0);
2912 }
Owen Taylor3473f882001-02-23 17:55:21 +00002913
2914 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002915 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002916 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002917 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002918 if (ctxt->myDoc != NULL) {
2919 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2920 if (ret == 0) return(1);
2921 if (ret == 1) return(0);
2922 }
2923
2924 /*
2925 * Otherwise, heuristic :-\
2926 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002927 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002928 if ((ctxt->node->children == NULL) &&
2929 (RAW == '<') && (NXT(1) == '/')) return(0);
2930
2931 lastChild = xmlGetLastChild(ctxt->node);
2932 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002933 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2934 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002935 } else if (xmlNodeIsText(lastChild))
2936 return(0);
2937 else if ((ctxt->node->children != NULL) &&
2938 (xmlNodeIsText(ctxt->node->children)))
2939 return(0);
2940 return(1);
2941}
2942
Owen Taylor3473f882001-02-23 17:55:21 +00002943/************************************************************************
2944 * *
2945 * Extra stuff for namespace support *
2946 * Relates to http://www.w3.org/TR/WD-xml-names *
2947 * *
2948 ************************************************************************/
2949
2950/**
2951 * xmlSplitQName:
2952 * @ctxt: an XML parser context
2953 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002954 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002955 *
2956 * parse an UTF8 encoded XML qualified name string
2957 *
2958 * [NS 5] QName ::= (Prefix ':')? LocalPart
2959 *
2960 * [NS 6] Prefix ::= NCName
2961 *
2962 * [NS 7] LocalPart ::= NCName
2963 *
2964 * Returns the local part, and prefix is updated
2965 * to get the Prefix if any.
2966 */
2967
2968xmlChar *
2969xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2970 xmlChar buf[XML_MAX_NAMELEN + 5];
2971 xmlChar *buffer = NULL;
2972 int len = 0;
2973 int max = XML_MAX_NAMELEN;
2974 xmlChar *ret = NULL;
2975 const xmlChar *cur = name;
2976 int c;
2977
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002978 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002979 *prefix = NULL;
2980
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002981 if (cur == NULL) return(NULL);
2982
Owen Taylor3473f882001-02-23 17:55:21 +00002983#ifndef XML_XML_NAMESPACE
2984 /* xml: prefix is not really a namespace */
2985 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2986 (cur[2] == 'l') && (cur[3] == ':'))
2987 return(xmlStrdup(name));
2988#endif
2989
Daniel Veillard597bc482003-07-24 16:08:28 +00002990 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002991 if (cur[0] == ':')
2992 return(xmlStrdup(name));
2993
2994 c = *cur++;
2995 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2996 buf[len++] = c;
2997 c = *cur++;
2998 }
2999 if (len >= max) {
3000 /*
3001 * Okay someone managed to make a huge name, so he's ready to pay
3002 * for the processing speed.
3003 */
3004 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003005
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003006 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003007 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003008 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003009 return(NULL);
3010 }
3011 memcpy(buffer, buf, len);
3012 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3013 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003014 xmlChar *tmp;
3015
Owen Taylor3473f882001-02-23 17:55:21 +00003016 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003017 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003018 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003019 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00003020 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003021 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003022 return(NULL);
3023 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003024 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003025 }
3026 buffer[len++] = c;
3027 c = *cur++;
3028 }
3029 buffer[len] = 0;
3030 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003031
Daniel Veillard597bc482003-07-24 16:08:28 +00003032 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003033 if (buffer != NULL)
3034 xmlFree(buffer);
3035 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003036 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003037 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003038
Owen Taylor3473f882001-02-23 17:55:21 +00003039 if (buffer == NULL)
3040 ret = xmlStrndup(buf, len);
3041 else {
3042 ret = buffer;
3043 buffer = NULL;
3044 max = XML_MAX_NAMELEN;
3045 }
3046
3047
3048 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003049 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003050 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003051 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003052 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003053 }
Owen Taylor3473f882001-02-23 17:55:21 +00003054 len = 0;
3055
Daniel Veillardbb284f42002-10-16 18:02:47 +00003056 /*
3057 * Check that the first character is proper to start
3058 * a new name
3059 */
3060 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3061 ((c >= 0x41) && (c <= 0x5A)) ||
3062 (c == '_') || (c == ':'))) {
3063 int l;
3064 int first = CUR_SCHAR(cur, l);
3065
3066 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003067 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003068 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003069 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003070 }
3071 }
3072 cur++;
3073
Owen Taylor3473f882001-02-23 17:55:21 +00003074 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3075 buf[len++] = c;
3076 c = *cur++;
3077 }
3078 if (len >= max) {
3079 /*
3080 * Okay someone managed to make a huge name, so he's ready to pay
3081 * for the processing speed.
3082 */
3083 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003084
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003085 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003086 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003087 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003088 return(NULL);
3089 }
3090 memcpy(buffer, buf, len);
3091 while (c != 0) { /* tested bigname2.xml */
3092 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003093 xmlChar *tmp;
3094
Owen Taylor3473f882001-02-23 17:55:21 +00003095 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003096 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003097 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003098 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003099 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003100 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003101 return(NULL);
3102 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003103 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003104 }
3105 buffer[len++] = c;
3106 c = *cur++;
3107 }
3108 buffer[len] = 0;
3109 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003110
Owen Taylor3473f882001-02-23 17:55:21 +00003111 if (buffer == NULL)
3112 ret = xmlStrndup(buf, len);
3113 else {
3114 ret = buffer;
3115 }
3116 }
3117
3118 return(ret);
3119}
3120
3121/************************************************************************
3122 * *
3123 * The parser itself *
3124 * Relates to http://www.w3.org/TR/REC-xml *
3125 * *
3126 ************************************************************************/
3127
Daniel Veillard34e3f642008-07-29 09:02:27 +00003128/************************************************************************
3129 * *
3130 * Routines to parse Name, NCName and NmToken *
3131 * *
3132 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, one per name parsing routine. They only exist in DEBUG
 * builds; static storage starts out as zero.
 */
static unsigned long nbParseName;
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNameComplex;
static unsigned long nbParseStringName;
#endif
3141
Daniel Veillard34e3f642008-07-29 09:02:27 +00003142/*
3143 * The two following functions are related to the change of accepted
3144 * characters for Name and NmToken in the Revision 5 of XML-1.0
3145 * They correspond to the modified production [4] and the new production [4a]
3146 * changes in that revision. Also note that the macros used for the
3147 * productions Letter, Digit, CombiningChar and Extender are not needed
3148 * anymore.
3149 * We still keep compatibility to pre-revision5 parsing semantic if the
3150 * new XML_PARSE_OLD10 option is given to the parser.
3151 */
3152static int
3153xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3154 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3155 /*
3156 * Use the new checks of production [4] [4a] amd [5] of the
3157 * Update 5 of XML-1.0
3158 */
3159 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3160 (((c >= 'a') && (c <= 'z')) ||
3161 ((c >= 'A') && (c <= 'Z')) ||
3162 (c == '_') || (c == ':') ||
3163 ((c >= 0xC0) && (c <= 0xD6)) ||
3164 ((c >= 0xD8) && (c <= 0xF6)) ||
3165 ((c >= 0xF8) && (c <= 0x2FF)) ||
3166 ((c >= 0x370) && (c <= 0x37D)) ||
3167 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3168 ((c >= 0x200C) && (c <= 0x200D)) ||
3169 ((c >= 0x2070) && (c <= 0x218F)) ||
3170 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3171 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3172 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3173 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3174 ((c >= 0x10000) && (c <= 0xEFFFF))))
3175 return(1);
3176 } else {
3177 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3178 return(1);
3179 }
3180 return(0);
3181}
3182
3183static int
3184xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3185 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3186 /*
3187 * Use the new checks of production [4] [4a] amd [5] of the
3188 * Update 5 of XML-1.0
3189 */
3190 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3191 (((c >= 'a') && (c <= 'z')) ||
3192 ((c >= 'A') && (c <= 'Z')) ||
3193 ((c >= '0') && (c <= '9')) || /* !start */
3194 (c == '_') || (c == ':') ||
3195 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3196 ((c >= 0xC0) && (c <= 0xD6)) ||
3197 ((c >= 0xD8) && (c <= 0xF6)) ||
3198 ((c >= 0xF8) && (c <= 0x2FF)) ||
3199 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3200 ((c >= 0x370) && (c <= 0x37D)) ||
3201 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3202 ((c >= 0x200C) && (c <= 0x200D)) ||
3203 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3204 ((c >= 0x2070) && (c <= 0x218F)) ||
3205 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3206 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3207 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3208 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3209 ((c >= 0x10000) && (c <= 0xEFFFF))))
3210 return(1);
3211 } else {
3212 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3213 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003214 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003215 (IS_COMBINING(c)) ||
3216 (IS_EXTENDER(c)))
3217 return(1);
3218 }
3219 return(0);
3220}
3221
Daniel Veillarde57ec792003-09-10 10:50:59 +00003222static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003223 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003224
Daniel Veillard34e3f642008-07-29 09:02:27 +00003225static const xmlChar *
3226xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3227 int len = 0, l;
3228 int c;
3229 int count = 0;
3230
Daniel Veillardc6561462009-03-25 10:22:31 +00003231#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003232 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003233#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003234
3235 /*
3236 * Handler for more complex cases
3237 */
3238 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003239 if (ctxt->instate == XML_PARSER_EOF)
3240 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003241 c = CUR_CHAR(l);
3242 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3243 /*
3244 * Use the new checks of production [4] [4a] amd [5] of the
3245 * Update 5 of XML-1.0
3246 */
3247 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3248 (!(((c >= 'a') && (c <= 'z')) ||
3249 ((c >= 'A') && (c <= 'Z')) ||
3250 (c == '_') || (c == ':') ||
3251 ((c >= 0xC0) && (c <= 0xD6)) ||
3252 ((c >= 0xD8) && (c <= 0xF6)) ||
3253 ((c >= 0xF8) && (c <= 0x2FF)) ||
3254 ((c >= 0x370) && (c <= 0x37D)) ||
3255 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3256 ((c >= 0x200C) && (c <= 0x200D)) ||
3257 ((c >= 0x2070) && (c <= 0x218F)) ||
3258 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3259 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3260 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3261 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3262 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3263 return(NULL);
3264 }
3265 len += l;
3266 NEXTL(l);
3267 c = CUR_CHAR(l);
3268 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3269 (((c >= 'a') && (c <= 'z')) ||
3270 ((c >= 'A') && (c <= 'Z')) ||
3271 ((c >= '0') && (c <= '9')) || /* !start */
3272 (c == '_') || (c == ':') ||
3273 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3274 ((c >= 0xC0) && (c <= 0xD6)) ||
3275 ((c >= 0xD8) && (c <= 0xF6)) ||
3276 ((c >= 0xF8) && (c <= 0x2FF)) ||
3277 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3278 ((c >= 0x370) && (c <= 0x37D)) ||
3279 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3280 ((c >= 0x200C) && (c <= 0x200D)) ||
3281 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3282 ((c >= 0x2070) && (c <= 0x218F)) ||
3283 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3284 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3285 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3286 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3287 ((c >= 0x10000) && (c <= 0xEFFFF))
3288 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003289 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003290 count = 0;
3291 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003292 if (ctxt->instate == XML_PARSER_EOF)
3293 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003294 }
3295 len += l;
3296 NEXTL(l);
3297 c = CUR_CHAR(l);
3298 }
3299 } else {
3300 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3301 (!IS_LETTER(c) && (c != '_') &&
3302 (c != ':'))) {
3303 return(NULL);
3304 }
3305 len += l;
3306 NEXTL(l);
3307 c = CUR_CHAR(l);
3308
3309 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3310 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3311 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003312 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003313 (IS_COMBINING(c)) ||
3314 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003315 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003316 count = 0;
3317 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003318 if (ctxt->instate == XML_PARSER_EOF)
3319 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003320 }
3321 len += l;
3322 NEXTL(l);
3323 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003324 if (c == 0) {
3325 count = 0;
3326 GROW;
3327 if (ctxt->instate == XML_PARSER_EOF)
3328 return(NULL);
3329 c = CUR_CHAR(l);
3330 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003331 }
3332 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003333 if ((len > XML_MAX_NAME_LENGTH) &&
3334 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3335 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3336 return(NULL);
3337 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003338 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3339 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3340 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3341}
3342
Owen Taylor3473f882001-02-23 17:55:21 +00003343/**
3344 * xmlParseName:
3345 * @ctxt: an XML parser context
3346 *
3347 * parse an XML name.
3348 *
3349 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3350 * CombiningChar | Extender
3351 *
3352 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3353 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003354 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003355 *
3356 * Returns the Name parsed or NULL
3357 */
3358
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003359const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003360xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003361 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003362 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003363 int count = 0;
3364
3365 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003366
Daniel Veillardc6561462009-03-25 10:22:31 +00003367#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003368 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003369#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003370
Daniel Veillard48b2f892001-02-25 16:11:03 +00003371 /*
3372 * Accelerator for simple ASCII names
3373 */
3374 in = ctxt->input->cur;
3375 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3376 ((*in >= 0x41) && (*in <= 0x5A)) ||
3377 (*in == '_') || (*in == ':')) {
3378 in++;
3379 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3380 ((*in >= 0x41) && (*in <= 0x5A)) ||
3381 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003382 (*in == '_') || (*in == '-') ||
3383 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003384 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003385 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003386 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003387 if ((count > XML_MAX_NAME_LENGTH) &&
3388 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3389 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3390 return(NULL);
3391 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003392 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003393 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003394 ctxt->nbChars += count;
3395 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003396 if (ret == NULL)
3397 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003398 return(ret);
3399 }
3400 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003401 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003402 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003403}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003404
Daniel Veillard34e3f642008-07-29 09:02:27 +00003405static const xmlChar *
3406xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3407 int len = 0, l;
3408 int c;
3409 int count = 0;
Daniel Veillarddcc19502013-05-22 22:56:45 +02003410 const xmlChar *end; /* needed because CUR_CHAR() can move cur on \r\n */
Daniel Veillard34e3f642008-07-29 09:02:27 +00003411
Daniel Veillardc6561462009-03-25 10:22:31 +00003412#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003413 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003414#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003415
3416 /*
3417 * Handler for more complex cases
3418 */
3419 GROW;
Daniel Veillarddcc19502013-05-22 22:56:45 +02003420 end = ctxt->input->cur;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003421 c = CUR_CHAR(l);
3422 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3423 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3424 return(NULL);
3425 }
3426
3427 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3428 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003429 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003430 if ((len > XML_MAX_NAME_LENGTH) &&
3431 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3432 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3433 return(NULL);
3434 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003435 count = 0;
3436 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003437 if (ctxt->instate == XML_PARSER_EOF)
3438 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003439 }
3440 len += l;
3441 NEXTL(l);
Daniel Veillarddcc19502013-05-22 22:56:45 +02003442 end = ctxt->input->cur;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003443 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003444 if (c == 0) {
3445 count = 0;
3446 GROW;
3447 if (ctxt->instate == XML_PARSER_EOF)
3448 return(NULL);
Daniel Veillarddcc19502013-05-22 22:56:45 +02003449 end = ctxt->input->cur;
Daniel Veillard1f972e92012-08-15 10:16:37 +08003450 c = CUR_CHAR(l);
3451 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003452 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003453 if ((len > XML_MAX_NAME_LENGTH) &&
3454 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3455 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3456 return(NULL);
3457 }
Daniel Veillarddcc19502013-05-22 22:56:45 +02003458 return(xmlDictLookup(ctxt->dict, end - len, len));
Daniel Veillard34e3f642008-07-29 09:02:27 +00003459}
3460
3461/**
3462 * xmlParseNCName:
3463 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003464 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003465 *
3466 * parse an XML name.
3467 *
3468 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3469 * CombiningChar | Extender
3470 *
3471 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3472 *
3473 * Returns the Name parsed or NULL
3474 */
3475
3476static const xmlChar *
3477xmlParseNCName(xmlParserCtxtPtr ctxt) {
3478 const xmlChar *in;
3479 const xmlChar *ret;
3480 int count = 0;
3481
Daniel Veillardc6561462009-03-25 10:22:31 +00003482#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003483 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003484#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003485
3486 /*
3487 * Accelerator for simple ASCII names
3488 */
3489 in = ctxt->input->cur;
3490 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3491 ((*in >= 0x41) && (*in <= 0x5A)) ||
3492 (*in == '_')) {
3493 in++;
3494 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3495 ((*in >= 0x41) && (*in <= 0x5A)) ||
3496 ((*in >= 0x30) && (*in <= 0x39)) ||
3497 (*in == '_') || (*in == '-') ||
3498 (*in == '.'))
3499 in++;
3500 if ((*in > 0) && (*in < 0x80)) {
3501 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003502 if ((count > XML_MAX_NAME_LENGTH) &&
3503 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3504 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3505 return(NULL);
3506 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003507 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3508 ctxt->input->cur = in;
3509 ctxt->nbChars += count;
3510 ctxt->input->col += count;
3511 if (ret == NULL) {
3512 xmlErrMemory(ctxt, NULL);
3513 }
3514 return(ret);
3515 }
3516 }
3517 return(xmlParseNCNameComplex(ctxt));
3518}
3519
Daniel Veillard46de64e2002-05-29 08:21:33 +00003520/**
3521 * xmlParseNameAndCompare:
3522 * @ctxt: an XML parser context
3523 *
3524 * parse an XML name and compares for match
3525 * (specialized for endtag parsing)
3526 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003527 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3528 * and the name for mismatch
3529 */
3530
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003531static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003532xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003533 register const xmlChar *cmp = other;
3534 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003535 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003536
3537 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003538 if (ctxt->instate == XML_PARSER_EOF)
3539 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003540
Daniel Veillard46de64e2002-05-29 08:21:33 +00003541 in = ctxt->input->cur;
3542 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003543 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003544 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003545 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003546 }
William M. Brack76e95df2003-10-18 16:20:14 +00003547 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003548 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003549 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003550 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003551 }
3552 /* failure (or end of input buffer), check with full function */
3553 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003554 /* strings coming from the dictionnary direct compare possible */
3555 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003556 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003557 }
3558 return ret;
3559}
3560
Owen Taylor3473f882001-02-23 17:55:21 +00003561/**
3562 * xmlParseStringName:
3563 * @ctxt: an XML parser context
3564 * @str: a pointer to the string pointer (IN/OUT)
3565 *
3566 * parse an XML name.
3567 *
3568 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3569 * CombiningChar | Extender
3570 *
3571 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3572 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003573 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003574 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003575 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003576 * is updated to the current location in the string.
3577 */
3578
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003579static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003580xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3581 xmlChar buf[XML_MAX_NAMELEN + 5];
3582 const xmlChar *cur = *str;
3583 int len = 0, l;
3584 int c;
3585
Daniel Veillardc6561462009-03-25 10:22:31 +00003586#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003587 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003588#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003589
Owen Taylor3473f882001-02-23 17:55:21 +00003590 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003591 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003592 return(NULL);
3593 }
3594
Daniel Veillard34e3f642008-07-29 09:02:27 +00003595 COPY_BUF(l,buf,len,c);
3596 cur += l;
3597 c = CUR_SCHAR(cur, l);
3598 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003599 COPY_BUF(l,buf,len,c);
3600 cur += l;
3601 c = CUR_SCHAR(cur, l);
3602 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3603 /*
3604 * Okay someone managed to make a huge name, so he's ready to pay
3605 * for the processing speed.
3606 */
3607 xmlChar *buffer;
3608 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003609
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003610 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003611 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003612 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003613 return(NULL);
3614 }
3615 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003616 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003617 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003618 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003619
3620 if ((len > XML_MAX_NAME_LENGTH) &&
3621 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3622 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3623 xmlFree(buffer);
3624 return(NULL);
3625 }
Owen Taylor3473f882001-02-23 17:55:21 +00003626 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003627 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003628 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003629 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003630 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003631 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003632 return(NULL);
3633 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003634 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003635 }
3636 COPY_BUF(l,buffer,len,c);
3637 cur += l;
3638 c = CUR_SCHAR(cur, l);
3639 }
3640 buffer[len] = 0;
3641 *str = cur;
3642 return(buffer);
3643 }
3644 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003645 if ((len > XML_MAX_NAME_LENGTH) &&
3646 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3647 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3648 return(NULL);
3649 }
Owen Taylor3473f882001-02-23 17:55:21 +00003650 *str = cur;
3651 return(xmlStrndup(buf, len));
3652}
3653
3654/**
3655 * xmlParseNmtoken:
3656 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003657 *
Owen Taylor3473f882001-02-23 17:55:21 +00003658 * parse an XML Nmtoken.
3659 *
3660 * [7] Nmtoken ::= (NameChar)+
3661 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003662 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003663 *
3664 * Returns the Nmtoken parsed or NULL
3665 */
3666
3667xmlChar *
3668xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3669 xmlChar buf[XML_MAX_NAMELEN + 5];
3670 int len = 0, l;
3671 int c;
3672 int count = 0;
3673
Daniel Veillardc6561462009-03-25 10:22:31 +00003674#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003675 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003676#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003677
Owen Taylor3473f882001-02-23 17:55:21 +00003678 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003679 if (ctxt->instate == XML_PARSER_EOF)
3680 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003681 c = CUR_CHAR(l);
3682
Daniel Veillard34e3f642008-07-29 09:02:27 +00003683 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003684 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003685 count = 0;
3686 GROW;
3687 }
3688 COPY_BUF(l,buf,len,c);
3689 NEXTL(l);
3690 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003691 if (c == 0) {
3692 count = 0;
3693 GROW;
3694 if (ctxt->instate == XML_PARSER_EOF)
3695 return(NULL);
3696 c = CUR_CHAR(l);
3697 }
Owen Taylor3473f882001-02-23 17:55:21 +00003698 if (len >= XML_MAX_NAMELEN) {
3699 /*
3700 * Okay someone managed to make a huge token, so he's ready to pay
3701 * for the processing speed.
3702 */
3703 xmlChar *buffer;
3704 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003705
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003706 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003707 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003708 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003709 return(NULL);
3710 }
3711 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003712 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003713 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003714 count = 0;
3715 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003716 if (ctxt->instate == XML_PARSER_EOF) {
3717 xmlFree(buffer);
3718 return(NULL);
3719 }
Owen Taylor3473f882001-02-23 17:55:21 +00003720 }
3721 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003722 xmlChar *tmp;
3723
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003724 if ((max > XML_MAX_NAME_LENGTH) &&
3725 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3726 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3727 xmlFree(buffer);
3728 return(NULL);
3729 }
Owen Taylor3473f882001-02-23 17:55:21 +00003730 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003731 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003732 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003733 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003734 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003735 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003736 return(NULL);
3737 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003738 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003739 }
3740 COPY_BUF(l,buffer,len,c);
3741 NEXTL(l);
3742 c = CUR_CHAR(l);
3743 }
3744 buffer[len] = 0;
3745 return(buffer);
3746 }
3747 }
3748 if (len == 0)
3749 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003750 if ((len > XML_MAX_NAME_LENGTH) &&
3751 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3752 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3753 return(NULL);
3754 }
Owen Taylor3473f882001-02-23 17:55:21 +00003755 return(xmlStrndup(buf, len));
3756}
3757
3758/**
3759 * xmlParseEntityValue:
3760 * @ctxt: an XML parser context
3761 * @orig: if non-NULL store a copy of the original entity value
3762 *
3763 * parse a value for ENTITY declarations
3764 *
3765 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3766 * "'" ([^%&'] | PEReference | Reference)* "'"
3767 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003768 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003769 */
3770
3771xmlChar *
3772xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3773 xmlChar *buf = NULL;
3774 int len = 0;
3775 int size = XML_PARSER_BUFFER_SIZE;
3776 int c, l;
3777 xmlChar stop;
3778 xmlChar *ret = NULL;
3779 const xmlChar *cur = NULL;
3780 xmlParserInputPtr input;
3781
3782 if (RAW == '"') stop = '"';
3783 else if (RAW == '\'') stop = '\'';
3784 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003785 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003786 return(NULL);
3787 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003788 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003789 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003790 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003791 return(NULL);
3792 }
3793
3794 /*
3795 * The content of the entity definition is copied in a buffer.
3796 */
3797
3798 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3799 input = ctxt->input;
3800 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003801 if (ctxt->instate == XML_PARSER_EOF) {
3802 xmlFree(buf);
3803 return(NULL);
3804 }
Owen Taylor3473f882001-02-23 17:55:21 +00003805 NEXT;
3806 c = CUR_CHAR(l);
3807 /*
3808 * NOTE: 4.4.5 Included in Literal
3809 * When a parameter entity reference appears in a literal entity
3810 * value, ... a single or double quote character in the replacement
3811 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003812 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003813 * In practice it means we stop the loop only when back at parsing
3814 * the initial entity and the quote is found
3815 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003816 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3817 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003818 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003819 xmlChar *tmp;
3820
Owen Taylor3473f882001-02-23 17:55:21 +00003821 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003822 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3823 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003824 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003825 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003826 return(NULL);
3827 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003828 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003829 }
3830 COPY_BUF(l,buf,len,c);
3831 NEXTL(l);
3832 /*
3833 * Pop-up of finished entities.
3834 */
3835 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3836 xmlPopInput(ctxt);
3837
3838 GROW;
3839 c = CUR_CHAR(l);
3840 if (c == 0) {
3841 GROW;
3842 c = CUR_CHAR(l);
3843 }
3844 }
3845 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003846 if (ctxt->instate == XML_PARSER_EOF) {
3847 xmlFree(buf);
3848 return(NULL);
3849 }
Owen Taylor3473f882001-02-23 17:55:21 +00003850
3851 /*
3852 * Raise problem w.r.t. '&' and '%' being used in non-entities
3853 * reference constructs. Note Charref will be handled in
3854 * xmlStringDecodeEntities()
3855 */
3856 cur = buf;
3857 while (*cur != 0) { /* non input consuming */
3858 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3859 xmlChar *name;
3860 xmlChar tmp = *cur;
3861
3862 cur++;
3863 name = xmlParseStringName(ctxt, &cur);
3864 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003865 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003866 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003867 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003868 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003869 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3870 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003871 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003872 }
3873 if (name != NULL)
3874 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003875 if (*cur == 0)
3876 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003877 }
3878 cur++;
3879 }
3880
3881 /*
3882 * Then PEReference entities are substituted.
3883 */
3884 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003885 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003886 xmlFree(buf);
3887 } else {
3888 NEXT;
3889 /*
3890 * NOTE: 4.4.7 Bypassed
3891 * When a general entity reference appears in the EntityValue in
3892 * an entity declaration, it is bypassed and left as is.
3893 * so XML_SUBSTITUTE_REF is not set here.
3894 */
3895 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3896 0, 0, 0);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003897 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003898 *orig = buf;
3899 else
3900 xmlFree(buf);
3901 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003902
Owen Taylor3473f882001-02-23 17:55:21 +00003903 return(ret);
3904}
3905
3906/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003907 * xmlParseAttValueComplex:
3908 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003909 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003910 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003911 *
3912 * parse a value for an attribute, this is the fallback function
3913 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003914 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003915 *
3916 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3917 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003918static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003919xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003920 xmlChar limit = 0;
3921 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003922 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003923 size_t len = 0;
3924 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003925 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003926 xmlChar *current = NULL;
3927 xmlEntityPtr ent;
3928
Owen Taylor3473f882001-02-23 17:55:21 +00003929 if (NXT(0) == '"') {
3930 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3931 limit = '"';
3932 NEXT;
3933 } else if (NXT(0) == '\'') {
3934 limit = '\'';
3935 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3936 NEXT;
3937 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003938 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003939 return(NULL);
3940 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003941
Owen Taylor3473f882001-02-23 17:55:21 +00003942 /*
3943 * allocate a translation buffer.
3944 */
3945 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003946 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003947 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003948
3949 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003950 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003951 */
3952 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003953 while (((NXT(0) != limit) && /* checked */
3954 (IS_CHAR(c)) && (c != '<')) &&
3955 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003956 /*
3957 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3958 * special option is given
3959 */
3960 if ((len > XML_MAX_TEXT_LENGTH) &&
3961 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3962 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003963 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003964 goto mem_error;
3965 }
Owen Taylor3473f882001-02-23 17:55:21 +00003966 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003967 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003968 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003969 if (NXT(1) == '#') {
3970 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003971
Owen Taylor3473f882001-02-23 17:55:21 +00003972 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003973 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003974 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003975 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003976 }
3977 buf[len++] = '&';
3978 } else {
3979 /*
3980 * The reparsing will be done in xmlStringGetNodeList()
3981 * called by the attribute() function in SAX.c
3982 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003983 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003984 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003985 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003986 buf[len++] = '&';
3987 buf[len++] = '#';
3988 buf[len++] = '3';
3989 buf[len++] = '8';
3990 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003991 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003992 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003993 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003994 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003995 }
Owen Taylor3473f882001-02-23 17:55:21 +00003996 len += xmlCopyChar(0, &buf[len], val);
3997 }
3998 } else {
3999 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00004000 ctxt->nbentities++;
4001 if (ent != NULL)
4002 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004003 if ((ent != NULL) &&
4004 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004005 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004006 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004007 }
4008 if ((ctxt->replaceEntities == 0) &&
4009 (ent->content[0] == '&')) {
4010 buf[len++] = '&';
4011 buf[len++] = '#';
4012 buf[len++] = '3';
4013 buf[len++] = '8';
4014 buf[len++] = ';';
4015 } else {
4016 buf[len++] = ent->content[0];
4017 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004018 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004019 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004020 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4021 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004022 XML_SUBSTITUTE_REF,
4023 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00004024 if (rep != NULL) {
4025 current = rep;
4026 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004027 if ((*current == 0xD) || (*current == 0xA) ||
4028 (*current == 0x9)) {
4029 buf[len++] = 0x20;
4030 current++;
4031 } else
4032 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004033 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004034 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004035 }
4036 }
4037 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004038 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004039 }
4040 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004041 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004042 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004043 }
Owen Taylor3473f882001-02-23 17:55:21 +00004044 if (ent->content != NULL)
4045 buf[len++] = ent->content[0];
4046 }
4047 } else if (ent != NULL) {
4048 int i = xmlStrlen(ent->name);
4049 const xmlChar *cur = ent->name;
4050
4051 /*
4052 * This may look absurd but is needed to detect
4053 * entities problems
4054 */
4055 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004056 (ent->content != NULL) && (ent->checked == 0)) {
4057 unsigned long oldnbent = ctxt->nbentities;
4058
Owen Taylor3473f882001-02-23 17:55:21 +00004059 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004060 XML_SUBSTITUTE_REF, 0, 0, 0);
Daniel Veillarda3f1e3e2013-03-11 13:57:53 +08004061
Daniel Veillardcff25462013-03-11 15:57:55 +08004062 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00004063 if (rep != NULL) {
Daniel Veillardcff25462013-03-11 15:57:55 +08004064 if (xmlStrchr(rep, '<'))
4065 ent->checked |= 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004066 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004067 rep = NULL;
4068 }
Owen Taylor3473f882001-02-23 17:55:21 +00004069 }
4070
4071 /*
4072 * Just output the reference
4073 */
4074 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004075 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004076 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004077 }
4078 for (;i > 0;i--)
4079 buf[len++] = *cur++;
4080 buf[len++] = ';';
4081 }
4082 }
4083 } else {
4084 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004085 if ((len != 0) || (!normalize)) {
4086 if ((!normalize) || (!in_space)) {
4087 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004088 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004089 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004090 }
4091 }
4092 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004093 }
4094 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004095 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004096 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004097 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004098 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004099 }
4100 }
4101 NEXTL(l);
4102 }
4103 GROW;
4104 c = CUR_CHAR(l);
4105 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004106 if (ctxt->instate == XML_PARSER_EOF)
4107 goto error;
4108
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004109 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004110 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004111 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004112 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004113 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004114 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004115 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004116 if ((c != 0) && (!IS_CHAR(c))) {
4117 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4118 "invalid character in attribute value\n");
4119 } else {
4120 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4121 "AttValue: ' expected\n");
4122 }
Owen Taylor3473f882001-02-23 17:55:21 +00004123 } else
4124 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004125
4126 /*
4127 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004128 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004129 */
4130 if (len >= INT_MAX) {
4131 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004132 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004133 goto mem_error;
4134 }
4135
4136 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004137 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004138
4139mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004140 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004141error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004142 if (buf != NULL)
4143 xmlFree(buf);
4144 if (rep != NULL)
4145 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004146 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004147}
4148
4149/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004150 * xmlParseAttValue:
4151 * @ctxt: an XML parser context
4152 *
4153 * parse a value for an attribute
4154 * Note: the parser won't do substitution of entities here, this
4155 * will be handled later in xmlStringGetNodeList
4156 *
4157 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4158 * "'" ([^<&'] | Reference)* "'"
4159 *
4160 * 3.3.3 Attribute-Value Normalization:
4161 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004162 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004163 * - a character reference is processed by appending the referenced
4164 * character to the attribute value
4165 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004166 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004167 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4168 * appending #x20 to the normalized value, except that only a single
4169 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004170 * parsed entity or the literal entity value of an internal parsed entity
4171 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004172 * If the declared value is not CDATA, then the XML processor must further
4173 * process the normalized attribute value by discarding any leading and
4174 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004175 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004176 * All attributes for which no declaration has been read should be treated
4177 * by a non-validating parser as if declared CDATA.
4178 *
4179 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4180 */
4181
4182
4183xmlChar *
4184xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004185 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004186 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004187}
4188
4189/**
Owen Taylor3473f882001-02-23 17:55:21 +00004190 * xmlParseSystemLiteral:
4191 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004192 *
Owen Taylor3473f882001-02-23 17:55:21 +00004193 * parse an XML Literal
4194 *
4195 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4196 *
4197 * Returns the SystemLiteral parsed or NULL
4198 */
4199
4200xmlChar *
4201xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4202 xmlChar *buf = NULL;
4203 int len = 0;
4204 int size = XML_PARSER_BUFFER_SIZE;
4205 int cur, l;
4206 xmlChar stop;
4207 int state = ctxt->instate;
4208 int count = 0;
4209
4210 SHRINK;
4211 if (RAW == '"') {
4212 NEXT;
4213 stop = '"';
4214 } else if (RAW == '\'') {
4215 NEXT;
4216 stop = '\'';
4217 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004218 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004219 return(NULL);
4220 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004221
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004222 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004223 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004224 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004225 return(NULL);
4226 }
4227 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4228 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004229 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004230 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004231 xmlChar *tmp;
4232
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004233 if ((size > XML_MAX_NAME_LENGTH) &&
4234 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4235 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4236 xmlFree(buf);
4237 ctxt->instate = (xmlParserInputState) state;
4238 return(NULL);
4239 }
Owen Taylor3473f882001-02-23 17:55:21 +00004240 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004241 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4242 if (tmp == NULL) {
4243 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004244 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004245 ctxt->instate = (xmlParserInputState) state;
4246 return(NULL);
4247 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004248 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004249 }
4250 count++;
4251 if (count > 50) {
4252 GROW;
4253 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004254 if (ctxt->instate == XML_PARSER_EOF) {
4255 xmlFree(buf);
4256 return(NULL);
4257 }
Owen Taylor3473f882001-02-23 17:55:21 +00004258 }
4259 COPY_BUF(l,buf,len,cur);
4260 NEXTL(l);
4261 cur = CUR_CHAR(l);
4262 if (cur == 0) {
4263 GROW;
4264 SHRINK;
4265 cur = CUR_CHAR(l);
4266 }
4267 }
4268 buf[len] = 0;
4269 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004270 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004271 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004272 } else {
4273 NEXT;
4274 }
4275 return(buf);
4276}
4277
4278/**
4279 * xmlParsePubidLiteral:
4280 * @ctxt: an XML parser context
4281 *
4282 * parse an XML public literal
4283 *
4284 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4285 *
4286 * Returns the PubidLiteral parsed or NULL.
4287 */
4288
4289xmlChar *
4290xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4291 xmlChar *buf = NULL;
4292 int len = 0;
4293 int size = XML_PARSER_BUFFER_SIZE;
4294 xmlChar cur;
4295 xmlChar stop;
4296 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004297 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004298
4299 SHRINK;
4300 if (RAW == '"') {
4301 NEXT;
4302 stop = '"';
4303 } else if (RAW == '\'') {
4304 NEXT;
4305 stop = '\'';
4306 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004307 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004308 return(NULL);
4309 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004310 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004311 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004312 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004313 return(NULL);
4314 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004315 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004316 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004317 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004318 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004319 xmlChar *tmp;
4320
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004321 if ((size > XML_MAX_NAME_LENGTH) &&
4322 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4323 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4324 xmlFree(buf);
4325 return(NULL);
4326 }
Owen Taylor3473f882001-02-23 17:55:21 +00004327 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004328 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4329 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004330 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004331 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004332 return(NULL);
4333 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004334 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004335 }
4336 buf[len++] = cur;
4337 count++;
4338 if (count > 50) {
4339 GROW;
4340 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004341 if (ctxt->instate == XML_PARSER_EOF) {
4342 xmlFree(buf);
4343 return(NULL);
4344 }
Owen Taylor3473f882001-02-23 17:55:21 +00004345 }
4346 NEXT;
4347 cur = CUR;
4348 if (cur == 0) {
4349 GROW;
4350 SHRINK;
4351 cur = CUR;
4352 }
4353 }
4354 buf[len] = 0;
4355 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004356 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004357 } else {
4358 NEXT;
4359 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004360 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004361 return(buf);
4362}
4363
Daniel Veillard8ed10722009-08-20 19:17:36 +02004364static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004365
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value: an entry equal to its own index marks a byte
 * listed in the table, 0x00 marks every other byte. The zero entries are
 * the control characters other than 0x9 (CR and LF included), '&', '<',
 * ']' and all bytes >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4403
Owen Taylor3473f882001-02-23 17:55:21 +00004404/**
4405 * xmlParseCharData:
4406 * @ctxt: an XML parser context
4407 * @cdata: int indicating whether we are within a CDATA section
4408 *
4409 * parse a CharData section.
4410 * if we are within a CDATA section ']]>' marks an end of section.
4411 *
4412 * The right angle bracket (>) may be represented using the string "&gt;",
4413 * and must, for compatibility, be escaped using "&gt;" or a character
4414 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004415 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004416 *
4417 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4418 */
4419
4420void
4421xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004422 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004423 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004424 int line = ctxt->input->line;
4425 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004426 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004427
4428 SHRINK;
4429 GROW;
4430 /*
4431 * Accelerated common case where input don't need to be
4432 * modified before passing it to the handler.
4433 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004434 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004435 in = ctxt->input->cur;
4436 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004437get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004438 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004439 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004440 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004441 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004442 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004443 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004444 goto get_more_space;
4445 }
4446 if (*in == '<') {
4447 nbchar = in - ctxt->input->cur;
4448 if (nbchar > 0) {
4449 const xmlChar *tmp = ctxt->input->cur;
4450 ctxt->input->cur = in;
4451
Daniel Veillard34099b42004-11-04 17:34:35 +00004452 if ((ctxt->sax != NULL) &&
4453 (ctxt->sax->ignorableWhitespace !=
4454 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004455 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004456 if (ctxt->sax->ignorableWhitespace != NULL)
4457 ctxt->sax->ignorableWhitespace(ctxt->userData,
4458 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004459 } else {
4460 if (ctxt->sax->characters != NULL)
4461 ctxt->sax->characters(ctxt->userData,
4462 tmp, nbchar);
4463 if (*ctxt->space == -1)
4464 *ctxt->space = -2;
4465 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004466 } else if ((ctxt->sax != NULL) &&
4467 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004468 ctxt->sax->characters(ctxt->userData,
4469 tmp, nbchar);
4470 }
4471 }
4472 return;
4473 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004474
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004475get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004476 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004477 while (test_char_data[*in]) {
4478 in++;
4479 ccol++;
4480 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004481 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004482 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004483 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004484 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004485 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004486 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004487 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004488 }
4489 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004490 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004491 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004492 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004493 return;
4494 }
4495 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004496 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004497 goto get_more;
4498 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004499 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004500 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004501 if ((ctxt->sax != NULL) &&
4502 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004503 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004504 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004505 const xmlChar *tmp = ctxt->input->cur;
4506 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004507
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004508 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004509 if (ctxt->sax->ignorableWhitespace != NULL)
4510 ctxt->sax->ignorableWhitespace(ctxt->userData,
4511 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004512 } else {
4513 if (ctxt->sax->characters != NULL)
4514 ctxt->sax->characters(ctxt->userData,
4515 tmp, nbchar);
4516 if (*ctxt->space == -1)
4517 *ctxt->space = -2;
4518 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004519 line = ctxt->input->line;
4520 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004521 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004522 if (ctxt->sax->characters != NULL)
4523 ctxt->sax->characters(ctxt->userData,
4524 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004525 line = ctxt->input->line;
4526 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004527 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004528 /* something really bad happened in the SAX callback */
4529 if (ctxt->instate != XML_PARSER_CONTENT)
4530 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004531 }
4532 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004533 if (*in == 0xD) {
4534 in++;
4535 if (*in == 0xA) {
4536 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004537 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004538 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004539 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004540 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004541 in--;
4542 }
4543 if (*in == '<') {
4544 return;
4545 }
4546 if (*in == '&') {
4547 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004548 }
4549 SHRINK;
4550 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004551 if (ctxt->instate == XML_PARSER_EOF)
4552 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004553 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004554 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004555 nbchar = 0;
4556 }
Daniel Veillard50582112001-03-26 22:52:16 +00004557 ctxt->input->line = line;
4558 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004559 xmlParseCharDataComplex(ctxt, cdata);
4560}
4561
Daniel Veillard01c13b52002-12-10 15:19:08 +00004562/**
4563 * xmlParseCharDataComplex:
4564 * @ctxt: an XML parser context
4565 * @cdata: int indicating whether we are within a CDATA section
4566 *
4567 * parse a CharData section.this is the fallback function
4568 * of xmlParseCharData() when the parsing requires handling
4569 * of non-ASCII characters.
4570 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004571static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004572xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004573 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4574 int nbchar = 0;
4575 int cur, l;
4576 int count = 0;
4577
4578 SHRINK;
4579 GROW;
4580 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004581 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004582 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004583 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004584 if ((cur == ']') && (NXT(1) == ']') &&
4585 (NXT(2) == '>')) {
4586 if (cdata) break;
4587 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004588 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004589 }
4590 }
4591 COPY_BUF(l,buf,nbchar,cur);
4592 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004593 buf[nbchar] = 0;
4594
Owen Taylor3473f882001-02-23 17:55:21 +00004595 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004596 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004597 */
4598 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004599 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004600 if (ctxt->sax->ignorableWhitespace != NULL)
4601 ctxt->sax->ignorableWhitespace(ctxt->userData,
4602 buf, nbchar);
4603 } else {
4604 if (ctxt->sax->characters != NULL)
4605 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004606 if ((ctxt->sax->characters !=
4607 ctxt->sax->ignorableWhitespace) &&
4608 (*ctxt->space == -1))
4609 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004610 }
4611 }
4612 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004613 /* something really bad happened in the SAX callback */
4614 if (ctxt->instate != XML_PARSER_CONTENT)
4615 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004616 }
4617 count++;
4618 if (count > 50) {
4619 GROW;
4620 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004621 if (ctxt->instate == XML_PARSER_EOF)
4622 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004623 }
4624 NEXTL(l);
4625 cur = CUR_CHAR(l);
4626 }
4627 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004628 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004629 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004630 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004631 */
4632 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004633 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004634 if (ctxt->sax->ignorableWhitespace != NULL)
4635 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4636 } else {
4637 if (ctxt->sax->characters != NULL)
4638 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004639 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4640 (*ctxt->space == -1))
4641 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004642 }
4643 }
4644 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004645 if ((cur != 0) && (!IS_CHAR(cur))) {
4646 /* Generate the error and skip the offending character */
4647 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4648 "PCDATA invalid Char value %d\n",
4649 cur);
4650 NEXTL(l);
4651 }
Owen Taylor3473f882001-02-23 17:55:21 +00004652}
4653
4654/**
4655 * xmlParseExternalID:
4656 * @ctxt: an XML parser context
4657 * @publicID: a xmlChar** receiving PubidLiteral
4658 * @strict: indicate whether we should restrict parsing to only
4659 * production [75], see NOTE below
4660 *
4661 * Parse an External ID or a Public ID
4662 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004663 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004664 * 'PUBLIC' S PubidLiteral S SystemLiteral
4665 *
4666 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4667 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4668 *
4669 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4670 *
4671 * Returns the function returns SystemLiteral and in the second
4672 * case publicID receives PubidLiteral, is strict is off
4673 * it is possible to return NULL and have publicID set.
4674 */
4675
4676xmlChar *
4677xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4678 xmlChar *URI = NULL;
4679
4680 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004681
4682 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004683 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004684 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004685 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004686 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4687 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004688 }
4689 SKIP_BLANKS;
4690 URI = xmlParseSystemLiteral(ctxt);
4691 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004692 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004693 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004694 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004695 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004696 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004697 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004698 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004699 }
4700 SKIP_BLANKS;
4701 *publicID = xmlParsePubidLiteral(ctxt);
4702 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004703 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004704 }
4705 if (strict) {
4706 /*
4707 * We don't handle [83] so "S SystemLiteral" is required.
4708 */
William M. Brack76e95df2003-10-18 16:20:14 +00004709 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004710 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004711 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004712 }
4713 } else {
4714 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004715 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004716 * "S SystemLiteral" is not detected. From a purely parsing
4717 * point of view that's a nice mess.
4718 */
4719 const xmlChar *ptr;
4720 GROW;
4721
4722 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004723 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004724
William M. Brack76e95df2003-10-18 16:20:14 +00004725 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004726 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4727 }
4728 SKIP_BLANKS;
4729 URI = xmlParseSystemLiteral(ctxt);
4730 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004731 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004732 }
4733 }
4734 return(URI);
4735}
4736
4737/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004738 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004739 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004740 * @buf: the already parsed part of the buffer
4741 * @len: number of bytes filles in the buffer
4742 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004743 *
4744 * Skip an XML (SGML) comment <!-- .... -->
4745 * The spec says that "For compatibility, the string "--" (double-hyphen)
4746 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004747 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004748 *
4749 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4750 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004751static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004752xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4753 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004754 int q, ql;
4755 int r, rl;
4756 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004757 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004758 int inputid;
4759
4760 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004761
Owen Taylor3473f882001-02-23 17:55:21 +00004762 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004763 len = 0;
4764 size = XML_PARSER_BUFFER_SIZE;
4765 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4766 if (buf == NULL) {
4767 xmlErrMemory(ctxt, NULL);
4768 return;
4769 }
Owen Taylor3473f882001-02-23 17:55:21 +00004770 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004771 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004772 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004773 if (q == 0)
4774 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004775 if (!IS_CHAR(q)) {
4776 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4777 "xmlParseComment: invalid xmlChar value %d\n",
4778 q);
4779 xmlFree (buf);
4780 return;
4781 }
Owen Taylor3473f882001-02-23 17:55:21 +00004782 NEXTL(ql);
4783 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004784 if (r == 0)
4785 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004786 if (!IS_CHAR(r)) {
4787 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4788 "xmlParseComment: invalid xmlChar value %d\n",
4789 q);
4790 xmlFree (buf);
4791 return;
4792 }
Owen Taylor3473f882001-02-23 17:55:21 +00004793 NEXTL(rl);
4794 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004795 if (cur == 0)
4796 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004797 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004798 ((cur != '>') ||
4799 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004800 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004801 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004802 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004803 if ((len > XML_MAX_TEXT_LENGTH) &&
4804 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4805 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4806 "Comment too big found", NULL);
4807 xmlFree (buf);
4808 return;
4809 }
Owen Taylor3473f882001-02-23 17:55:21 +00004810 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004811 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004812 size_t new_size;
4813
4814 new_size = size * 2;
4815 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004816 if (new_buf == NULL) {
4817 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004818 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004819 return;
4820 }
William M. Bracka3215c72004-07-31 16:24:01 +00004821 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004822 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004823 }
4824 COPY_BUF(ql,buf,len,q);
4825 q = r;
4826 ql = rl;
4827 r = cur;
4828 rl = l;
4829
4830 count++;
4831 if (count > 50) {
4832 GROW;
4833 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004834 if (ctxt->instate == XML_PARSER_EOF) {
4835 xmlFree(buf);
4836 return;
4837 }
Owen Taylor3473f882001-02-23 17:55:21 +00004838 }
4839 NEXTL(l);
4840 cur = CUR_CHAR(l);
4841 if (cur == 0) {
4842 SHRINK;
4843 GROW;
4844 cur = CUR_CHAR(l);
4845 }
4846 }
4847 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004848 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004849 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004850 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004851 } else if (!IS_CHAR(cur)) {
4852 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4853 "xmlParseComment: invalid xmlChar value %d\n",
4854 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004855 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004856 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004857 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4858 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004859 }
4860 NEXT;
4861 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4862 (!ctxt->disableSAX))
4863 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004864 }
Daniel Veillardda629342007-08-01 07:49:06 +00004865 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004866 return;
4867not_terminated:
4868 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4869 "Comment not terminated\n", NULL);
4870 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004871 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004872}
Daniel Veillardda629342007-08-01 07:49:06 +00004873
Daniel Veillard4c778d82005-01-23 17:37:44 +00004874/**
4875 * xmlParseComment:
4876 * @ctxt: an XML parser context
4877 *
4878 * Skip an XML (SGML) comment <!-- .... -->
4879 * The spec says that "For compatibility, the string "--" (double-hyphen)
4880 * must not occur within comments. "
4881 *
4882 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4883 */
4884void
4885xmlParseComment(xmlParserCtxtPtr ctxt) {
4886 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004887 size_t size = XML_PARSER_BUFFER_SIZE;
4888 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004889 xmlParserInputState state;
4890 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004891 size_t nbchar = 0;
4892 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004893 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004894
4895 /*
4896 * Check that there is a comment right here.
4897 */
4898 if ((RAW != '<') || (NXT(1) != '!') ||
4899 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004900 state = ctxt->instate;
4901 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004902 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004903 SKIP(4);
4904 SHRINK;
4905 GROW;
4906
4907 /*
4908 * Accelerated common case where input don't need to be
4909 * modified before passing it to the handler.
4910 */
4911 in = ctxt->input->cur;
4912 do {
4913 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004914 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004915 ctxt->input->line++; ctxt->input->col = 1;
4916 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004917 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004918 }
4919get_more:
4920 ccol = ctxt->input->col;
4921 while (((*in > '-') && (*in <= 0x7F)) ||
4922 ((*in >= 0x20) && (*in < '-')) ||
4923 (*in == 0x09)) {
4924 in++;
4925 ccol++;
4926 }
4927 ctxt->input->col = ccol;
4928 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004929 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004930 ctxt->input->line++; ctxt->input->col = 1;
4931 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004932 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004933 goto get_more;
4934 }
4935 nbchar = in - ctxt->input->cur;
4936 /*
4937 * save current set of data
4938 */
4939 if (nbchar > 0) {
4940 if ((ctxt->sax != NULL) &&
4941 (ctxt->sax->comment != NULL)) {
4942 if (buf == NULL) {
4943 if ((*in == '-') && (in[1] == '-'))
4944 size = nbchar + 1;
4945 else
4946 size = XML_PARSER_BUFFER_SIZE + nbchar;
4947 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4948 if (buf == NULL) {
4949 xmlErrMemory(ctxt, NULL);
4950 ctxt->instate = state;
4951 return;
4952 }
4953 len = 0;
4954 } else if (len + nbchar + 1 >= size) {
4955 xmlChar *new_buf;
4956 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4957 new_buf = (xmlChar *) xmlRealloc(buf,
4958 size * sizeof(xmlChar));
4959 if (new_buf == NULL) {
4960 xmlFree (buf);
4961 xmlErrMemory(ctxt, NULL);
4962 ctxt->instate = state;
4963 return;
4964 }
4965 buf = new_buf;
4966 }
4967 memcpy(&buf[len], ctxt->input->cur, nbchar);
4968 len += nbchar;
4969 buf[len] = 0;
4970 }
4971 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004972 if ((len > XML_MAX_TEXT_LENGTH) &&
4973 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4974 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4975 "Comment too big found", NULL);
4976 xmlFree (buf);
4977 return;
4978 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004979 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004980 if (*in == 0xA) {
4981 in++;
4982 ctxt->input->line++; ctxt->input->col = 1;
4983 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004984 if (*in == 0xD) {
4985 in++;
4986 if (*in == 0xA) {
4987 ctxt->input->cur = in;
4988 in++;
4989 ctxt->input->line++; ctxt->input->col = 1;
4990 continue; /* while */
4991 }
4992 in--;
4993 }
4994 SHRINK;
4995 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004996 if (ctxt->instate == XML_PARSER_EOF) {
4997 xmlFree(buf);
4998 return;
4999 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005000 in = ctxt->input->cur;
5001 if (*in == '-') {
5002 if (in[1] == '-') {
5003 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00005004 if (ctxt->input->id != inputid) {
5005 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5006 "comment doesn't start and stop in the same entity\n");
5007 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00005008 SKIP(3);
5009 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5010 (!ctxt->disableSAX)) {
5011 if (buf != NULL)
5012 ctxt->sax->comment(ctxt->userData, buf);
5013 else
5014 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5015 }
5016 if (buf != NULL)
5017 xmlFree(buf);
Daniel Veillarde50ba812013-04-11 15:54:51 +08005018 if (ctxt->instate != XML_PARSER_EOF)
5019 ctxt->instate = state;
Daniel Veillard4c778d82005-01-23 17:37:44 +00005020 return;
5021 }
Bryan Henderson8658d272012-05-08 16:39:05 +08005022 if (buf != NULL) {
5023 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5024 "Double hyphen within comment: "
5025 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00005026 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08005027 } else
5028 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5029 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00005030 in++;
5031 ctxt->input->col++;
5032 }
5033 in++;
5034 ctxt->input->col++;
5035 goto get_more;
5036 }
5037 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5038 xmlParseCommentComplex(ctxt, buf, len, size);
5039 ctxt->instate = state;
5040 return;
5041}
5042
Owen Taylor3473f882001-02-23 17:55:21 +00005043
5044/**
5045 * xmlParsePITarget:
5046 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005047 *
Owen Taylor3473f882001-02-23 17:55:21 +00005048 * parse the name of a PI
5049 *
5050 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5051 *
5052 * Returns the PITarget name or NULL
5053 */
5054
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005055const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005056xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005057 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005058
5059 name = xmlParseName(ctxt);
5060 if ((name != NULL) &&
5061 ((name[0] == 'x') || (name[0] == 'X')) &&
5062 ((name[1] == 'm') || (name[1] == 'M')) &&
5063 ((name[2] == 'l') || (name[2] == 'L'))) {
5064 int i;
5065 if ((name[0] == 'x') && (name[1] == 'm') &&
5066 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005067 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005068 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005069 return(name);
5070 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005071 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005072 return(name);
5073 }
5074 for (i = 0;;i++) {
5075 if (xmlW3CPIs[i] == NULL) break;
5076 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5077 return(name);
5078 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005079 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5080 "xmlParsePITarget: invalid name prefix 'xml'\n",
5081 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005082 }
Daniel Veillard37334572008-07-31 08:20:02 +00005083 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005084 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005085 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5086 }
Owen Taylor3473f882001-02-23 17:55:21 +00005087 return(name);
5088}
5089
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be of the form  catalog = "URL"  (single or double
 * quotes) with nothing but blanks around it.  A missing '=' makes the
 * function return silently; any other deviation emits a
 * XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *location = NULL;
    const xmlChar *cursor = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    cursor += 7;

    /* '=' sign */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (*cursor != '=')
        return;
    cursor++;

    /* quoted value */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    cursor++;
    for (start = cursor; (*cursor != 0) && (*cursor != quote); cursor++)
        ;
    if (*cursor == 0)
        goto syntax_error;
    location = xmlStrndup(start, cursor - start);
    cursor++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (*cursor != 0)
        goto syntax_error;

    if (location != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, location);
        xmlFree(location);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (location != NULL)
        xmlFree(location);
}
#endif
5151
Owen Taylor3473f882001-02-23 17:55:21 +00005152/**
5153 * xmlParsePI:
5154 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005155 *
Owen Taylor3473f882001-02-23 17:55:21 +00005156 * parse an XML Processing Instruction.
5157 *
5158 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5159 *
5160 * The processing is transfered to SAX once parsed.
5161 */
5162
5163void
5164xmlParsePI(xmlParserCtxtPtr ctxt) {
5165 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005166 size_t len = 0;
5167 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005168 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005169 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005170 xmlParserInputState state;
5171 int count = 0;
5172
5173 if ((RAW == '<') && (NXT(1) == '?')) {
5174 xmlParserInputPtr input = ctxt->input;
5175 state = ctxt->instate;
5176 ctxt->instate = XML_PARSER_PI;
5177 /*
5178 * this is a Processing Instruction.
5179 */
5180 SKIP(2);
5181 SHRINK;
5182
5183 /*
5184 * Parse the target name and check for special support like
5185 * namespace.
5186 */
5187 target = xmlParsePITarget(ctxt);
5188 if (target != NULL) {
5189 if ((RAW == '?') && (NXT(1) == '>')) {
5190 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005191 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5192 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005193 }
5194 SKIP(2);
5195
5196 /*
5197 * SAX: PI detected.
5198 */
5199 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5200 (ctxt->sax->processingInstruction != NULL))
5201 ctxt->sax->processingInstruction(ctxt->userData,
5202 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005203 if (ctxt->instate != XML_PARSER_EOF)
5204 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005205 return;
5206 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005207 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005208 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005209 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005210 ctxt->instate = state;
5211 return;
5212 }
5213 cur = CUR;
5214 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005215 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5216 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005217 }
5218 SKIP_BLANKS;
5219 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005220 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005221 ((cur != '?') || (NXT(1) != '>'))) {
5222 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005223 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005224 size_t new_size = size * 2;
5225 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005226 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005227 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005228 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005229 ctxt->instate = state;
5230 return;
5231 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005232 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005233 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005234 }
5235 count++;
5236 if (count > 50) {
5237 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005238 if (ctxt->instate == XML_PARSER_EOF) {
5239 xmlFree(buf);
5240 return;
5241 }
Owen Taylor3473f882001-02-23 17:55:21 +00005242 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005243 if ((len > XML_MAX_TEXT_LENGTH) &&
5244 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5245 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5246 "PI %s too big found", target);
5247 xmlFree(buf);
5248 ctxt->instate = state;
5249 return;
5250 }
Owen Taylor3473f882001-02-23 17:55:21 +00005251 }
5252 COPY_BUF(l,buf,len,cur);
5253 NEXTL(l);
5254 cur = CUR_CHAR(l);
5255 if (cur == 0) {
5256 SHRINK;
5257 GROW;
5258 cur = CUR_CHAR(l);
5259 }
5260 }
Daniel Veillard51304812012-07-19 20:34:26 +08005261 if ((len > XML_MAX_TEXT_LENGTH) &&
5262 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5263 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5264 "PI %s too big found", target);
5265 xmlFree(buf);
5266 ctxt->instate = state;
5267 return;
5268 }
Owen Taylor3473f882001-02-23 17:55:21 +00005269 buf[len] = 0;
5270 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005271 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5272 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005273 } else {
5274 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005275 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5276 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005277 }
5278 SKIP(2);
5279
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005280#ifdef LIBXML_CATALOG_ENABLED
5281 if (((state == XML_PARSER_MISC) ||
5282 (state == XML_PARSER_START)) &&
5283 (xmlStrEqual(target, XML_CATALOG_PI))) {
5284 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5285 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5286 (allow == XML_CATA_ALLOW_ALL))
5287 xmlParseCatalogPI(ctxt, buf);
5288 }
5289#endif
5290
5291
Owen Taylor3473f882001-02-23 17:55:21 +00005292 /*
5293 * SAX: PI detected.
5294 */
5295 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5296 (ctxt->sax->processingInstruction != NULL))
5297 ctxt->sax->processingInstruction(ctxt->userData,
5298 target, buf);
5299 }
5300 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005301 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005302 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005303 }
Chris Evans77404b82011-12-14 16:18:25 +08005304 if (ctxt->instate != XML_PARSER_EOF)
5305 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005306 }
5307}
5308
5309/**
5310 * xmlParseNotationDecl:
5311 * @ctxt: an XML parser context
5312 *
5313 * parse a notation declaration
5314 *
5315 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5316 *
5317 * Hence there is actually 3 choices:
5318 * 'PUBLIC' S PubidLiteral
5319 * 'PUBLIC' S PubidLiteral S SystemLiteral
5320 * and 'SYSTEM' S SystemLiteral
5321 *
5322 * See the NOTE on xmlParseExternalID().
5323 */
5324
5325void
5326xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005327 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005328 xmlChar *Pubid;
5329 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005330
Daniel Veillarda07050d2003-10-19 14:46:32 +00005331 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005332 xmlParserInputPtr input = ctxt->input;
5333 SHRINK;
5334 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005335 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005336 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5337 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005338 return;
5339 }
5340 SKIP_BLANKS;
5341
Daniel Veillard76d66f42001-05-16 21:05:17 +00005342 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005343 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005344 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005345 return;
5346 }
William M. Brack76e95df2003-10-18 16:20:14 +00005347 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005348 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005349 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005350 return;
5351 }
Daniel Veillard37334572008-07-31 08:20:02 +00005352 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005353 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005354 "colon are forbidden from notation names '%s'\n",
5355 name, NULL, NULL);
5356 }
Owen Taylor3473f882001-02-23 17:55:21 +00005357 SKIP_BLANKS;
5358
5359 /*
5360 * Parse the IDs.
5361 */
5362 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5363 SKIP_BLANKS;
5364
5365 if (RAW == '>') {
5366 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005367 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5368 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005369 }
5370 NEXT;
5371 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5372 (ctxt->sax->notationDecl != NULL))
5373 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5374 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005375 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005376 }
Owen Taylor3473f882001-02-23 17:55:21 +00005377 if (Systemid != NULL) xmlFree(Systemid);
5378 if (Pubid != NULL) xmlFree(Pubid);
5379 }
5380}
5381
5382/**
5383 * xmlParseEntityDecl:
5384 * @ctxt: an XML parser context
5385 *
5386 * parse <!ENTITY declarations
5387 *
5388 * [70] EntityDecl ::= GEDecl | PEDecl
5389 *
5390 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5391 *
5392 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5393 *
5394 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5395 *
5396 * [74] PEDef ::= EntityValue | ExternalID
5397 *
5398 * [76] NDataDecl ::= S 'NDATA' S Name
5399 *
5400 * [ VC: Notation Declared ]
5401 * The Name must match the declared name of a notation.
5402 */
5403
5404void
5405xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005406 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005407 xmlChar *value = NULL;
5408 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005409 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005410 int isParameter = 0;
5411 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005412 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005413
Daniel Veillard4c778d82005-01-23 17:37:44 +00005414 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005415 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005416 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005417 SHRINK;
5418 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005419 skipped = SKIP_BLANKS;
5420 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005421 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5422 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005423 }
Owen Taylor3473f882001-02-23 17:55:21 +00005424
5425 if (RAW == '%') {
5426 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005427 skipped = SKIP_BLANKS;
5428 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005429 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5430 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005431 }
Owen Taylor3473f882001-02-23 17:55:21 +00005432 isParameter = 1;
5433 }
5434
Daniel Veillard76d66f42001-05-16 21:05:17 +00005435 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005436 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005437 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5438 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005439 return;
5440 }
Daniel Veillard37334572008-07-31 08:20:02 +00005441 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005442 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005443 "colon are forbidden from entities names '%s'\n",
5444 name, NULL, NULL);
5445 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005446 skipped = SKIP_BLANKS;
5447 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005448 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5449 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005450 }
Owen Taylor3473f882001-02-23 17:55:21 +00005451
Daniel Veillardf5582f12002-06-11 10:08:16 +00005452 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005453 /*
5454 * handle the various case of definitions...
5455 */
5456 if (isParameter) {
5457 if ((RAW == '"') || (RAW == '\'')) {
5458 value = xmlParseEntityValue(ctxt, &orig);
5459 if (value) {
5460 if ((ctxt->sax != NULL) &&
5461 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5462 ctxt->sax->entityDecl(ctxt->userData, name,
5463 XML_INTERNAL_PARAMETER_ENTITY,
5464 NULL, NULL, value);
5465 }
5466 } else {
5467 URI = xmlParseExternalID(ctxt, &literal, 1);
5468 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005469 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005470 }
5471 if (URI) {
5472 xmlURIPtr uri;
5473
5474 uri = xmlParseURI((const char *) URI);
5475 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005476 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5477 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005478 /*
5479 * This really ought to be a well formedness error
5480 * but the XML Core WG decided otherwise c.f. issue
5481 * E26 of the XML erratas.
5482 */
Owen Taylor3473f882001-02-23 17:55:21 +00005483 } else {
5484 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005485 /*
5486 * Okay this is foolish to block those but not
5487 * invalid URIs.
5488 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005489 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005490 } else {
5491 if ((ctxt->sax != NULL) &&
5492 (!ctxt->disableSAX) &&
5493 (ctxt->sax->entityDecl != NULL))
5494 ctxt->sax->entityDecl(ctxt->userData, name,
5495 XML_EXTERNAL_PARAMETER_ENTITY,
5496 literal, URI, NULL);
5497 }
5498 xmlFreeURI(uri);
5499 }
5500 }
5501 }
5502 } else {
5503 if ((RAW == '"') || (RAW == '\'')) {
5504 value = xmlParseEntityValue(ctxt, &orig);
5505 if ((ctxt->sax != NULL) &&
5506 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5507 ctxt->sax->entityDecl(ctxt->userData, name,
5508 XML_INTERNAL_GENERAL_ENTITY,
5509 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005510 /*
5511 * For expat compatibility in SAX mode.
5512 */
5513 if ((ctxt->myDoc == NULL) ||
5514 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5515 if (ctxt->myDoc == NULL) {
5516 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005517 if (ctxt->myDoc == NULL) {
5518 xmlErrMemory(ctxt, "New Doc failed");
5519 return;
5520 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005521 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005522 }
5523 if (ctxt->myDoc->intSubset == NULL)
5524 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5525 BAD_CAST "fake", NULL, NULL);
5526
Daniel Veillard1af9a412003-08-20 22:54:39 +00005527 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5528 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005529 }
Owen Taylor3473f882001-02-23 17:55:21 +00005530 } else {
5531 URI = xmlParseExternalID(ctxt, &literal, 1);
5532 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005533 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005534 }
5535 if (URI) {
5536 xmlURIPtr uri;
5537
5538 uri = xmlParseURI((const char *)URI);
5539 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005540 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5541 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005542 /*
5543 * This really ought to be a well formedness error
5544 * but the XML Core WG decided otherwise c.f. issue
5545 * E26 of the XML erratas.
5546 */
Owen Taylor3473f882001-02-23 17:55:21 +00005547 } else {
5548 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005549 /*
5550 * Okay this is foolish to block those but not
5551 * invalid URIs.
5552 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005553 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005554 }
5555 xmlFreeURI(uri);
5556 }
5557 }
William M. Brack76e95df2003-10-18 16:20:14 +00005558 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5560 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005561 }
5562 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005563 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005564 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005565 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005566 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5567 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005568 }
5569 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005570 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005571 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5572 (ctxt->sax->unparsedEntityDecl != NULL))
5573 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5574 literal, URI, ndata);
5575 } else {
5576 if ((ctxt->sax != NULL) &&
5577 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5578 ctxt->sax->entityDecl(ctxt->userData, name,
5579 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5580 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005581 /*
5582 * For expat compatibility in SAX mode.
5583 * assuming the entity repalcement was asked for
5584 */
5585 if ((ctxt->replaceEntities != 0) &&
5586 ((ctxt->myDoc == NULL) ||
5587 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5588 if (ctxt->myDoc == NULL) {
5589 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005590 if (ctxt->myDoc == NULL) {
5591 xmlErrMemory(ctxt, "New Doc failed");
5592 return;
5593 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005594 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005595 }
5596
5597 if (ctxt->myDoc->intSubset == NULL)
5598 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5599 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005600 xmlSAX2EntityDecl(ctxt, name,
5601 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5602 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005603 }
Owen Taylor3473f882001-02-23 17:55:21 +00005604 }
5605 }
5606 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08005607 if (ctxt->instate == XML_PARSER_EOF)
5608 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005609 SKIP_BLANKS;
5610 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005611 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005612 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005613 } else {
5614 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005615 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5616 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005617 }
5618 NEXT;
5619 }
5620 if (orig != NULL) {
5621 /*
5622 * Ugly mechanism to save the raw entity value.
5623 */
5624 xmlEntityPtr cur = NULL;
5625
5626 if (isParameter) {
5627 if ((ctxt->sax != NULL) &&
5628 (ctxt->sax->getParameterEntity != NULL))
5629 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5630 } else {
5631 if ((ctxt->sax != NULL) &&
5632 (ctxt->sax->getEntity != NULL))
5633 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005634 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005635 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005636 }
Owen Taylor3473f882001-02-23 17:55:21 +00005637 }
5638 if (cur != NULL) {
5639 if (cur->orig != NULL)
5640 xmlFree(orig);
5641 else
5642 cur->orig = orig;
5643 } else
5644 xmlFree(orig);
5645 }
Owen Taylor3473f882001-02-23 17:55:21 +00005646 if (value != NULL) xmlFree(value);
5647 if (URI != NULL) xmlFree(URI);
5648 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005649 }
5650}
5651
5652/**
5653 * xmlParseDefaultDecl:
5654 * @ctxt: an XML parser context
5655 * @value: Receive a possible fixed default value for the attribute
5656 *
5657 * Parse an attribute default declaration
5658 *
5659 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5660 *
5661 * [ VC: Required Attribute ]
5662 * if the default declaration is the keyword #REQUIRED, then the
5663 * attribute must be specified for all elements of the type in the
5664 * attribute-list declaration.
5665 *
5666 * [ VC: Attribute Default Legal ]
5667 * The declared default value must meet the lexical constraints of
5668 * the declared attribute type c.f. xmlValidateAttributeDecl()
5669 *
5670 * [ VC: Fixed Attribute Default ]
5671 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005672 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005673 *
5674 * [ WFC: No < in Attribute Values ]
5675 * handled in xmlParseAttValue()
5676 *
5677 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005678 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005679 */
5680
5681int
5682xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5683 int val;
5684 xmlChar *ret;
5685
5686 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005687 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005688 SKIP(9);
5689 return(XML_ATTRIBUTE_REQUIRED);
5690 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005691 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005692 SKIP(8);
5693 return(XML_ATTRIBUTE_IMPLIED);
5694 }
5695 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005696 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005697 SKIP(6);
5698 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005699 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005700 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5701 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005702 }
5703 SKIP_BLANKS;
5704 }
5705 ret = xmlParseAttValue(ctxt);
5706 ctxt->instate = XML_PARSER_DTD;
5707 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005708 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005709 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005710 } else
5711 *value = ret;
5712 return(val);
5713}
5714
5715/**
5716 * xmlParseNotationType:
5717 * @ctxt: an XML parser context
5718 *
5719 * parse an Notation attribute type.
5720 *
5721 * Note: the leading 'NOTATION' S part has already being parsed...
5722 *
5723 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5724 *
5725 * [ VC: Notation Attributes ]
5726 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005727 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005728 *
5729 * Returns: the notation attribute tree built while parsing
5730 */
5731
5732xmlEnumerationPtr
5733xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005734 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005735 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005736
5737 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005738 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005739 return(NULL);
5740 }
5741 SHRINK;
5742 do {
5743 NEXT;
5744 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005745 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005746 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005747 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5748 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005749 xmlFreeEnumeration(ret);
5750 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005751 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005752 tmp = ret;
5753 while (tmp != NULL) {
5754 if (xmlStrEqual(name, tmp->name)) {
5755 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5756 "standalone: attribute notation value token %s duplicated\n",
5757 name, NULL);
5758 if (!xmlDictOwns(ctxt->dict, name))
5759 xmlFree((xmlChar *) name);
5760 break;
5761 }
5762 tmp = tmp->next;
5763 }
5764 if (tmp == NULL) {
5765 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005766 if (cur == NULL) {
5767 xmlFreeEnumeration(ret);
5768 return(NULL);
5769 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005770 if (last == NULL) ret = last = cur;
5771 else {
5772 last->next = cur;
5773 last = cur;
5774 }
Owen Taylor3473f882001-02-23 17:55:21 +00005775 }
5776 SKIP_BLANKS;
5777 } while (RAW == '|');
5778 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005779 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005780 xmlFreeEnumeration(ret);
5781 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005782 }
5783 NEXT;
5784 return(ret);
5785}
5786
5787/**
5788 * xmlParseEnumerationType:
5789 * @ctxt: an XML parser context
5790 *
5791 * parse an Enumeration attribute type.
5792 *
5793 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5794 *
5795 * [ VC: Enumeration ]
5796 * Values of this type must match one of the Nmtoken tokens in
5797 * the declaration
5798 *
5799 * Returns: the enumeration attribute tree built while parsing
5800 */
5801
5802xmlEnumerationPtr
5803xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5804 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005805 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005806
5807 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005808 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005809 return(NULL);
5810 }
5811 SHRINK;
5812 do {
5813 NEXT;
5814 SKIP_BLANKS;
5815 name = xmlParseNmtoken(ctxt);
5816 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005817 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005818 return(ret);
5819 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005820 tmp = ret;
5821 while (tmp != NULL) {
5822 if (xmlStrEqual(name, tmp->name)) {
5823 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5824 "standalone: attribute enumeration value token %s duplicated\n",
5825 name, NULL);
5826 if (!xmlDictOwns(ctxt->dict, name))
5827 xmlFree(name);
5828 break;
5829 }
5830 tmp = tmp->next;
5831 }
5832 if (tmp == NULL) {
5833 cur = xmlCreateEnumeration(name);
5834 if (!xmlDictOwns(ctxt->dict, name))
5835 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005836 if (cur == NULL) {
5837 xmlFreeEnumeration(ret);
5838 return(NULL);
5839 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005840 if (last == NULL) ret = last = cur;
5841 else {
5842 last->next = cur;
5843 last = cur;
5844 }
Owen Taylor3473f882001-02-23 17:55:21 +00005845 }
5846 SKIP_BLANKS;
5847 } while (RAW == '|');
5848 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005849 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005850 return(ret);
5851 }
5852 NEXT;
5853 return(ret);
5854}
5855
5856/**
5857 * xmlParseEnumeratedType:
5858 * @ctxt: an XML parser context
5859 * @tree: the enumeration tree built while parsing
5860 *
5861 * parse an Enumerated attribute type.
5862 *
5863 * [57] EnumeratedType ::= NotationType | Enumeration
5864 *
5865 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5866 *
5867 *
5868 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5869 */
5870
5871int
5872xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005873 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005874 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005875 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005876 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5877 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005878 return(0);
5879 }
5880 SKIP_BLANKS;
5881 *tree = xmlParseNotationType(ctxt);
5882 if (*tree == NULL) return(0);
5883 return(XML_ATTRIBUTE_NOTATION);
5884 }
5885 *tree = xmlParseEnumerationType(ctxt);
5886 if (*tree == NULL) return(0);
5887 return(XML_ATTRIBUTE_ENUMERATION);
5888}
5889
5890/**
5891 * xmlParseAttributeType:
5892 * @ctxt: an XML parser context
5893 * @tree: the enumeration tree built while parsing
5894 *
5895 * parse the Attribute list def for an element
5896 *
5897 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5898 *
5899 * [55] StringType ::= 'CDATA'
5900 *
5901 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5902 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5903 *
5904 * Validity constraints for attribute values syntax are checked in
5905 * xmlValidateAttributeValue()
5906 *
5907 * [ VC: ID ]
5908 * Values of type ID must match the Name production. A name must not
5909 * appear more than once in an XML document as a value of this type;
5910 * i.e., ID values must uniquely identify the elements which bear them.
5911 *
5912 * [ VC: One ID per Element Type ]
5913 * No element type may have more than one ID attribute specified.
5914 *
5915 * [ VC: ID Attribute Default ]
5916 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5917 *
5918 * [ VC: IDREF ]
5919 * Values of type IDREF must match the Name production, and values
5920 * of type IDREFS must match Names; each IDREF Name must match the value
5921 * of an ID attribute on some element in the XML document; i.e. IDREF
5922 * values must match the value of some ID attribute.
5923 *
5924 * [ VC: Entity Name ]
5925 * Values of type ENTITY must match the Name production, values
5926 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005927 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005928 *
5929 * [ VC: Name Token ]
5930 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005931 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005932 *
5933 * Returns the attribute type
5934 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005935int
Owen Taylor3473f882001-02-23 17:55:21 +00005936xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5937 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005938 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005939 SKIP(5);
5940 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005941 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005942 SKIP(6);
5943 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005944 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005945 SKIP(5);
5946 return(XML_ATTRIBUTE_IDREF);
5947 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5948 SKIP(2);
5949 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005950 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005951 SKIP(6);
5952 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005953 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005954 SKIP(8);
5955 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005956 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005957 SKIP(8);
5958 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005959 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005960 SKIP(7);
5961 return(XML_ATTRIBUTE_NMTOKEN);
5962 }
5963 return(xmlParseEnumeratedType(ctxt, tree));
5964}
5965
5966/**
5967 * xmlParseAttributeListDecl:
5968 * @ctxt: an XML parser context
5969 *
5970 * : parse the Attribute list def for an element
5971 *
5972 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5973 *
5974 * [53] AttDef ::= S Name S AttType S DefaultDecl
5975 *
5976 */
5977void
5978xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005979 const xmlChar *elemName;
5980 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005981 xmlEnumerationPtr tree;
5982
Daniel Veillarda07050d2003-10-19 14:46:32 +00005983 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005984 xmlParserInputPtr input = ctxt->input;
5985
5986 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005987 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005988 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005989 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005990 }
5991 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005992 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005993 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005994 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5995 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005996 return;
5997 }
5998 SKIP_BLANKS;
5999 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006000 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006001 const xmlChar *check = CUR_PTR;
6002 int type;
6003 int def;
6004 xmlChar *defaultValue = NULL;
6005
6006 GROW;
6007 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006008 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006009 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006010 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6011 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006012 break;
6013 }
6014 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006015 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006016 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006017 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006018 break;
6019 }
6020 SKIP_BLANKS;
6021
6022 type = xmlParseAttributeType(ctxt, &tree);
6023 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006024 break;
6025 }
6026
6027 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00006028 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006029 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6030 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006031 if (tree != NULL)
6032 xmlFreeEnumeration(tree);
6033 break;
6034 }
6035 SKIP_BLANKS;
6036
6037 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6038 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006039 if (defaultValue != NULL)
6040 xmlFree(defaultValue);
6041 if (tree != NULL)
6042 xmlFreeEnumeration(tree);
6043 break;
6044 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006045 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6046 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006047
6048 GROW;
6049 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006050 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006051 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006052 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006053 if (defaultValue != NULL)
6054 xmlFree(defaultValue);
6055 if (tree != NULL)
6056 xmlFreeEnumeration(tree);
6057 break;
6058 }
6059 SKIP_BLANKS;
6060 }
6061 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006062 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6063 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006064 if (defaultValue != NULL)
6065 xmlFree(defaultValue);
6066 if (tree != NULL)
6067 xmlFreeEnumeration(tree);
6068 break;
6069 }
6070 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6071 (ctxt->sax->attributeDecl != NULL))
6072 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6073 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006074 else if (tree != NULL)
6075 xmlFreeEnumeration(tree);
6076
6077 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006078 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006079 (def != XML_ATTRIBUTE_REQUIRED)) {
6080 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6081 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006082 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006083 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6084 }
Owen Taylor3473f882001-02-23 17:55:21 +00006085 if (defaultValue != NULL)
6086 xmlFree(defaultValue);
6087 GROW;
6088 }
6089 if (RAW == '>') {
6090 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006091 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6092 "Attribute list declaration doesn't start and stop in the same entity\n",
6093 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006094 }
6095 NEXT;
6096 }
Owen Taylor3473f882001-02-23 17:55:21 +00006097 }
6098}
6099
6100/**
6101 * xmlParseElementMixedContentDecl:
6102 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006103 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006104 *
6105 * parse the declaration for a Mixed Element content
6106 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006107 *
Owen Taylor3473f882001-02-23 17:55:21 +00006108 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6109 * '(' S? '#PCDATA' S? ')'
6110 *
6111 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6112 *
6113 * [ VC: No Duplicate Types ]
6114 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006115 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006116 *
6117 * returns: the list of the xmlElementContentPtr describing the element choices
6118 */
6119xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006120xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006121 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006122 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006123
6124 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006125 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006126 SKIP(7);
6127 SKIP_BLANKS;
6128 SHRINK;
6129 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006130 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006131 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6132"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006133 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006134 }
Owen Taylor3473f882001-02-23 17:55:21 +00006135 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006136 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006137 if (ret == NULL)
6138 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006139 if (RAW == '*') {
6140 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6141 NEXT;
6142 }
6143 return(ret);
6144 }
6145 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006146 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006147 if (ret == NULL) return(NULL);
6148 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006149 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006150 NEXT;
6151 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006152 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006153 if (ret == NULL) return(NULL);
6154 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006155 if (cur != NULL)
6156 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006157 cur = ret;
6158 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006159 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006160 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006161 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006162 if (n->c1 != NULL)
6163 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006164 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006165 if (n != NULL)
6166 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006167 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006168 }
6169 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006170 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006171 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006172 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006173 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006174 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00006175 return(NULL);
6176 }
6177 SKIP_BLANKS;
6178 GROW;
6179 }
6180 if ((RAW == ')') && (NXT(1) == '*')) {
6181 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006182 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006183 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006184 if (cur->c2 != NULL)
6185 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006186 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006187 if (ret != NULL)
6188 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006189 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006190 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6191"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006192 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006193 }
Owen Taylor3473f882001-02-23 17:55:21 +00006194 SKIP(2);
6195 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006196 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006197 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006198 return(NULL);
6199 }
6200
6201 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006202 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006203 }
6204 return(ret);
6205}
6206
6207/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006208 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006209 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006210 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006211 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006212 *
6213 * parse the declaration for a Mixed Element content
6214 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006215 *
Owen Taylor3473f882001-02-23 17:55:21 +00006216 *
6217 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6218 *
6219 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6220 *
6221 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6222 *
6223 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6224 *
6225 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6226 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006227 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006228 * opening or closing parentheses in a choice, seq, or Mixed
6229 * construct is contained in the replacement text for a parameter
6230 * entity, both must be contained in the same replacement text. For
6231 * interoperability, if a parameter-entity reference appears in a
6232 * choice, seq, or Mixed construct, its replacement text should not
6233 * be empty, and neither the first nor last non-blank character of
6234 * the replacement text should be a connector (| or ,).
6235 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006236 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006237 * hierarchy.
6238 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006239static xmlElementContentPtr
6240xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6241 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006242 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006243 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006244 xmlChar type = 0;
6245
Daniel Veillard489f9672009-08-10 16:49:30 +02006246 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6247 (depth > 2048)) {
6248 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6249"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6250 depth);
6251 return(NULL);
6252 }
Owen Taylor3473f882001-02-23 17:55:21 +00006253 SKIP_BLANKS;
6254 GROW;
6255 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006256 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006257
Owen Taylor3473f882001-02-23 17:55:21 +00006258 /* Recurse on first child */
6259 NEXT;
6260 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006261 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6262 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006263 SKIP_BLANKS;
6264 GROW;
6265 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006266 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006267 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006268 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006269 return(NULL);
6270 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006271 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006272 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006273 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006274 return(NULL);
6275 }
Owen Taylor3473f882001-02-23 17:55:21 +00006276 GROW;
6277 if (RAW == '?') {
6278 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6279 NEXT;
6280 } else if (RAW == '*') {
6281 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6282 NEXT;
6283 } else if (RAW == '+') {
6284 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6285 NEXT;
6286 } else {
6287 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6288 }
Owen Taylor3473f882001-02-23 17:55:21 +00006289 GROW;
6290 }
6291 SKIP_BLANKS;
6292 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006293 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006294 /*
6295 * Each loop we parse one separator and one element.
6296 */
6297 if (RAW == ',') {
6298 if (type == 0) type = CUR;
6299
6300 /*
6301 * Detect "Name | Name , Name" error
6302 */
6303 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006304 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006305 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006306 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006307 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006308 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006309 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006310 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006311 return(NULL);
6312 }
6313 NEXT;
6314
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006315 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006316 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006317 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006318 xmlFreeDocElementContent(ctxt->myDoc, last);
6319 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006320 return(NULL);
6321 }
6322 if (last == NULL) {
6323 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006324 if (ret != NULL)
6325 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006326 ret = cur = op;
6327 } else {
6328 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006329 if (op != NULL)
6330 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006331 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006332 if (last != NULL)
6333 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006334 cur =op;
6335 last = NULL;
6336 }
6337 } else if (RAW == '|') {
6338 if (type == 0) type = CUR;
6339
6340 /*
6341 * Detect "Name , Name | Name" error
6342 */
6343 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006344 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006345 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006346 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006347 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006348 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006349 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006350 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006351 return(NULL);
6352 }
6353 NEXT;
6354
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006355 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006356 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006357 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006358 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006359 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006360 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006361 return(NULL);
6362 }
6363 if (last == NULL) {
6364 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006365 if (ret != NULL)
6366 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006367 ret = cur = op;
6368 } else {
6369 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006370 if (op != NULL)
6371 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006372 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006373 if (last != NULL)
6374 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006375 cur =op;
6376 last = NULL;
6377 }
6378 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006379 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006380 if ((last != NULL) && (last != ret))
6381 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006382 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006383 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006384 return(NULL);
6385 }
6386 GROW;
6387 SKIP_BLANKS;
6388 GROW;
6389 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006390 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006391 /* Recurse on second child */
6392 NEXT;
6393 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006394 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6395 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006396 SKIP_BLANKS;
6397 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006398 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006399 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006400 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006401 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006402 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006403 return(NULL);
6404 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006405 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006406 if (last == NULL) {
6407 if (ret != NULL)
6408 xmlFreeDocElementContent(ctxt->myDoc, ret);
6409 return(NULL);
6410 }
Owen Taylor3473f882001-02-23 17:55:21 +00006411 if (RAW == '?') {
6412 last->ocur = XML_ELEMENT_CONTENT_OPT;
6413 NEXT;
6414 } else if (RAW == '*') {
6415 last->ocur = XML_ELEMENT_CONTENT_MULT;
6416 NEXT;
6417 } else if (RAW == '+') {
6418 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6419 NEXT;
6420 } else {
6421 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6422 }
6423 }
6424 SKIP_BLANKS;
6425 GROW;
6426 }
6427 if ((cur != NULL) && (last != NULL)) {
6428 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006429 if (last != NULL)
6430 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006431 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006432 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006433 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6434"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006435 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006436 }
Owen Taylor3473f882001-02-23 17:55:21 +00006437 NEXT;
6438 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006439 if (ret != NULL) {
6440 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6441 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6442 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6443 else
6444 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6445 }
Owen Taylor3473f882001-02-23 17:55:21 +00006446 NEXT;
6447 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006448 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006449 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006450 cur = ret;
6451 /*
6452 * Some normalization:
6453 * (a | b* | c?)* == (a | b | c)*
6454 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006455 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006456 if ((cur->c1 != NULL) &&
6457 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6458 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6459 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6460 if ((cur->c2 != NULL) &&
6461 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6462 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6463 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6464 cur = cur->c2;
6465 }
6466 }
Owen Taylor3473f882001-02-23 17:55:21 +00006467 NEXT;
6468 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006469 if (ret != NULL) {
6470 int found = 0;
6471
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006472 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6473 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6474 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006475 else
6476 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006477 /*
6478 * Some normalization:
6479 * (a | b*)+ == (a | b)*
6480 * (a | b?)+ == (a | b)*
6481 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006482 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006483 if ((cur->c1 != NULL) &&
6484 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6485 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6486 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6487 found = 1;
6488 }
6489 if ((cur->c2 != NULL) &&
6490 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6491 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6492 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6493 found = 1;
6494 }
6495 cur = cur->c2;
6496 }
6497 if (found)
6498 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6499 }
Owen Taylor3473f882001-02-23 17:55:21 +00006500 NEXT;
6501 }
6502 return(ret);
6503}
6504
6505/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006506 * xmlParseElementChildrenContentDecl:
6507 * @ctxt: an XML parser context
6508 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006509 *
6510 * parse the declaration for a Mixed Element content
6511 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6512 *
6513 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6514 *
6515 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6516 *
6517 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6518 *
6519 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6520 *
6521 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6522 * TODO Parameter-entity replacement text must be properly nested
6523 * with parenthesized groups. That is to say, if either of the
6524 * opening or closing parentheses in a choice, seq, or Mixed
6525 * construct is contained in the replacement text for a parameter
6526 * entity, both must be contained in the same replacement text. For
6527 * interoperability, if a parameter-entity reference appears in a
6528 * choice, seq, or Mixed construct, its replacement text should not
6529 * be empty, and neither the first nor last non-blank character of
6530 * the replacement text should be a connector (| or ,).
6531 *
6532 * Returns the tree of xmlElementContentPtr describing the element
6533 * hierarchy.
6534 */
6535xmlElementContentPtr
6536xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6537 /* stub left for API/ABI compat */
6538 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6539}
6540
6541/**
Owen Taylor3473f882001-02-23 17:55:21 +00006542 * xmlParseElementContentDecl:
6543 * @ctxt: an XML parser context
6544 * @name: the name of the element being defined.
6545 * @result: the Element Content pointer will be stored here if any
6546 *
6547 * parse the declaration for an Element content either Mixed or Children,
6548 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006549 *
Owen Taylor3473f882001-02-23 17:55:21 +00006550 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6551 *
6552 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6553 */
6554
6555int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006556xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006557 xmlElementContentPtr *result) {
6558
6559 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006560 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006561 int res;
6562
6563 *result = NULL;
6564
6565 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006566 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006567 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006568 return(-1);
6569 }
6570 NEXT;
6571 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006572 if (ctxt->instate == XML_PARSER_EOF)
6573 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006574 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006575 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006576 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006577 res = XML_ELEMENT_TYPE_MIXED;
6578 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006579 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006580 res = XML_ELEMENT_TYPE_ELEMENT;
6581 }
Owen Taylor3473f882001-02-23 17:55:21 +00006582 SKIP_BLANKS;
6583 *result = tree;
6584 return(res);
6585}
6586
6587/**
6588 * xmlParseElementDecl:
6589 * @ctxt: an XML parser context
6590 *
6591 * parse an Element declaration.
6592 *
6593 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6594 *
6595 * [ VC: Unique Element Type Declaration ]
6596 * No element type may be declared more than once
6597 *
6598 * Returns the type of the element, or -1 in case of error
6599 */
6600int
6601xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006602 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006603 int ret = -1;
6604 xmlElementContentPtr content = NULL;
6605
Daniel Veillard4c778d82005-01-23 17:37:44 +00006606 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006607 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006608 xmlParserInputPtr input = ctxt->input;
6609
6610 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006611 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006612 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6613 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006614 }
6615 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006616 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006617 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006618 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6619 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006620 return(-1);
6621 }
6622 while ((RAW == 0) && (ctxt->inputNr > 1))
6623 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006624 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006625 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6626 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006627 }
6628 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006629 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006630 SKIP(5);
6631 /*
6632 * Element must always be empty.
6633 */
6634 ret = XML_ELEMENT_TYPE_EMPTY;
6635 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6636 (NXT(2) == 'Y')) {
6637 SKIP(3);
6638 /*
6639 * Element is a generic container.
6640 */
6641 ret = XML_ELEMENT_TYPE_ANY;
6642 } else if (RAW == '(') {
6643 ret = xmlParseElementContentDecl(ctxt, name, &content);
6644 } else {
6645 /*
6646 * [ WFC: PEs in Internal Subset ] error handling.
6647 */
6648 if ((RAW == '%') && (ctxt->external == 0) &&
6649 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006650 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006651 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006652 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006653 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006654 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6655 }
Owen Taylor3473f882001-02-23 17:55:21 +00006656 return(-1);
6657 }
6658
6659 SKIP_BLANKS;
6660 /*
6661 * Pop-up of finished entities.
6662 */
6663 while ((RAW == 0) && (ctxt->inputNr > 1))
6664 xmlPopInput(ctxt);
6665 SKIP_BLANKS;
6666
6667 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006668 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006669 if (content != NULL) {
6670 xmlFreeDocElementContent(ctxt->myDoc, content);
6671 }
Owen Taylor3473f882001-02-23 17:55:21 +00006672 } else {
6673 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006674 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6675 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006676 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006677
Owen Taylor3473f882001-02-23 17:55:21 +00006678 NEXT;
6679 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006680 (ctxt->sax->elementDecl != NULL)) {
6681 if (content != NULL)
6682 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006683 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6684 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006685 if ((content != NULL) && (content->parent == NULL)) {
6686 /*
6687 * this is a trick: if xmlAddElementDecl is called,
6688 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006689 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006690 * interfaces or change the API/ABI
6691 */
6692 xmlFreeDocElementContent(ctxt->myDoc, content);
6693 }
6694 } else if (content != NULL) {
6695 xmlFreeDocElementContent(ctxt->myDoc, content);
6696 }
Owen Taylor3473f882001-02-23 17:55:21 +00006697 }
Owen Taylor3473f882001-02-23 17:55:21 +00006698 }
6699 return(ret);
6700}
6701
6702/**
Owen Taylor3473f882001-02-23 17:55:21 +00006703 * xmlParseConditionalSections
6704 * @ctxt: an XML parser context
6705 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006706 * [61] conditionalSect ::= includeSect | ignoreSect
6707 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006708 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6709 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6710 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6711 */
6712
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006713static void
Owen Taylor3473f882001-02-23 17:55:21 +00006714xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006715 int id = ctxt->input->id;
6716
Owen Taylor3473f882001-02-23 17:55:21 +00006717 SKIP(3);
6718 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006719 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006720 SKIP(7);
6721 SKIP_BLANKS;
6722 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006723 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006724 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006725 if (ctxt->input->id != id) {
6726 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6727 "All markup of the conditional section is not in the same entity\n",
6728 NULL, NULL);
6729 }
Owen Taylor3473f882001-02-23 17:55:21 +00006730 NEXT;
6731 }
6732 if (xmlParserDebugEntities) {
6733 if ((ctxt->input != NULL) && (ctxt->input->filename))
6734 xmlGenericError(xmlGenericErrorContext,
6735 "%s(%d): ", ctxt->input->filename,
6736 ctxt->input->line);
6737 xmlGenericError(xmlGenericErrorContext,
6738 "Entering INCLUDE Conditional Section\n");
6739 }
6740
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006741 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6742 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006743 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006744 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006745
6746 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6747 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006748 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006749 NEXT;
6750 } else if (RAW == '%') {
6751 xmlParsePEReference(ctxt);
6752 } else
6753 xmlParseMarkupDecl(ctxt);
6754
6755 /*
6756 * Pop-up of finished entities.
6757 */
6758 while ((RAW == 0) && (ctxt->inputNr > 1))
6759 xmlPopInput(ctxt);
6760
Daniel Veillardfdc91562002-07-01 21:52:03 +00006761 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006762 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006763 break;
6764 }
6765 }
6766 if (xmlParserDebugEntities) {
6767 if ((ctxt->input != NULL) && (ctxt->input->filename))
6768 xmlGenericError(xmlGenericErrorContext,
6769 "%s(%d): ", ctxt->input->filename,
6770 ctxt->input->line);
6771 xmlGenericError(xmlGenericErrorContext,
6772 "Leaving INCLUDE Conditional Section\n");
6773 }
6774
Daniel Veillarda07050d2003-10-19 14:46:32 +00006775 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006776 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006777 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006778 int depth = 0;
6779
6780 SKIP(6);
6781 SKIP_BLANKS;
6782 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006783 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006784 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006785 if (ctxt->input->id != id) {
6786 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6787 "All markup of the conditional section is not in the same entity\n",
6788 NULL, NULL);
6789 }
Owen Taylor3473f882001-02-23 17:55:21 +00006790 NEXT;
6791 }
6792 if (xmlParserDebugEntities) {
6793 if ((ctxt->input != NULL) && (ctxt->input->filename))
6794 xmlGenericError(xmlGenericErrorContext,
6795 "%s(%d): ", ctxt->input->filename,
6796 ctxt->input->line);
6797 xmlGenericError(xmlGenericErrorContext,
6798 "Entering IGNORE Conditional Section\n");
6799 }
6800
6801 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006802 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006803 * But disable SAX event generating DTD building in the meantime
6804 */
6805 state = ctxt->disableSAX;
6806 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006807 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006808 ctxt->instate = XML_PARSER_IGNORE;
6809
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006810 while (((depth >= 0) && (RAW != 0)) &&
6811 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006812 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6813 depth++;
6814 SKIP(3);
6815 continue;
6816 }
6817 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6818 if (--depth >= 0) SKIP(3);
6819 continue;
6820 }
6821 NEXT;
6822 continue;
6823 }
6824
6825 ctxt->disableSAX = state;
6826 ctxt->instate = instate;
6827
6828 if (xmlParserDebugEntities) {
6829 if ((ctxt->input != NULL) && (ctxt->input->filename))
6830 xmlGenericError(xmlGenericErrorContext,
6831 "%s(%d): ", ctxt->input->filename,
6832 ctxt->input->line);
6833 xmlGenericError(xmlGenericErrorContext,
6834 "Leaving IGNORE Conditional Section\n");
6835 }
6836
6837 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006838 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006839 }
6840
6841 if (RAW == 0)
6842 SHRINK;
6843
6844 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006845 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006846 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006847 if (ctxt->input->id != id) {
6848 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6849 "All markup of the conditional section is not in the same entity\n",
6850 NULL, NULL);
6851 }
Owen Taylor3473f882001-02-23 17:55:21 +00006852 SKIP(3);
6853 }
6854}
6855
6856/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006857 * xmlParseMarkupDecl:
6858 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006859 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006860 * parse Markup declarations
6861 *
6862 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6863 * NotationDecl | PI | Comment
6864 *
6865 * [ VC: Proper Declaration/PE Nesting ]
6866 * Parameter-entity replacement text must be properly nested with
6867 * markup declarations. That is to say, if either the first character
6868 * or the last character of a markup declaration (markupdecl above) is
6869 * contained in the replacement text for a parameter-entity reference,
6870 * both must be contained in the same replacement text.
6871 *
6872 * [ WFC: PEs in Internal Subset ]
6873 * In the internal DTD subset, parameter-entity references can occur
6874 * only where markup declarations can occur, not within markup declarations.
6875 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006876 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006877 */
6878void
6879xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6880 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006881 if (CUR == '<') {
6882 if (NXT(1) == '!') {
6883 switch (NXT(2)) {
6884 case 'E':
6885 if (NXT(3) == 'L')
6886 xmlParseElementDecl(ctxt);
6887 else if (NXT(3) == 'N')
6888 xmlParseEntityDecl(ctxt);
6889 break;
6890 case 'A':
6891 xmlParseAttributeListDecl(ctxt);
6892 break;
6893 case 'N':
6894 xmlParseNotationDecl(ctxt);
6895 break;
6896 case '-':
6897 xmlParseComment(ctxt);
6898 break;
6899 default:
6900 /* there is an error but it will be detected later */
6901 break;
6902 }
6903 } else if (NXT(1) == '?') {
6904 xmlParsePI(ctxt);
6905 }
6906 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006907 /*
6908 * This is only for internal subset. On external entities,
6909 * the replacement is done before parsing stage
6910 */
6911 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6912 xmlParsePEReference(ctxt);
6913
6914 /*
6915 * Conditional sections are allowed from entities included
6916 * by PE References in the internal subset.
6917 */
6918 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6919 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6920 xmlParseConditionalSections(ctxt);
6921 }
6922 }
6923
6924 ctxt->instate = XML_PARSER_DTD;
6925}
6926
6927/**
6928 * xmlParseTextDecl:
6929 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006930 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006931 * parse an XML declaration header for external entities
6932 *
6933 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006934 */
6935
6936void
6937xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6938 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006939 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006940
6941 /*
6942 * We know that '<?xml' is here.
6943 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006944 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006945 SKIP(5);
6946 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006947 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006948 return;
6949 }
6950
William M. Brack76e95df2003-10-18 16:20:14 +00006951 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006952 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6953 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006954 }
6955 SKIP_BLANKS;
6956
6957 /*
6958 * We may have the VersionInfo here.
6959 */
6960 version = xmlParseVersionInfo(ctxt);
6961 if (version == NULL)
6962 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006963 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006964 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006965 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6966 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006967 }
6968 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006969 ctxt->input->version = version;
6970
6971 /*
6972 * We must have the encoding declaration
6973 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006974 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006975 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6976 /*
6977 * The XML REC instructs us to stop parsing right here
6978 */
6979 return;
6980 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006981 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6982 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6983 "Missing encoding in text declaration\n");
6984 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006985
6986 SKIP_BLANKS;
6987 if ((RAW == '?') && (NXT(1) == '>')) {
6988 SKIP(2);
6989 } else if (RAW == '>') {
6990 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006991 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006992 NEXT;
6993 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006994 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006995 MOVETO_ENDTAG(CUR_PTR);
6996 NEXT;
6997 }
6998}
6999
7000/**
Owen Taylor3473f882001-02-23 17:55:21 +00007001 * xmlParseExternalSubset:
7002 * @ctxt: an XML parser context
7003 * @ExternalID: the external identifier
7004 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007005 *
Owen Taylor3473f882001-02-23 17:55:21 +00007006 * parse Markup declarations from an external subset
7007 *
7008 * [30] extSubset ::= textDecl? extSubsetDecl
7009 *
7010 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7011 */
7012void
7013xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7014 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00007015 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00007016 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007017
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01007018 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00007019 (ctxt->input->end - ctxt->input->cur >= 4)) {
7020 xmlChar start[4];
7021 xmlCharEncoding enc;
7022
7023 start[0] = RAW;
7024 start[1] = NXT(1);
7025 start[2] = NXT(2);
7026 start[3] = NXT(3);
7027 enc = xmlDetectCharEncoding(start, 4);
7028 if (enc != XML_CHAR_ENCODING_NONE)
7029 xmlSwitchEncoding(ctxt, enc);
7030 }
7031
Daniel Veillarda07050d2003-10-19 14:46:32 +00007032 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007033 xmlParseTextDecl(ctxt);
7034 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7035 /*
7036 * The XML REC instructs us to stop parsing right here
7037 */
7038 ctxt->instate = XML_PARSER_EOF;
7039 return;
7040 }
7041 }
7042 if (ctxt->myDoc == NULL) {
7043 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007044 if (ctxt->myDoc == NULL) {
7045 xmlErrMemory(ctxt, "New Doc failed");
7046 return;
7047 }
7048 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007049 }
7050 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7051 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7052
7053 ctxt->instate = XML_PARSER_DTD;
7054 ctxt->external = 1;
7055 while (((RAW == '<') && (NXT(1) == '?')) ||
7056 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007057 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007058 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007059 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007060
7061 GROW;
7062 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7063 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007064 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007065 NEXT;
7066 } else if (RAW == '%') {
7067 xmlParsePEReference(ctxt);
7068 } else
7069 xmlParseMarkupDecl(ctxt);
7070
7071 /*
7072 * Pop-up of finished entities.
7073 */
7074 while ((RAW == 0) && (ctxt->inputNr > 1))
7075 xmlPopInput(ctxt);
7076
Daniel Veillardfdc91562002-07-01 21:52:03 +00007077 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007078 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007079 break;
7080 }
7081 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007082
Owen Taylor3473f882001-02-23 17:55:21 +00007083 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007084 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007085 }
7086
7087}
7088
7089/**
7090 * xmlParseReference:
7091 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007092 *
Owen Taylor3473f882001-02-23 17:55:21 +00007093 * parse and handle entity references in content, depending on the SAX
7094 * interface, this may end-up in a call to character() if this is a
7095 * CharRef, a predefined entity, if there is no reference() callback.
7096 * or if the parser was asked to switch to that mode.
7097 *
7098 * [67] Reference ::= EntityRef | CharRef
7099 */
7100void
7101xmlParseReference(xmlParserCtxtPtr ctxt) {
7102 xmlEntityPtr ent;
7103 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007104 int was_checked;
7105 xmlNodePtr list = NULL;
7106 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007107
Daniel Veillard0161e632008-08-28 15:36:32 +00007108
7109 if (RAW != '&')
7110 return;
7111
7112 /*
7113 * Simple case of a CharRef
7114 */
Owen Taylor3473f882001-02-23 17:55:21 +00007115 if (NXT(1) == '#') {
7116 int i = 0;
7117 xmlChar out[10];
7118 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007119 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007120
Daniel Veillarddc171602008-03-26 17:41:38 +00007121 if (value == 0)
7122 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007123 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7124 /*
7125 * So we are using non-UTF-8 buffers
7126 * Check that the char fit on 8bits, if not
7127 * generate a CharRef.
7128 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007129 if (value <= 0xFF) {
7130 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007131 out[1] = 0;
7132 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7133 (!ctxt->disableSAX))
7134 ctxt->sax->characters(ctxt->userData, out, 1);
7135 } else {
7136 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007137 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007138 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007139 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007140 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7141 (!ctxt->disableSAX))
7142 ctxt->sax->reference(ctxt->userData, out);
7143 }
7144 } else {
7145 /*
7146 * Just encode the value in UTF-8
7147 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007148 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007149 out[i] = 0;
7150 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7151 (!ctxt->disableSAX))
7152 ctxt->sax->characters(ctxt->userData, out, i);
7153 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007154 return;
7155 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007156
Daniel Veillard0161e632008-08-28 15:36:32 +00007157 /*
7158 * We are seeing an entity reference
7159 */
7160 ent = xmlParseEntityRef(ctxt);
7161 if (ent == NULL) return;
7162 if (!ctxt->wellFormed)
7163 return;
7164 was_checked = ent->checked;
7165
7166 /* special case of predefined entities */
7167 if ((ent->name == NULL) ||
7168 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7169 val = ent->content;
7170 if (val == NULL) return;
7171 /*
7172 * inline the entity.
7173 */
7174 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7175 (!ctxt->disableSAX))
7176 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7177 return;
7178 }
7179
7180 /*
7181 * The first reference to the entity trigger a parsing phase
7182 * where the ent->children is filled with the result from
7183 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007184 * Note: external parsed entities will not be loaded, it is not
7185 * required for a non-validating parser, unless the parsing option
7186 * of validating, or substituting entities were given. Doing so is
7187 * far more secure as the parser will only process data coming from
7188 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007189 */
Daniel Veillard4629ee02012-07-23 14:15:40 +08007190 if ((ent->checked == 0) &&
7191 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7192 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007193 unsigned long oldnbent = ctxt->nbentities;
7194
7195 /*
7196 * This is a bit hackish but this seems the best
7197 * way to make sure both SAX and DOM entity support
7198 * behaves okay.
7199 */
7200 void *user_data;
7201 if (ctxt->userData == ctxt)
7202 user_data = NULL;
7203 else
7204 user_data = ctxt->userData;
7205
7206 /*
7207 * Check that this entity is well formed
7208 * 4.3.2: An internal general parsed entity is well-formed
7209 * if its replacement text matches the production labeled
7210 * content.
7211 */
7212 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7213 ctxt->depth++;
7214 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7215 user_data, &list);
7216 ctxt->depth--;
7217
7218 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7219 ctxt->depth++;
7220 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7221 user_data, ctxt->depth, ent->URI,
7222 ent->ExternalID, &list);
7223 ctxt->depth--;
7224 } else {
7225 ret = XML_ERR_ENTITY_PE_INTERNAL;
7226 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7227 "invalid entity type found\n", NULL);
7228 }
7229
7230 /*
7231 * Store the number of entities needing parsing for this entity
7232 * content and do checkings
7233 */
Daniel Veillardcff25462013-03-11 15:57:55 +08007234 ent->checked = (ctxt->nbentities - oldnbent + 1) * 2;
7235 if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7236 ent->checked |= 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007237 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007238 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007239 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007240 return;
7241 }
Daniel Veillard23f05e02013-02-19 10:21:49 +08007242 if (xmlParserEntityCheck(ctxt, 0, ent, 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007243 xmlFreeNodeList(list);
7244 return;
7245 }
Owen Taylor3473f882001-02-23 17:55:21 +00007246
Daniel Veillard0161e632008-08-28 15:36:32 +00007247 if ((ret == XML_ERR_OK) && (list != NULL)) {
7248 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7249 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7250 (ent->children == NULL)) {
7251 ent->children = list;
7252 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007253 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007254 * Prune it directly in the generated document
7255 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007256 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007257 if (((list->type == XML_TEXT_NODE) &&
7258 (list->next == NULL)) ||
7259 (ctxt->parseMode == XML_PARSE_READER)) {
7260 list->parent = (xmlNodePtr) ent;
7261 list = NULL;
7262 ent->owner = 1;
7263 } else {
7264 ent->owner = 0;
7265 while (list != NULL) {
7266 list->parent = (xmlNodePtr) ctxt->node;
7267 list->doc = ctxt->myDoc;
7268 if (list->next == NULL)
7269 ent->last = list;
7270 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007271 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007272 list = ent->children;
7273#ifdef LIBXML_LEGACY_ENABLED
7274 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7275 xmlAddEntityReference(ent, list, NULL);
7276#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007277 }
7278 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007279 ent->owner = 1;
7280 while (list != NULL) {
7281 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007282 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007283 if (list->next == NULL)
7284 ent->last = list;
7285 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007286 }
7287 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007288 } else {
7289 xmlFreeNodeList(list);
7290 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007291 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007292 } else if ((ret != XML_ERR_OK) &&
7293 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7294 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7295 "Entity '%s' failed to parse\n", ent->name);
7296 } else if (list != NULL) {
7297 xmlFreeNodeList(list);
7298 list = NULL;
7299 }
7300 if (ent->checked == 0)
Daniel Veillardcff25462013-03-11 15:57:55 +08007301 ent->checked = 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007302 } else if (ent->checked != 1) {
Daniel Veillardcff25462013-03-11 15:57:55 +08007303 ctxt->nbentities += ent->checked / 2;
Daniel Veillard0161e632008-08-28 15:36:32 +00007304 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007305
Daniel Veillard0161e632008-08-28 15:36:32 +00007306 /*
7307 * Now that the entity content has been gathered
7308 * provide it to the application, this can take different forms based
7309 * on the parsing modes.
7310 */
7311 if (ent->children == NULL) {
7312 /*
7313 * Probably running in SAX mode and the callbacks don't
7314 * build the entity content. So unless we already went
7315 * though parsing for first checking go though the entity
7316 * content to generate callbacks associated to the entity
7317 */
7318 if (was_checked != 0) {
7319 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007320 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007321 * This is a bit hackish but this seems the best
7322 * way to make sure both SAX and DOM entity support
7323 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007324 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007325 if (ctxt->userData == ctxt)
7326 user_data = NULL;
7327 else
7328 user_data = ctxt->userData;
7329
7330 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7331 ctxt->depth++;
7332 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7333 ent->content, user_data, NULL);
7334 ctxt->depth--;
7335 } else if (ent->etype ==
7336 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7337 ctxt->depth++;
7338 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7339 ctxt->sax, user_data, ctxt->depth,
7340 ent->URI, ent->ExternalID, NULL);
7341 ctxt->depth--;
7342 } else {
7343 ret = XML_ERR_ENTITY_PE_INTERNAL;
7344 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7345 "invalid entity type found\n", NULL);
7346 }
7347 if (ret == XML_ERR_ENTITY_LOOP) {
7348 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7349 return;
7350 }
7351 }
7352 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7353 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7354 /*
7355 * Entity reference callback comes second, it's somewhat
7356 * superfluous but a compatibility to historical behaviour
7357 */
7358 ctxt->sax->reference(ctxt->userData, ent->name);
7359 }
7360 return;
7361 }
7362
7363 /*
7364 * If we didn't get any children for the entity being built
7365 */
7366 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7367 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7368 /*
7369 * Create a node.
7370 */
7371 ctxt->sax->reference(ctxt->userData, ent->name);
7372 return;
7373 }
7374
7375 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7376 /*
7377 * There is a problem on the handling of _private for entities
7378 * (bug 155816): Should we copy the content of the field from
7379 * the entity (possibly overwriting some value set by the user
7380 * when a copy is created), should we leave it alone, or should
7381 * we try to take care of different situations? The problem
7382 * is exacerbated by the usage of this field by the xmlReader.
7383 * To fix this bug, we look at _private on the created node
7384 * and, if it's NULL, we copy in whatever was in the entity.
7385 * If it's not NULL we leave it alone. This is somewhat of a
7386 * hack - maybe we should have further tests to determine
7387 * what to do.
7388 */
7389 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7390 /*
7391 * Seems we are generating the DOM content, do
7392 * a simple tree copy for all references except the first
7393 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007394 */
7395 if (((list == NULL) && (ent->owner == 0)) ||
7396 (ctxt->parseMode == XML_PARSE_READER)) {
7397 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7398
7399 /*
Daniel Veillard23f05e02013-02-19 10:21:49 +08007400 * We are copying here, make sure there is no abuse
7401 */
7402 ctxt->sizeentcopy += ent->length;
7403 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7404 return;
7405
7406 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007407 * when operating on a reader, the entities definitions
7408 * are always owning the entities subtree.
7409 if (ctxt->parseMode == XML_PARSE_READER)
7410 ent->owner = 1;
7411 */
7412
7413 cur = ent->children;
7414 while (cur != NULL) {
7415 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7416 if (nw != NULL) {
7417 if (nw->_private == NULL)
7418 nw->_private = cur->_private;
7419 if (firstChild == NULL){
7420 firstChild = nw;
7421 }
7422 nw = xmlAddChild(ctxt->node, nw);
7423 }
7424 if (cur == ent->last) {
7425 /*
7426 * needed to detect some strange empty
7427 * node cases in the reader tests
7428 */
7429 if ((ctxt->parseMode == XML_PARSE_READER) &&
7430 (nw != NULL) &&
7431 (nw->type == XML_ELEMENT_NODE) &&
7432 (nw->children == NULL))
7433 nw->extra = 1;
7434
7435 break;
7436 }
7437 cur = cur->next;
7438 }
7439#ifdef LIBXML_LEGACY_ENABLED
7440 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7441 xmlAddEntityReference(ent, firstChild, nw);
7442#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007443 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007444 xmlNodePtr nw = NULL, cur, next, last,
7445 firstChild = NULL;
Daniel Veillard23f05e02013-02-19 10:21:49 +08007446
7447 /*
7448 * We are copying here, make sure there is no abuse
7449 */
7450 ctxt->sizeentcopy += ent->length;
7451 if (xmlParserEntityCheck(ctxt, 0, ent, ctxt->sizeentcopy))
7452 return;
7453
Daniel Veillard0161e632008-08-28 15:36:32 +00007454 /*
7455 * Copy the entity child list and make it the new
7456 * entity child list. The goal is to make sure any
7457 * ID or REF referenced will be the one from the
7458 * document content and not the entity copy.
7459 */
7460 cur = ent->children;
7461 ent->children = NULL;
7462 last = ent->last;
7463 ent->last = NULL;
7464 while (cur != NULL) {
7465 next = cur->next;
7466 cur->next = NULL;
7467 cur->parent = NULL;
7468 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7469 if (nw != NULL) {
7470 if (nw->_private == NULL)
7471 nw->_private = cur->_private;
7472 if (firstChild == NULL){
7473 firstChild = cur;
7474 }
7475 xmlAddChild((xmlNodePtr) ent, nw);
7476 xmlAddChild(ctxt->node, cur);
7477 }
7478 if (cur == last)
7479 break;
7480 cur = next;
7481 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007482 if (ent->owner == 0)
7483 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007484#ifdef LIBXML_LEGACY_ENABLED
7485 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7486 xmlAddEntityReference(ent, firstChild, nw);
7487#endif /* LIBXML_LEGACY_ENABLED */
7488 } else {
7489 const xmlChar *nbktext;
7490
7491 /*
7492 * the name change is to avoid coalescing of the
7493 * node with a possible previous text one which
7494 * would make ent->children a dangling pointer
7495 */
7496 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7497 -1);
7498 if (ent->children->type == XML_TEXT_NODE)
7499 ent->children->name = nbktext;
7500 if ((ent->last != ent->children) &&
7501 (ent->last->type == XML_TEXT_NODE))
7502 ent->last->name = nbktext;
7503 xmlAddChildList(ctxt->node, ent->children);
7504 }
7505
7506 /*
7507 * This is to avoid a nasty side effect, see
7508 * characters() in SAX.c
7509 */
7510 ctxt->nodemem = 0;
7511 ctxt->nodelen = 0;
7512 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007513 }
7514 }
7515}
7516
7517/**
7518 * xmlParseEntityRef:
7519 * @ctxt: an XML parser context
7520 *
7521 * parse ENTITY references declarations
7522 *
7523 * [68] EntityRef ::= '&' Name ';'
7524 *
7525 * [ WFC: Entity Declared ]
7526 * In a document without any DTD, a document with only an internal DTD
7527 * subset which contains no parameter entity references, or a document
7528 * with "standalone='yes'", the Name given in the entity reference
7529 * must match that in an entity declaration, except that well-formed
7530 * documents need not declare any of the following entities: amp, lt,
7531 * gt, apos, quot. The declaration of a parameter entity must precede
7532 * any reference to it. Similarly, the declaration of a general entity
7533 * must precede any reference to it which appears in a default value in an
7534 * attribute-list declaration. Note that if entities are declared in the
7535 * external subset or in external parameter entities, a non-validating
7536 * processor is not obligated to read and process their declarations;
7537 * for such documents, the rule that an entity must be declared is a
7538 * well-formedness constraint only if standalone='yes'.
7539 *
7540 * [ WFC: Parsed Entity ]
7541 * An entity reference must not contain the name of an unparsed entity
7542 *
7543 * Returns the xmlEntityPtr if found, or NULL otherwise.
7544 */
7545xmlEntityPtr
7546xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007547 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007548 xmlEntityPtr ent = NULL;
7549
7550 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007551 if (ctxt->instate == XML_PARSER_EOF)
7552 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007553
Daniel Veillard0161e632008-08-28 15:36:32 +00007554 if (RAW != '&')
7555 return(NULL);
7556 NEXT;
7557 name = xmlParseName(ctxt);
7558 if (name == NULL) {
7559 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7560 "xmlParseEntityRef: no name\n");
7561 return(NULL);
7562 }
7563 if (RAW != ';') {
7564 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7565 return(NULL);
7566 }
7567 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007568
Daniel Veillard0161e632008-08-28 15:36:32 +00007569 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007570 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007571 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007572 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7573 ent = xmlGetPredefinedEntity(name);
7574 if (ent != NULL)
7575 return(ent);
7576 }
Owen Taylor3473f882001-02-23 17:55:21 +00007577
Daniel Veillard0161e632008-08-28 15:36:32 +00007578 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007579 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007580 */
7581 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007582
Daniel Veillard0161e632008-08-28 15:36:32 +00007583 /*
7584 * Ask first SAX for entity resolution, otherwise try the
7585 * entities which may have stored in the parser context.
7586 */
7587 if (ctxt->sax != NULL) {
7588 if (ctxt->sax->getEntity != NULL)
7589 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007590 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007591 (ctxt->options & XML_PARSE_OLDSAX))
7592 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007593 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7594 (ctxt->userData==ctxt)) {
7595 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007596 }
7597 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007598 if (ctxt->instate == XML_PARSER_EOF)
7599 return(NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007600 /*
7601 * [ WFC: Entity Declared ]
7602 * In a document without any DTD, a document with only an
7603 * internal DTD subset which contains no parameter entity
7604 * references, or a document with "standalone='yes'", the
7605 * Name given in the entity reference must match that in an
7606 * entity declaration, except that well-formed documents
7607 * need not declare any of the following entities: amp, lt,
7608 * gt, apos, quot.
7609 * The declaration of a parameter entity must precede any
7610 * reference to it.
7611 * Similarly, the declaration of a general entity must
7612 * precede any reference to it which appears in a default
7613 * value in an attribute-list declaration. Note that if
7614 * entities are declared in the external subset or in
7615 * external parameter entities, a non-validating processor
7616 * is not obligated to read and process their declarations;
7617 * for such documents, the rule that an entity must be
7618 * declared is a well-formedness constraint only if
7619 * standalone='yes'.
7620 */
7621 if (ent == NULL) {
7622 if ((ctxt->standalone == 1) ||
7623 ((ctxt->hasExternalSubset == 0) &&
7624 (ctxt->hasPErefs == 0))) {
7625 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7626 "Entity '%s' not defined\n", name);
7627 } else {
7628 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7629 "Entity '%s' not defined\n", name);
7630 if ((ctxt->inSubset == 0) &&
7631 (ctxt->sax != NULL) &&
7632 (ctxt->sax->reference != NULL)) {
7633 ctxt->sax->reference(ctxt->userData, name);
7634 }
7635 }
7636 ctxt->valid = 0;
7637 }
7638
7639 /*
7640 * [ WFC: Parsed Entity ]
7641 * An entity reference must not contain the name of an
7642 * unparsed entity
7643 */
7644 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7645 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7646 "Entity reference to unparsed entity %s\n", name);
7647 }
7648
7649 /*
7650 * [ WFC: No External Entity References ]
7651 * Attribute values cannot contain direct or indirect
7652 * entity references to external entities.
7653 */
7654 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7655 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7656 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7657 "Attribute references external entity '%s'\n", name);
7658 }
7659 /*
7660 * [ WFC: No < in Attribute Values ]
7661 * The replacement text of any entity referred to directly or
7662 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007663 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007664 */
7665 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcff25462013-03-11 15:57:55 +08007666 (ent != NULL) &&
7667 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7668 if ((ent->checked & 1) || ((ent->checked == 0) &&
7669 (ent->content != NULL) &&(xmlStrchr(ent->content, '<')))) {
7670 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7671 "'<' in entity '%s' is not allowed in attributes values\n", name);
7672 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007673 }
7674
7675 /*
7676 * Internal check, no parameter entities here ...
7677 */
7678 else {
7679 switch (ent->etype) {
7680 case XML_INTERNAL_PARAMETER_ENTITY:
7681 case XML_EXTERNAL_PARAMETER_ENTITY:
7682 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7683 "Attempt to reference the parameter entity '%s'\n",
7684 name);
7685 break;
7686 default:
7687 break;
7688 }
7689 }
7690
7691 /*
7692 * [ WFC: No Recursion ]
7693 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007694 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007695 * Done somewhere else
7696 */
Owen Taylor3473f882001-02-23 17:55:21 +00007697 return(ent);
7698}
7699
7700/**
7701 * xmlParseStringEntityRef:
7702 * @ctxt: an XML parser context
7703 * @str: a pointer to an index in the string
7704 *
7705 * parse ENTITY references declarations, but this version parses it from
7706 * a string value.
7707 *
7708 * [68] EntityRef ::= '&' Name ';'
7709 *
7710 * [ WFC: Entity Declared ]
7711 * In a document without any DTD, a document with only an internal DTD
7712 * subset which contains no parameter entity references, or a document
7713 * with "standalone='yes'", the Name given in the entity reference
7714 * must match that in an entity declaration, except that well-formed
7715 * documents need not declare any of the following entities: amp, lt,
7716 * gt, apos, quot. The declaration of a parameter entity must precede
7717 * any reference to it. Similarly, the declaration of a general entity
7718 * must precede any reference to it which appears in a default value in an
7719 * attribute-list declaration. Note that if entities are declared in the
7720 * external subset or in external parameter entities, a non-validating
7721 * processor is not obligated to read and process their declarations;
7722 * for such documents, the rule that an entity must be declared is a
7723 * well-formedness constraint only if standalone='yes'.
7724 *
7725 * [ WFC: Parsed Entity ]
7726 * An entity reference must not contain the name of an unparsed entity
7727 *
7728 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7729 * is updated to the current location in the string.
7730 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007731static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007732xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7733 xmlChar *name;
7734 const xmlChar *ptr;
7735 xmlChar cur;
7736 xmlEntityPtr ent = NULL;
7737
7738 if ((str == NULL) || (*str == NULL))
7739 return(NULL);
7740 ptr = *str;
7741 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007742 if (cur != '&')
7743 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007744
Daniel Veillard0161e632008-08-28 15:36:32 +00007745 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007746 name = xmlParseStringName(ctxt, &ptr);
7747 if (name == NULL) {
7748 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7749 "xmlParseStringEntityRef: no name\n");
7750 *str = ptr;
7751 return(NULL);
7752 }
7753 if (*ptr != ';') {
7754 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007755 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007756 *str = ptr;
7757 return(NULL);
7758 }
7759 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007760
Owen Taylor3473f882001-02-23 17:55:21 +00007761
Daniel Veillard0161e632008-08-28 15:36:32 +00007762 /*
Jan Pokorný9a85d402013-11-29 23:26:25 +01007763 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007764 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007765 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7766 ent = xmlGetPredefinedEntity(name);
7767 if (ent != NULL) {
7768 xmlFree(name);
7769 *str = ptr;
7770 return(ent);
7771 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007772 }
Owen Taylor3473f882001-02-23 17:55:21 +00007773
Daniel Veillard0161e632008-08-28 15:36:32 +00007774 /*
7775 * Increate the number of entity references parsed
7776 */
7777 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007778
Daniel Veillard0161e632008-08-28 15:36:32 +00007779 /*
7780 * Ask first SAX for entity resolution, otherwise try the
7781 * entities which may have stored in the parser context.
7782 */
7783 if (ctxt->sax != NULL) {
7784 if (ctxt->sax->getEntity != NULL)
7785 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007786 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7787 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007788 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7789 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007790 }
7791 }
Daniel Veillarde50ba812013-04-11 15:54:51 +08007792 if (ctxt->instate == XML_PARSER_EOF) {
7793 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08007794 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08007795 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007796
7797 /*
7798 * [ WFC: Entity Declared ]
7799 * In a document without any DTD, a document with only an
7800 * internal DTD subset which contains no parameter entity
7801 * references, or a document with "standalone='yes'", the
7802 * Name given in the entity reference must match that in an
7803 * entity declaration, except that well-formed documents
7804 * need not declare any of the following entities: amp, lt,
7805 * gt, apos, quot.
7806 * The declaration of a parameter entity must precede any
7807 * reference to it.
7808 * Similarly, the declaration of a general entity must
7809 * precede any reference to it which appears in a default
7810 * value in an attribute-list declaration. Note that if
7811 * entities are declared in the external subset or in
7812 * external parameter entities, a non-validating processor
7813 * is not obligated to read and process their declarations;
7814 * for such documents, the rule that an entity must be
7815 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007816 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007817 */
7818 if (ent == NULL) {
7819 if ((ctxt->standalone == 1) ||
7820 ((ctxt->hasExternalSubset == 0) &&
7821 (ctxt->hasPErefs == 0))) {
7822 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7823 "Entity '%s' not defined\n", name);
7824 } else {
7825 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7826 "Entity '%s' not defined\n",
7827 name);
7828 }
7829 /* TODO ? check regressions ctxt->valid = 0; */
7830 }
7831
7832 /*
7833 * [ WFC: Parsed Entity ]
7834 * An entity reference must not contain the name of an
7835 * unparsed entity
7836 */
7837 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7838 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7839 "Entity reference to unparsed entity %s\n", name);
7840 }
7841
7842 /*
7843 * [ WFC: No External Entity References ]
7844 * Attribute values cannot contain direct or indirect
7845 * entity references to external entities.
7846 */
7847 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7848 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7849 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7850 "Attribute references external entity '%s'\n", name);
7851 }
7852 /*
7853 * [ WFC: No < in Attribute Values ]
7854 * The replacement text of any entity referred to directly or
7855 * indirectly in an attribute value (other than "&lt;") must
7856 * not contain a <.
7857 */
7858 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7859 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007860 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007861 (xmlStrchr(ent->content, '<'))) {
7862 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7863 "'<' in entity '%s' is not allowed in attributes values\n",
7864 name);
7865 }
7866
7867 /*
7868 * Internal check, no parameter entities here ...
7869 */
7870 else {
7871 switch (ent->etype) {
7872 case XML_INTERNAL_PARAMETER_ENTITY:
7873 case XML_EXTERNAL_PARAMETER_ENTITY:
7874 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7875 "Attempt to reference the parameter entity '%s'\n",
7876 name);
7877 break;
7878 default:
7879 break;
7880 }
7881 }
7882
7883 /*
7884 * [ WFC: No Recursion ]
7885 * A parsed entity must not contain a recursive reference
7886 * to itself, either directly or indirectly.
7887 * Done somewhere else
7888 */
7889
7890 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007891 *str = ptr;
7892 return(ent);
7893}
7894
7895/**
7896 * xmlParsePEReference:
7897 * @ctxt: an XML parser context
7898 *
7899 * parse PEReference declarations
7900 * The entity content is handled directly by pushing it's content as
7901 * a new input stream.
7902 *
7903 * [69] PEReference ::= '%' Name ';'
7904 *
7905 * [ WFC: No Recursion ]
7906 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007907 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007908 *
7909 * [ WFC: Entity Declared ]
7910 * In a document without any DTD, a document with only an internal DTD
7911 * subset which contains no parameter entity references, or a document
7912 * with "standalone='yes'", ... ... The declaration of a parameter
7913 * entity must precede any reference to it...
7914 *
7915 * [ VC: Entity Declared ]
7916 * In a document with an external subset or external parameter entities
7917 * with "standalone='no'", ... ... The declaration of a parameter entity
7918 * must precede any reference to it...
7919 *
7920 * [ WFC: In DTD ]
7921 * Parameter-entity references may only appear in the DTD.
7922 * NOTE: misleading but this is handled.
7923 */
7924void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007925xmlParsePEReference(xmlParserCtxtPtr ctxt)
7926{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007927 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007928 xmlEntityPtr entity = NULL;
7929 xmlParserInputPtr input;
7930
Daniel Veillard0161e632008-08-28 15:36:32 +00007931 if (RAW != '%')
7932 return;
7933 NEXT;
7934 name = xmlParseName(ctxt);
7935 if (name == NULL) {
7936 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7937 "xmlParsePEReference: no name\n");
7938 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007939 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007940 if (RAW != ';') {
7941 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7942 return;
7943 }
7944
7945 NEXT;
7946
7947 /*
7948 * Increate the number of entity references parsed
7949 */
7950 ctxt->nbentities++;
7951
7952 /*
7953 * Request the entity from SAX
7954 */
7955 if ((ctxt->sax != NULL) &&
7956 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08007957 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
7958 if (ctxt->instate == XML_PARSER_EOF)
7959 return;
Daniel Veillard0161e632008-08-28 15:36:32 +00007960 if (entity == NULL) {
7961 /*
7962 * [ WFC: Entity Declared ]
7963 * In a document without any DTD, a document with only an
7964 * internal DTD subset which contains no parameter entity
7965 * references, or a document with "standalone='yes'", ...
7966 * ... The declaration of a parameter entity must precede
7967 * any reference to it...
7968 */
7969 if ((ctxt->standalone == 1) ||
7970 ((ctxt->hasExternalSubset == 0) &&
7971 (ctxt->hasPErefs == 0))) {
7972 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7973 "PEReference: %%%s; not found\n",
7974 name);
7975 } else {
7976 /*
7977 * [ VC: Entity Declared ]
7978 * In a document with an external subset or external
7979 * parameter entities with "standalone='no'", ...
7980 * ... The declaration of a parameter entity must
7981 * precede any reference to it...
7982 */
7983 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7984 "PEReference: %%%s; not found\n",
7985 name, NULL);
7986 ctxt->valid = 0;
7987 }
7988 } else {
7989 /*
7990 * Internal checking in case the entity quest barfed
7991 */
7992 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7993 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7994 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7995 "Internal: %%%s; is not a parameter entity\n",
7996 name, NULL);
7997 } else if (ctxt->input->free != deallocblankswrapper) {
7998 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7999 if (xmlPushInput(ctxt, input) < 0)
8000 return;
8001 } else {
8002 /*
8003 * TODO !!!
8004 * handle the extra spaces added before and after
8005 * c.f. http://www.w3.org/TR/REC-xml#as-PE
8006 */
8007 input = xmlNewEntityInputStream(ctxt, entity);
8008 if (xmlPushInput(ctxt, input) < 0)
8009 return;
8010 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8011 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8012 (IS_BLANK_CH(NXT(5)))) {
8013 xmlParseTextDecl(ctxt);
8014 if (ctxt->errNo ==
8015 XML_ERR_UNSUPPORTED_ENCODING) {
8016 /*
8017 * The XML REC instructs us to stop parsing
8018 * right here
8019 */
8020 ctxt->instate = XML_PARSER_EOF;
8021 return;
8022 }
8023 }
8024 }
8025 }
8026 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00008027}
8028
8029/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008030 * xmlLoadEntityContent:
8031 * @ctxt: an XML parser context
8032 * @entity: an unloaded system entity
8033 *
8034 * Load the original content of the given system entity from the
8035 * ExternalID/SystemID given. This is to be used for Included in Literal
8036 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8037 *
8038 * Returns 0 in case of success and -1 in case of failure
8039 */
8040static int
8041xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8042 xmlParserInputPtr input;
8043 xmlBufferPtr buf;
8044 int l, c;
8045 int count = 0;
8046
8047 if ((ctxt == NULL) || (entity == NULL) ||
8048 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8049 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8050 (entity->content != NULL)) {
8051 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8052 "xmlLoadEntityContent parameter error");
8053 return(-1);
8054 }
8055
8056 if (xmlParserDebugEntities)
8057 xmlGenericError(xmlGenericErrorContext,
8058 "Reading %s entity content input\n", entity->name);
8059
8060 buf = xmlBufferCreate();
8061 if (buf == NULL) {
8062 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8063 "xmlLoadEntityContent parameter error");
8064 return(-1);
8065 }
8066
8067 input = xmlNewEntityInputStream(ctxt, entity);
8068 if (input == NULL) {
8069 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8070 "xmlLoadEntityContent input error");
8071 xmlBufferFree(buf);
8072 return(-1);
8073 }
8074
8075 /*
8076 * Push the entity as the current input, read char by char
8077 * saving to the buffer until the end of the entity or an error
8078 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008079 if (xmlPushInput(ctxt, input) < 0) {
8080 xmlBufferFree(buf);
8081 return(-1);
8082 }
8083
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008084 GROW;
8085 c = CUR_CHAR(l);
8086 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8087 (IS_CHAR(c))) {
8088 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008089 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008090 count = 0;
8091 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008092 if (ctxt->instate == XML_PARSER_EOF) {
8093 xmlBufferFree(buf);
8094 return(-1);
8095 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008096 }
8097 NEXTL(l);
8098 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008099 if (c == 0) {
8100 count = 0;
8101 GROW;
8102 if (ctxt->instate == XML_PARSER_EOF) {
8103 xmlBufferFree(buf);
8104 return(-1);
8105 }
8106 c = CUR_CHAR(l);
8107 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008108 }
8109
8110 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8111 xmlPopInput(ctxt);
8112 } else if (!IS_CHAR(c)) {
8113 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8114 "xmlLoadEntityContent: invalid char value %d\n",
8115 c);
8116 xmlBufferFree(buf);
8117 return(-1);
8118 }
8119 entity->content = buf->content;
8120 buf->content = NULL;
8121 xmlBufferFree(buf);
8122
8123 return(0);
8124}
8125
8126/**
Owen Taylor3473f882001-02-23 17:55:21 +00008127 * xmlParseStringPEReference:
8128 * @ctxt: an XML parser context
8129 * @str: a pointer to an index in the string
8130 *
8131 * parse PEReference declarations
8132 *
8133 * [69] PEReference ::= '%' Name ';'
8134 *
8135 * [ WFC: No Recursion ]
8136 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008137 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008138 *
8139 * [ WFC: Entity Declared ]
8140 * In a document without any DTD, a document with only an internal DTD
8141 * subset which contains no parameter entity references, or a document
8142 * with "standalone='yes'", ... ... The declaration of a parameter
8143 * entity must precede any reference to it...
8144 *
8145 * [ VC: Entity Declared ]
8146 * In a document with an external subset or external parameter entities
8147 * with "standalone='no'", ... ... The declaration of a parameter entity
8148 * must precede any reference to it...
8149 *
8150 * [ WFC: In DTD ]
8151 * Parameter-entity references may only appear in the DTD.
8152 * NOTE: misleading but this is handled.
8153 *
8154 * Returns the string of the entity content.
8155 * str is updated to the current value of the index
8156 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008157static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008158xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8159 const xmlChar *ptr;
8160 xmlChar cur;
8161 xmlChar *name;
8162 xmlEntityPtr entity = NULL;
8163
8164 if ((str == NULL) || (*str == NULL)) return(NULL);
8165 ptr = *str;
8166 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008167 if (cur != '%')
8168 return(NULL);
8169 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008170 name = xmlParseStringName(ctxt, &ptr);
8171 if (name == NULL) {
8172 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8173 "xmlParseStringPEReference: no name\n");
8174 *str = ptr;
8175 return(NULL);
8176 }
8177 cur = *ptr;
8178 if (cur != ';') {
8179 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8180 xmlFree(name);
8181 *str = ptr;
8182 return(NULL);
8183 }
8184 ptr++;
8185
8186 /*
8187 * Increate the number of entity references parsed
8188 */
8189 ctxt->nbentities++;
8190
8191 /*
8192 * Request the entity from SAX
8193 */
8194 if ((ctxt->sax != NULL) &&
8195 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillarde50ba812013-04-11 15:54:51 +08008196 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8197 if (ctxt->instate == XML_PARSER_EOF) {
8198 xmlFree(name);
Jüri Aedla9ca816b2013-04-16 22:00:13 +08008199 return(NULL);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008200 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008201 if (entity == NULL) {
8202 /*
8203 * [ WFC: Entity Declared ]
8204 * In a document without any DTD, a document with only an
8205 * internal DTD subset which contains no parameter entity
8206 * references, or a document with "standalone='yes'", ...
8207 * ... The declaration of a parameter entity must precede
8208 * any reference to it...
8209 */
8210 if ((ctxt->standalone == 1) ||
8211 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8212 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8213 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008214 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008215 /*
8216 * [ VC: Entity Declared ]
8217 * In a document with an external subset or external
8218 * parameter entities with "standalone='no'", ...
8219 * ... The declaration of a parameter entity must
8220 * precede any reference to it...
8221 */
8222 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8223 "PEReference: %%%s; not found\n",
8224 name, NULL);
8225 ctxt->valid = 0;
8226 }
8227 } else {
8228 /*
8229 * Internal checking in case the entity quest barfed
8230 */
8231 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8232 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8233 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8234 "%%%s; is not a parameter entity\n",
8235 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008236 }
8237 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008238 ctxt->hasPErefs = 1;
8239 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008240 *str = ptr;
8241 return(entity);
8242}
8243
8244/**
8245 * xmlParseDocTypeDecl:
8246 * @ctxt: an XML parser context
8247 *
8248 * parse a DOCTYPE declaration
8249 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008250 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008251 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8252 *
8253 * [ VC: Root Element Type ]
8254 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008255 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008256 */
8257
8258void
8259xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008260 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008261 xmlChar *ExternalID = NULL;
8262 xmlChar *URI = NULL;
8263
8264 /*
8265 * We know that '<!DOCTYPE' has been detected.
8266 */
8267 SKIP(9);
8268
8269 SKIP_BLANKS;
8270
8271 /*
8272 * Parse the DOCTYPE name.
8273 */
8274 name = xmlParseName(ctxt);
8275 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008276 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8277 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008278 }
8279 ctxt->intSubName = name;
8280
8281 SKIP_BLANKS;
8282
8283 /*
8284 * Check for SystemID and ExternalID
8285 */
8286 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8287
8288 if ((URI != NULL) || (ExternalID != NULL)) {
8289 ctxt->hasExternalSubset = 1;
8290 }
8291 ctxt->extSubURI = URI;
8292 ctxt->extSubSystem = ExternalID;
8293
8294 SKIP_BLANKS;
8295
8296 /*
8297 * Create and update the internal subset.
8298 */
8299 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8300 (!ctxt->disableSAX))
8301 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillarde50ba812013-04-11 15:54:51 +08008302 if (ctxt->instate == XML_PARSER_EOF)
8303 return;
Owen Taylor3473f882001-02-23 17:55:21 +00008304
8305 /*
8306 * Is there any internal subset declarations ?
8307 * they are handled separately in xmlParseInternalSubset()
8308 */
8309 if (RAW == '[')
8310 return;
8311
8312 /*
8313 * We should be at the end of the DOCTYPE declaration.
8314 */
8315 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008316 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008317 }
8318 NEXT;
8319}
8320
8321/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008322 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008323 * @ctxt: an XML parser context
8324 *
8325 * parse the internal subset declaration
8326 *
8327 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8328 */
8329
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008330static void
Owen Taylor3473f882001-02-23 17:55:21 +00008331xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8332 /*
8333 * Is there any DTD definition ?
8334 */
8335 if (RAW == '[') {
8336 ctxt->instate = XML_PARSER_DTD;
8337 NEXT;
8338 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008339 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008340 * PEReferences.
8341 * Subsequence (markupdecl | PEReference | S)*
8342 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008343 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008344 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008345 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008346
8347 SKIP_BLANKS;
8348 xmlParseMarkupDecl(ctxt);
8349 xmlParsePEReference(ctxt);
8350
8351 /*
8352 * Pop-up of finished entities.
8353 */
8354 while ((RAW == 0) && (ctxt->inputNr > 1))
8355 xmlPopInput(ctxt);
8356
8357 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008358 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008359 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008360 break;
8361 }
8362 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008363 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008364 NEXT;
8365 SKIP_BLANKS;
8366 }
8367 }
8368
8369 /*
8370 * We should be at the end of the DOCTYPE declaration.
8371 */
8372 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008373 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008374 }
8375 NEXT;
8376}
8377
Daniel Veillard81273902003-09-30 00:43:48 +00008378#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008379/**
8380 * xmlParseAttribute:
8381 * @ctxt: an XML parser context
8382 * @value: a xmlChar ** used to store the value of the attribute
8383 *
8384 * parse an attribute
8385 *
8386 * [41] Attribute ::= Name Eq AttValue
8387 *
8388 * [ WFC: No External Entity References ]
8389 * Attribute values cannot contain direct or indirect entity references
8390 * to external entities.
8391 *
8392 * [ WFC: No < in Attribute Values ]
8393 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008394 * an attribute value (other than "&lt;") must not contain a <.
8395 *
Owen Taylor3473f882001-02-23 17:55:21 +00008396 * [ VC: Attribute Value Type ]
8397 * The attribute must have been declared; the value must be of the type
8398 * declared for it.
8399 *
8400 * [25] Eq ::= S? '=' S?
8401 *
8402 * With namespace:
8403 *
8404 * [NS 11] Attribute ::= QName Eq AttValue
8405 *
8406 * Also the case QName == xmlns:??? is handled independently as a namespace
8407 * definition.
8408 *
8409 * Returns the attribute name, and the value in *value.
8410 */
8411
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008412const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008413xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008414 const xmlChar *name;
8415 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008416
8417 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008418 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008419 name = xmlParseName(ctxt);
8420 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008421 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008422 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008423 return(NULL);
8424 }
8425
8426 /*
8427 * read the value
8428 */
8429 SKIP_BLANKS;
8430 if (RAW == '=') {
8431 NEXT;
8432 SKIP_BLANKS;
8433 val = xmlParseAttValue(ctxt);
8434 ctxt->instate = XML_PARSER_CONTENT;
8435 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008436 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008437 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008438 return(NULL);
8439 }
8440
8441 /*
8442 * Check that xml:lang conforms to the specification
8443 * No more registered as an error, just generate a warning now
8444 * since this was deprecated in XML second edition
8445 */
8446 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8447 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008448 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8449 "Malformed value for xml:lang : %s\n",
8450 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008451 }
8452 }
8453
8454 /*
8455 * Check that xml:space conforms to the specification
8456 */
8457 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8458 if (xmlStrEqual(val, BAD_CAST "default"))
8459 *(ctxt->space) = 0;
8460 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8461 *(ctxt->space) = 1;
8462 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008463 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008464"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008465 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008466 }
8467 }
8468
8469 *value = val;
8470 return(name);
8471}
8472
8473/**
8474 * xmlParseStartTag:
8475 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008476 *
Owen Taylor3473f882001-02-23 17:55:21 +00008477 * parse a start of tag either for rule element or
8478 * EmptyElement. In both case we don't parse the tag closing chars.
8479 *
8480 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8481 *
8482 * [ WFC: Unique Att Spec ]
8483 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008484 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008485 *
8486 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8487 *
8488 * [ WFC: Unique Att Spec ]
8489 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008490 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008491 *
8492 * With namespace:
8493 *
8494 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8495 *
8496 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8497 *
8498 * Returns the element name parsed
8499 */
8500
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008501const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008502xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008503 const xmlChar *name;
8504 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008505 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008506 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008507 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008508 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008509 int i;
8510
8511 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008512 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008513
8514 name = xmlParseName(ctxt);
8515 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008516 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008517 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008518 return(NULL);
8519 }
8520
8521 /*
8522 * Now parse the attributes, it ends up with the ending
8523 *
8524 * (S Attribute)* S?
8525 */
8526 SKIP_BLANKS;
8527 GROW;
8528
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008529 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008530 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008531 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008532 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008533 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008534
8535 attname = xmlParseAttribute(ctxt, &attvalue);
8536 if ((attname != NULL) && (attvalue != NULL)) {
8537 /*
8538 * [ WFC: Unique Att Spec ]
8539 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008540 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008541 */
8542 for (i = 0; i < nbatts;i += 2) {
8543 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008544 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008545 xmlFree(attvalue);
8546 goto failed;
8547 }
8548 }
Owen Taylor3473f882001-02-23 17:55:21 +00008549 /*
8550 * Add the pair to atts
8551 */
8552 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008553 maxatts = 22; /* allow for 10 attrs by default */
8554 atts = (const xmlChar **)
8555 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008556 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008557 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008558 if (attvalue != NULL)
8559 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008560 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008561 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008562 ctxt->atts = atts;
8563 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008564 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008565 const xmlChar **n;
8566
Owen Taylor3473f882001-02-23 17:55:21 +00008567 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008568 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008569 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008570 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008571 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008572 if (attvalue != NULL)
8573 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008574 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008575 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008576 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008577 ctxt->atts = atts;
8578 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008579 }
8580 atts[nbatts++] = attname;
8581 atts[nbatts++] = attvalue;
8582 atts[nbatts] = NULL;
8583 atts[nbatts + 1] = NULL;
8584 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008585 if (attvalue != NULL)
8586 xmlFree(attvalue);
8587 }
8588
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008589failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008590
Daniel Veillard3772de32002-12-17 10:31:45 +00008591 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008592 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8593 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008594 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008595 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8596 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008597 }
8598 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008599 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8600 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008601 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8602 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008603 break;
8604 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008605 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008606 GROW;
8607 }
8608
8609 /*
8610 * SAX: Start of Element !
8611 */
8612 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008613 (!ctxt->disableSAX)) {
8614 if (nbatts > 0)
8615 ctxt->sax->startElement(ctxt->userData, name, atts);
8616 else
8617 ctxt->sax->startElement(ctxt->userData, name, NULL);
8618 }
Owen Taylor3473f882001-02-23 17:55:21 +00008619
8620 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008621 /* Free only the content strings */
8622 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008623 if (atts[i] != NULL)
8624 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008625 }
8626 return(name);
8627}
8628
8629/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008630 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008631 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008632 * @line: line of the start tag
8633 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008634 *
8635 * parse an end of tag
8636 *
8637 * [42] ETag ::= '</' Name S? '>'
8638 *
8639 * With namespace
8640 *
8641 * [NS 9] ETag ::= '</' QName S? '>'
8642 */
8643
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008644static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008645xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008646 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008647
8648 GROW;
8649 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008650 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008651 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008652 return;
8653 }
8654 SKIP(2);
8655
Daniel Veillard46de64e2002-05-29 08:21:33 +00008656 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008657
8658 /*
8659 * We should definitely be at the ending "S? '>'" part
8660 */
8661 GROW;
8662 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008663 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008664 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008665 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008666 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008667
8668 /*
8669 * [ WFC: Element Type Match ]
8670 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008671 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008672 *
8673 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008674 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008675 if (name == NULL) name = BAD_CAST "unparseable";
8676 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008677 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008678 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008679 }
8680
8681 /*
8682 * SAX: End of Tag
8683 */
8684 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8685 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008686 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008687
Daniel Veillarde57ec792003-09-10 10:50:59 +00008688 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008689 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008690 return;
8691}
8692
8693/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008694 * xmlParseEndTag:
8695 * @ctxt: an XML parser context
8696 *
8697 * parse an end of tag
8698 *
8699 * [42] ETag ::= '</' Name S? '>'
8700 *
8701 * With namespace
8702 *
8703 * [NS 9] ETag ::= '</' QName S? '>'
8704 */
8705
8706void
8707xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008708 xmlParseEndTag1(ctxt, 0);
8709}
Daniel Veillard81273902003-09-30 00:43:48 +00008710#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008711
8712/************************************************************************
8713 * *
8714 * SAX 2 specific operations *
8715 * *
8716 ************************************************************************/
8717
Daniel Veillard0fb18932003-09-07 09:14:37 +00008718/*
8719 * xmlGetNamespace:
8720 * @ctxt: an XML parser context
8721 * @prefix: the prefix to lookup
8722 *
8723 * Lookup the namespace name for the @prefix (which ca be NULL)
8724 * The prefix must come from the @ctxt->dict dictionnary
8725 *
8726 * Returns the namespace name or NULL if not bound
8727 */
8728static const xmlChar *
8729xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8730 int i;
8731
Daniel Veillarde57ec792003-09-10 10:50:59 +00008732 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008733 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008734 if (ctxt->nsTab[i] == prefix) {
8735 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8736 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008737 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008738 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008739 return(NULL);
8740}
8741
8742/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008743 * xmlParseQName:
8744 * @ctxt: an XML parser context
8745 * @prefix: pointer to store the prefix part
8746 *
8747 * parse an XML Namespace QName
8748 *
8749 * [6] QName ::= (Prefix ':')? LocalPart
8750 * [7] Prefix ::= NCName
8751 * [8] LocalPart ::= NCName
8752 *
8753 * Returns the Name parsed or NULL
8754 */
8755
8756static const xmlChar *
8757xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8758 const xmlChar *l, *p;
8759
8760 GROW;
8761
8762 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008763 if (l == NULL) {
8764 if (CUR == ':') {
8765 l = xmlParseName(ctxt);
8766 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008767 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008768 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008769 *prefix = NULL;
8770 return(l);
8771 }
8772 }
8773 return(NULL);
8774 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008775 if (CUR == ':') {
8776 NEXT;
8777 p = l;
8778 l = xmlParseNCName(ctxt);
8779 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008780 xmlChar *tmp;
8781
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008782 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8783 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008784 l = xmlParseNmtoken(ctxt);
8785 if (l == NULL)
8786 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8787 else {
8788 tmp = xmlBuildQName(l, p, NULL, 0);
8789 xmlFree((char *)l);
8790 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008791 p = xmlDictLookup(ctxt->dict, tmp, -1);
8792 if (tmp != NULL) xmlFree(tmp);
8793 *prefix = NULL;
8794 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008795 }
8796 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008797 xmlChar *tmp;
8798
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008799 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8800 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008801 NEXT;
8802 tmp = (xmlChar *) xmlParseName(ctxt);
8803 if (tmp != NULL) {
8804 tmp = xmlBuildQName(tmp, l, NULL, 0);
8805 l = xmlDictLookup(ctxt->dict, tmp, -1);
8806 if (tmp != NULL) xmlFree(tmp);
8807 *prefix = p;
8808 return(l);
8809 }
8810 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8811 l = xmlDictLookup(ctxt->dict, tmp, -1);
8812 if (tmp != NULL) xmlFree(tmp);
8813 *prefix = p;
8814 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008815 }
8816 *prefix = p;
8817 } else
8818 *prefix = NULL;
8819 return(l);
8820}
8821
8822/**
8823 * xmlParseQNameAndCompare:
8824 * @ctxt: an XML parser context
8825 * @name: the localname
8826 * @prefix: the prefix, if any.
8827 *
8828 * parse an XML name and compares for match
8829 * (specialized for endtag parsing)
8830 *
8831 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8832 * and the name for mismatch
8833 */
8834
8835static const xmlChar *
8836xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8837 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008838 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008839 const xmlChar *in;
8840 const xmlChar *ret;
8841 const xmlChar *prefix2;
8842
8843 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8844
8845 GROW;
8846 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008847
Daniel Veillard0fb18932003-09-07 09:14:37 +00008848 cmp = prefix;
8849 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008850 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008851 ++cmp;
8852 }
8853 if ((*cmp == 0) && (*in == ':')) {
8854 in++;
8855 cmp = name;
8856 while (*in != 0 && *in == *cmp) {
8857 ++in;
8858 ++cmp;
8859 }
William M. Brack76e95df2003-10-18 16:20:14 +00008860 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008861 /* success */
8862 ctxt->input->cur = in;
8863 return((const xmlChar*) 1);
8864 }
8865 }
8866 /*
8867 * all strings coms from the dictionary, equality can be done directly
8868 */
8869 ret = xmlParseQName (ctxt, &prefix2);
8870 if ((ret == name) && (prefix == prefix2))
8871 return((const xmlChar*) 1);
8872 return ret;
8873}
8874
8875/**
8876 * xmlParseAttValueInternal:
8877 * @ctxt: an XML parser context
8878 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008879 * @alloc: whether the attribute was reallocated as a new string
8880 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008881 *
8882 * parse a value for an attribute.
8883 * NOTE: if no normalization is needed, the routine will return pointers
8884 * directly from the data buffer.
8885 *
8886 * 3.3.3 Attribute-Value Normalization:
8887 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008888 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008889 * - a character reference is processed by appending the referenced
8890 * character to the attribute value
8891 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008892 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008893 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8894 * appending #x20 to the normalized value, except that only a single
8895 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008896 * parsed entity or the literal entity value of an internal parsed entity
8897 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008898 * If the declared value is not CDATA, then the XML processor must further
8899 * process the normalized attribute value by discarding any leading and
8900 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008901 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008902 * All attributes for which no declaration has been read should be treated
8903 * by a non-validating parser as if declared CDATA.
8904 *
8905 * Returns the AttValue parsed or NULL. The value has to be freed by the
8906 * caller if it was copied, this can be detected by val[*len] == 0.
8907 */
8908
8909static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008910xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8911 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008912{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008913 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008914 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008915 xmlChar *ret = NULL;
8916
8917 GROW;
8918 in = (xmlChar *) CUR_PTR;
8919 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008920 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008921 return (NULL);
8922 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008923 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008924
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008925 /*
8926 * try to handle in this routine the most common case where no
8927 * allocation of a new string is required and where content is
8928 * pure ASCII.
8929 */
8930 limit = *in++;
8931 end = ctxt->input->end;
8932 start = in;
8933 if (in >= end) {
8934 const xmlChar *oldbase = ctxt->input->base;
8935 GROW;
8936 if (oldbase != ctxt->input->base) {
8937 long delta = ctxt->input->base - oldbase;
8938 start = start + delta;
8939 in = in + delta;
8940 }
8941 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008942 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008943 if (normalize) {
8944 /*
8945 * Skip any leading spaces
8946 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008947 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008948 ((*in == 0x20) || (*in == 0x9) ||
8949 (*in == 0xA) || (*in == 0xD))) {
8950 in++;
8951 start = in;
8952 if (in >= end) {
8953 const xmlChar *oldbase = ctxt->input->base;
8954 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008955 if (ctxt->instate == XML_PARSER_EOF)
8956 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008957 if (oldbase != ctxt->input->base) {
8958 long delta = ctxt->input->base - oldbase;
8959 start = start + delta;
8960 in = in + delta;
8961 }
8962 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008963 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8964 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8965 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008966 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008967 return(NULL);
8968 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008969 }
8970 }
8971 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8972 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8973 if ((*in++ == 0x20) && (*in == 0x20)) break;
8974 if (in >= end) {
8975 const xmlChar *oldbase = ctxt->input->base;
8976 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008977 if (ctxt->instate == XML_PARSER_EOF)
8978 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008979 if (oldbase != ctxt->input->base) {
8980 long delta = ctxt->input->base - oldbase;
8981 start = start + delta;
8982 in = in + delta;
8983 }
8984 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008985 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8986 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8987 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008988 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008989 return(NULL);
8990 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008991 }
8992 }
8993 last = in;
8994 /*
8995 * skip the trailing blanks
8996 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008997 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008998 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008999 ((*in == 0x20) || (*in == 0x9) ||
9000 (*in == 0xA) || (*in == 0xD))) {
9001 in++;
9002 if (in >= end) {
9003 const xmlChar *oldbase = ctxt->input->base;
9004 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009005 if (ctxt->instate == XML_PARSER_EOF)
9006 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009007 if (oldbase != ctxt->input->base) {
9008 long delta = ctxt->input->base - oldbase;
9009 start = start + delta;
9010 in = in + delta;
9011 last = last + delta;
9012 }
9013 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009014 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9015 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9016 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009017 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009018 return(NULL);
9019 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009020 }
9021 }
Daniel Veillarde17db992012-07-19 11:25:16 +08009022 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9023 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9024 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009025 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009026 return(NULL);
9027 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009028 if (*in != limit) goto need_complex;
9029 } else {
9030 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9031 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9032 in++;
9033 if (in >= end) {
9034 const xmlChar *oldbase = ctxt->input->base;
9035 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009036 if (ctxt->instate == XML_PARSER_EOF)
9037 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009038 if (oldbase != ctxt->input->base) {
9039 long delta = ctxt->input->base - oldbase;
9040 start = start + delta;
9041 in = in + delta;
9042 }
9043 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08009044 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9045 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9046 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009047 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009048 return(NULL);
9049 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009050 }
9051 }
9052 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08009053 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
9054 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9055 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02009056 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08009057 return(NULL);
9058 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009059 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009060 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009061 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009062 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009063 *len = last - start;
9064 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009065 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009066 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009067 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009068 }
9069 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009070 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009071 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009072need_complex:
9073 if (alloc) *alloc = 1;
9074 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009075}
9076
9077/**
9078 * xmlParseAttribute2:
9079 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009080 * @pref: the element prefix
9081 * @elem: the element name
9082 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009083 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009084 * @len: an int * to save the length of the attribute
9085 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009086 *
9087 * parse an attribute in the new SAX2 framework.
9088 *
9089 * Returns the attribute name, and the value in *value, .
9090 */
9091
9092static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009093xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009094 const xmlChar * pref, const xmlChar * elem,
9095 const xmlChar ** prefix, xmlChar ** value,
9096 int *len, int *alloc)
9097{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009098 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009099 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009100 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009101
9102 *value = NULL;
9103 GROW;
9104 name = xmlParseQName(ctxt, prefix);
9105 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009106 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9107 "error parsing attribute name\n");
9108 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009109 }
9110
9111 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009112 * get the type if needed
9113 */
9114 if (ctxt->attsSpecial != NULL) {
9115 int type;
9116
9117 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009118 pref, elem, *prefix, name);
9119 if (type != 0)
9120 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009121 }
9122
9123 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009124 * read the value
9125 */
9126 SKIP_BLANKS;
9127 if (RAW == '=') {
9128 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009129 SKIP_BLANKS;
9130 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9131 if (normalize) {
9132 /*
9133 * Sometimes a second normalisation pass for spaces is needed
9134 * but that only happens if charrefs or entities refernces
9135 * have been used in the attribute value, i.e. the attribute
9136 * value have been extracted in an allocated string already.
9137 */
9138 if (*alloc) {
9139 const xmlChar *val2;
9140
9141 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009142 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009143 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009144 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009145 }
9146 }
9147 }
9148 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009149 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009150 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9151 "Specification mandate value for attribute %s\n",
9152 name);
9153 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009154 }
9155
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009156 if (*prefix == ctxt->str_xml) {
9157 /*
9158 * Check that xml:lang conforms to the specification
9159 * No more registered as an error, just generate a warning now
9160 * since this was deprecated in XML second edition
9161 */
9162 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9163 internal_val = xmlStrndup(val, *len);
9164 if (!xmlCheckLanguageID(internal_val)) {
9165 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9166 "Malformed value for xml:lang : %s\n",
9167 internal_val, NULL);
9168 }
9169 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009170
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009171 /*
9172 * Check that xml:space conforms to the specification
9173 */
9174 if (xmlStrEqual(name, BAD_CAST "space")) {
9175 internal_val = xmlStrndup(val, *len);
9176 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9177 *(ctxt->space) = 0;
9178 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9179 *(ctxt->space) = 1;
9180 else {
9181 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9182 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9183 internal_val, NULL);
9184 }
9185 }
9186 if (internal_val) {
9187 xmlFree(internal_val);
9188 }
9189 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009190
9191 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009192 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009193}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009194/**
9195 * xmlParseStartTag2:
9196 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009197 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009198 * parse a start of tag either for rule element or
9199 * EmptyElement. In both case we don't parse the tag closing chars.
9200 * This routine is called when running SAX2 parsing
9201 *
9202 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9203 *
9204 * [ WFC: Unique Att Spec ]
9205 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009206 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009207 *
9208 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9209 *
9210 * [ WFC: Unique Att Spec ]
9211 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009212 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009213 *
9214 * With namespace:
9215 *
9216 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9217 *
9218 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9219 *
9220 * Returns the element name parsed
9221 */
9222
9223static const xmlChar *
9224xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009225 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009226 const xmlChar *localname;
9227 const xmlChar *prefix;
9228 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009229 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009230 const xmlChar *nsname;
9231 xmlChar *attvalue;
9232 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009233 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009234 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009235 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009236 const xmlChar *base;
9237 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009238 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009239
9240 if (RAW != '<') return(NULL);
9241 NEXT1;
9242
9243 /*
9244 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9245 * point since the attribute values may be stored as pointers to
9246 * the buffer and calling SHRINK would destroy them !
9247 * The Shrinking is only possible once the full set of attribute
9248 * callbacks have been done.
9249 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009250reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009251 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009252 base = ctxt->input->base;
9253 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009254 oldline = ctxt->input->line;
9255 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009256 nbatts = 0;
9257 nratts = 0;
9258 nbdef = 0;
9259 nbNs = 0;
9260 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009261 /* Forget any namespaces added during an earlier parse of this element. */
9262 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009263
9264 localname = xmlParseQName(ctxt, &prefix);
9265 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009266 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9267 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009268 return(NULL);
9269 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009270 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009271
9272 /*
9273 * Now parse the attributes, it ends up with the ending
9274 *
9275 * (S Attribute)* S?
9276 */
9277 SKIP_BLANKS;
9278 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009279 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009280
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009281 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009282 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009283 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009284 const xmlChar *q = CUR_PTR;
9285 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009286 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009287
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009288 attname = xmlParseAttribute2(ctxt, prefix, localname,
9289 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00009290 if (ctxt->input->base != base) {
9291 if ((attvalue != NULL) && (alloc != 0))
9292 xmlFree(attvalue);
9293 attvalue = NULL;
9294 goto base_changed;
9295 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009296 if ((attname != NULL) && (attvalue != NULL)) {
9297 if (len < 0) len = xmlStrlen(attvalue);
9298 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009299 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9300 xmlURIPtr uri;
9301
9302 if (*URL != 0) {
9303 uri = xmlParseURI((const char *) URL);
9304 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009305 xmlNsErr(ctxt, XML_WAR_NS_URI,
9306 "xmlns: '%s' is not a valid URI\n",
9307 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009308 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00009309 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009310 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9311 "xmlns: URI %s is not absolute\n",
9312 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009313 }
9314 xmlFreeURI(uri);
9315 }
Daniel Veillard37334572008-07-31 08:20:02 +00009316 if (URL == ctxt->str_xml_ns) {
9317 if (attname != ctxt->str_xml) {
9318 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9319 "xml namespace URI cannot be the default namespace\n",
9320 NULL, NULL, NULL);
9321 }
9322 goto skip_default_ns;
9323 }
9324 if ((len == 29) &&
9325 (xmlStrEqual(URL,
9326 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9327 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9328 "reuse of the xmlns namespace name is forbidden\n",
9329 NULL, NULL, NULL);
9330 goto skip_default_ns;
9331 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009332 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009333 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009334 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009335 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009336 for (j = 1;j <= nbNs;j++)
9337 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9338 break;
9339 if (j <= nbNs)
9340 xmlErrAttributeDup(ctxt, NULL, attname);
9341 else
9342 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009343skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009344 if (alloc != 0) xmlFree(attvalue);
9345 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009346 continue;
9347 }
9348 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009349 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9350 xmlURIPtr uri;
9351
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009352 if (attname == ctxt->str_xml) {
9353 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009354 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9355 "xml namespace prefix mapped to wrong URI\n",
9356 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009357 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009358 /*
9359 * Do not keep a namespace definition node
9360 */
Daniel Veillard37334572008-07-31 08:20:02 +00009361 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009362 }
Daniel Veillard37334572008-07-31 08:20:02 +00009363 if (URL == ctxt->str_xml_ns) {
9364 if (attname != ctxt->str_xml) {
9365 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9366 "xml namespace URI mapped to wrong prefix\n",
9367 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009368 }
Daniel Veillard37334572008-07-31 08:20:02 +00009369 goto skip_ns;
9370 }
9371 if (attname == ctxt->str_xmlns) {
9372 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9373 "redefinition of the xmlns prefix is forbidden\n",
9374 NULL, NULL, NULL);
9375 goto skip_ns;
9376 }
9377 if ((len == 29) &&
9378 (xmlStrEqual(URL,
9379 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9380 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9381 "reuse of the xmlns namespace name is forbidden\n",
9382 NULL, NULL, NULL);
9383 goto skip_ns;
9384 }
9385 if ((URL == NULL) || (URL[0] == 0)) {
9386 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9387 "xmlns:%s: Empty XML namespace is not allowed\n",
9388 attname, NULL, NULL);
9389 goto skip_ns;
9390 } else {
9391 uri = xmlParseURI((const char *) URL);
9392 if (uri == NULL) {
9393 xmlNsErr(ctxt, XML_WAR_NS_URI,
9394 "xmlns:%s: '%s' is not a valid URI\n",
9395 attname, URL, NULL);
9396 } else {
9397 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9398 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9399 "xmlns:%s: URI %s is not absolute\n",
9400 attname, URL, NULL);
9401 }
9402 xmlFreeURI(uri);
9403 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009404 }
9405
Daniel Veillard0fb18932003-09-07 09:14:37 +00009406 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009407 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009408 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009409 for (j = 1;j <= nbNs;j++)
9410 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9411 break;
9412 if (j <= nbNs)
9413 xmlErrAttributeDup(ctxt, aprefix, attname);
9414 else
9415 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009416skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009417 if (alloc != 0) xmlFree(attvalue);
9418 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009419 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009420 continue;
9421 }
9422
9423 /*
9424 * Add the pair to atts
9425 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009426 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9427 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009428 if (attvalue[len] == 0)
9429 xmlFree(attvalue);
9430 goto failed;
9431 }
9432 maxatts = ctxt->maxatts;
9433 atts = ctxt->atts;
9434 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009435 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009436 atts[nbatts++] = attname;
9437 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009438 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009439 atts[nbatts++] = attvalue;
9440 attvalue += len;
9441 atts[nbatts++] = attvalue;
9442 /*
9443 * tag if some deallocation is needed
9444 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009445 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009446 } else {
9447 if ((attvalue != NULL) && (attvalue[len] == 0))
9448 xmlFree(attvalue);
9449 }
9450
Daniel Veillard37334572008-07-31 08:20:02 +00009451failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009452
9453 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009454 if (ctxt->instate == XML_PARSER_EOF)
9455 break;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009456 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009457 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9458 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009459 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009460 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9461 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009462 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009463 }
9464 SKIP_BLANKS;
9465 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9466 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009467 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009468 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009469 break;
9470 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009471 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009472 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009473 }
9474
Daniel Veillard0fb18932003-09-07 09:14:37 +00009475 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009476 * The attributes defaulting
9477 */
9478 if (ctxt->attsDefault != NULL) {
9479 xmlDefAttrsPtr defaults;
9480
9481 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9482 if (defaults != NULL) {
9483 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009484 attname = defaults->values[5 * i];
9485 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009486
9487 /*
9488 * special work for namespaces defaulted defs
9489 */
9490 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9491 /*
9492 * check that it's not a defined namespace
9493 */
9494 for (j = 1;j <= nbNs;j++)
9495 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9496 break;
9497 if (j <= nbNs) continue;
9498
9499 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009500 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009501 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009502 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009503 nbNs++;
9504 }
9505 } else if (aprefix == ctxt->str_xmlns) {
9506 /*
9507 * check that it's not a defined namespace
9508 */
9509 for (j = 1;j <= nbNs;j++)
9510 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9511 break;
9512 if (j <= nbNs) continue;
9513
9514 nsname = xmlGetNamespace(ctxt, attname);
9515 if (nsname != defaults->values[2]) {
9516 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009517 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009518 nbNs++;
9519 }
9520 } else {
9521 /*
9522 * check that it's not a defined attribute
9523 */
9524 for (j = 0;j < nbatts;j+=5) {
9525 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9526 break;
9527 }
9528 if (j < nbatts) continue;
9529
9530 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9531 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009532 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009533 }
9534 maxatts = ctxt->maxatts;
9535 atts = ctxt->atts;
9536 }
9537 atts[nbatts++] = attname;
9538 atts[nbatts++] = aprefix;
9539 if (aprefix == NULL)
9540 atts[nbatts++] = NULL;
9541 else
9542 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009543 atts[nbatts++] = defaults->values[5 * i + 2];
9544 atts[nbatts++] = defaults->values[5 * i + 3];
9545 if ((ctxt->standalone == 1) &&
9546 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009547 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009548 "standalone: attribute %s on %s defaulted from external subset\n",
9549 attname, localname);
9550 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009551 nbdef++;
9552 }
9553 }
9554 }
9555 }
9556
Daniel Veillarde70c8772003-11-25 07:21:18 +00009557 /*
9558 * The attributes checkings
9559 */
9560 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009561 /*
9562 * The default namespace does not apply to attribute names.
9563 */
9564 if (atts[i + 1] != NULL) {
9565 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9566 if (nsname == NULL) {
9567 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9568 "Namespace prefix %s for %s on %s is not defined\n",
9569 atts[i + 1], atts[i], localname);
9570 }
9571 atts[i + 2] = nsname;
9572 } else
9573 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009574 /*
9575 * [ WFC: Unique Att Spec ]
9576 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009577 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009578 * As extended by the Namespace in XML REC.
9579 */
9580 for (j = 0; j < i;j += 5) {
9581 if (atts[i] == atts[j]) {
9582 if (atts[i+1] == atts[j+1]) {
9583 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9584 break;
9585 }
9586 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9587 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9588 "Namespaced Attribute %s in '%s' redefined\n",
9589 atts[i], nsname, NULL);
9590 break;
9591 }
9592 }
9593 }
9594 }
9595
Daniel Veillarde57ec792003-09-10 10:50:59 +00009596 nsname = xmlGetNamespace(ctxt, prefix);
9597 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009598 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9599 "Namespace prefix %s on %s is not defined\n",
9600 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009601 }
9602 *pref = prefix;
9603 *URI = nsname;
9604
9605 /*
9606 * SAX: Start of Element !
9607 */
9608 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9609 (!ctxt->disableSAX)) {
9610 if (nbNs > 0)
9611 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9612 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9613 nbatts / 5, nbdef, atts);
9614 else
9615 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9616 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9617 }
9618
9619 /*
9620 * Free up attribute allocated strings if needed
9621 */
9622 if (attval != 0) {
9623 for (i = 3,j = 0; j < nratts;i += 5,j++)
9624 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9625 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009626 }
9627
9628 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009629
9630base_changed:
9631 /*
9632 * the attribute strings are valid iif the base didn't changed
9633 */
9634 if (attval != 0) {
9635 for (i = 3,j = 0; j < nratts;i += 5,j++)
9636 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9637 xmlFree((xmlChar *) atts[i]);
9638 }
9639 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009640 ctxt->input->line = oldline;
9641 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009642 if (ctxt->wellFormed == 1) {
9643 goto reparse;
9644 }
9645 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009646}
9647
9648/**
9649 * xmlParseEndTag2:
9650 * @ctxt: an XML parser context
9651 * @line: line of the start tag
9652 * @nsNr: number of namespaces on the start tag
9653 *
9654 * parse an end of tag
9655 *
9656 * [42] ETag ::= '</' Name S? '>'
9657 *
9658 * With namespace
9659 *
9660 * [NS 9] ETag ::= '</' QName S? '>'
9661 */
9662
9663static void
9664xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009665 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009666 const xmlChar *name;
9667
9668 GROW;
9669 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009670 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009671 return;
9672 }
9673 SKIP(2);
9674
William M. Brack13dfa872004-09-18 04:52:08 +00009675 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009676 if (ctxt->input->cur[tlen] == '>') {
9677 ctxt->input->cur += tlen + 1;
9678 goto done;
9679 }
9680 ctxt->input->cur += tlen;
9681 name = (xmlChar*)1;
9682 } else {
9683 if (prefix == NULL)
9684 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9685 else
9686 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9687 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009688
9689 /*
9690 * We should definitely be at the ending "S? '>'" part
9691 */
9692 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009693 if (ctxt->instate == XML_PARSER_EOF)
9694 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009695 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009696 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009697 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009698 } else
9699 NEXT1;
9700
9701 /*
9702 * [ WFC: Element Type Match ]
9703 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009704 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009705 *
9706 */
9707 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009708 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009709 if ((line == 0) && (ctxt->node != NULL))
9710 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009711 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009712 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009713 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009714 }
9715
9716 /*
9717 * SAX: End of Tag
9718 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009719done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009720 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9721 (!ctxt->disableSAX))
9722 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9723
Daniel Veillard0fb18932003-09-07 09:14:37 +00009724 spacePop(ctxt);
9725 if (nsNr != 0)
9726 nsPop(ctxt, nsNr);
9727 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009728}
9729
9730/**
Owen Taylor3473f882001-02-23 17:55:21 +00009731 * xmlParseCDSect:
9732 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009733 *
Owen Taylor3473f882001-02-23 17:55:21 +00009734 * Parse escaped pure raw content.
9735 *
9736 * [18] CDSect ::= CDStart CData CDEnd
9737 *
9738 * [19] CDStart ::= '<![CDATA['
9739 *
9740 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9741 *
9742 * [21] CDEnd ::= ']]>'
9743 */
9744void
9745xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9746 xmlChar *buf = NULL;
9747 int len = 0;
9748 int size = XML_PARSER_BUFFER_SIZE;
9749 int r, rl;
9750 int s, sl;
9751 int cur, l;
9752 int count = 0;
9753
Daniel Veillard8f597c32003-10-06 08:19:27 +00009754 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009755 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009756 SKIP(9);
9757 } else
9758 return;
9759
9760 ctxt->instate = XML_PARSER_CDATA_SECTION;
9761 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009762 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009763 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009764 ctxt->instate = XML_PARSER_CONTENT;
9765 return;
9766 }
9767 NEXTL(rl);
9768 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009769 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009770 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009771 ctxt->instate = XML_PARSER_CONTENT;
9772 return;
9773 }
9774 NEXTL(sl);
9775 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009776 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009777 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009778 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009779 return;
9780 }
William M. Brack871611b2003-10-18 04:53:14 +00009781 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009782 ((r != ']') || (s != ']') || (cur != '>'))) {
9783 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009784 xmlChar *tmp;
9785
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009786 if ((size > XML_MAX_TEXT_LENGTH) &&
9787 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9788 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9789 "CData section too big found", NULL);
9790 xmlFree (buf);
9791 return;
9792 }
9793 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009794 if (tmp == NULL) {
9795 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009796 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009797 return;
9798 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009799 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009800 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009801 }
9802 COPY_BUF(rl,buf,len,r);
9803 r = s;
9804 rl = sl;
9805 s = cur;
9806 sl = l;
9807 count++;
9808 if (count > 50) {
9809 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009810 if (ctxt->instate == XML_PARSER_EOF) {
9811 xmlFree(buf);
9812 return;
9813 }
Owen Taylor3473f882001-02-23 17:55:21 +00009814 count = 0;
9815 }
9816 NEXTL(l);
9817 cur = CUR_CHAR(l);
9818 }
9819 buf[len] = 0;
9820 ctxt->instate = XML_PARSER_CONTENT;
9821 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009822 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009823 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009824 xmlFree(buf);
9825 return;
9826 }
9827 NEXTL(l);
9828
9829 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009830 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009831 */
9832 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9833 if (ctxt->sax->cdataBlock != NULL)
9834 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009835 else if (ctxt->sax->characters != NULL)
9836 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009837 }
9838 xmlFree(buf);
9839}
9840
9841/**
9842 * xmlParseContent:
9843 * @ctxt: an XML parser context
9844 *
9845 * Parse a content:
9846 *
9847 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9848 */
9849
9850void
9851xmlParseContent(xmlParserCtxtPtr ctxt) {
9852 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009853 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009854 ((RAW != '<') || (NXT(1) != '/')) &&
9855 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009856 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009857 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009858 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009859
9860 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009861 * First case : a Processing Instruction.
9862 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009863 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009864 xmlParsePI(ctxt);
9865 }
9866
9867 /*
9868 * Second case : a CDSection
9869 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009870 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009871 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009872 xmlParseCDSect(ctxt);
9873 }
9874
9875 /*
9876 * Third case : a comment
9877 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009878 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009879 (NXT(2) == '-') && (NXT(3) == '-')) {
9880 xmlParseComment(ctxt);
9881 ctxt->instate = XML_PARSER_CONTENT;
9882 }
9883
9884 /*
9885 * Fourth case : a sub-element.
9886 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009887 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009888 xmlParseElement(ctxt);
9889 }
9890
9891 /*
9892 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009893 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009894 */
9895
Daniel Veillard21a0f912001-02-25 19:54:14 +00009896 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009897 xmlParseReference(ctxt);
9898 }
9899
9900 /*
9901 * Last case, text. Note that References are handled directly.
9902 */
9903 else {
9904 xmlParseCharData(ctxt, 0);
9905 }
9906
9907 GROW;
9908 /*
9909 * Pop-up of finished entities.
9910 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009911 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009912 xmlPopInput(ctxt);
9913 SHRINK;
9914
Daniel Veillardfdc91562002-07-01 21:52:03 +00009915 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009916 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9917 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009918 ctxt->instate = XML_PARSER_EOF;
9919 break;
9920 }
9921 }
9922}
9923
9924/**
9925 * xmlParseElement:
9926 * @ctxt: an XML parser context
9927 *
9928 * parse an XML element, this is highly recursive
9929 *
9930 * [39] element ::= EmptyElemTag | STag content ETag
9931 *
9932 * [ WFC: Element Type Match ]
9933 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009934 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009935 *
Owen Taylor3473f882001-02-23 17:55:21 +00009936 */
9937
9938void
9939xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009940 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009941 const xmlChar *prefix = NULL;
9942 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009943 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009944 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009945 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009946 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009947
Daniel Veillard8915c152008-08-26 13:05:34 +00009948 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9949 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9950 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9951 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9952 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009953 ctxt->instate = XML_PARSER_EOF;
9954 return;
9955 }
9956
Owen Taylor3473f882001-02-23 17:55:21 +00009957 /* Capture start position */
9958 if (ctxt->record_info) {
9959 node_info.begin_pos = ctxt->input->consumed +
9960 (CUR_PTR - ctxt->input->base);
9961 node_info.begin_line = ctxt->input->line;
9962 }
9963
9964 if (ctxt->spaceNr == 0)
9965 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009966 else if (*ctxt->space == -2)
9967 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009968 else
9969 spacePush(ctxt, *ctxt->space);
9970
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009971 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009972#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009973 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009974#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009975 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009976#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009977 else
9978 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009979#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009980 if (ctxt->instate == XML_PARSER_EOF)
9981 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009982 if (name == NULL) {
9983 spacePop(ctxt);
9984 return;
9985 }
9986 namePush(ctxt, name);
9987 ret = ctxt->node;
9988
Daniel Veillard4432df22003-09-28 18:58:27 +00009989#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009990 /*
9991 * [ VC: Root Element Type ]
9992 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009993 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00009994 */
9995 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9996 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9997 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009998#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009999
10000 /*
10001 * Check for an Empty Element.
10002 */
10003 if ((RAW == '/') && (NXT(1) == '>')) {
10004 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010005 if (ctxt->sax2) {
10006 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10007 (!ctxt->disableSAX))
10008 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +000010009#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +000010010 } else {
10011 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10012 (!ctxt->disableSAX))
10013 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010014#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010015 }
Daniel Veillard0fb18932003-09-07 09:14:37 +000010016 namePop(ctxt);
10017 spacePop(ctxt);
10018 if (nsNr != ctxt->nsNr)
10019 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010020 if ( ret != NULL && ctxt->record_info ) {
10021 node_info.end_pos = ctxt->input->consumed +
10022 (CUR_PTR - ctxt->input->base);
10023 node_info.end_line = ctxt->input->line;
10024 node_info.node = ret;
10025 xmlParserAddNodeInfo(ctxt, &node_info);
10026 }
10027 return;
10028 }
10029 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +000010030 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +000010031 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010032 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10033 "Couldn't find end of Start Tag %s line %d\n",
10034 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010035
10036 /*
10037 * end of parsing of this node.
10038 */
10039 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010040 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010041 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010042 if (nsNr != ctxt->nsNr)
10043 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010044
10045 /*
10046 * Capture end position and add node
10047 */
10048 if ( ret != NULL && ctxt->record_info ) {
10049 node_info.end_pos = ctxt->input->consumed +
10050 (CUR_PTR - ctxt->input->base);
10051 node_info.end_line = ctxt->input->line;
10052 node_info.node = ret;
10053 xmlParserAddNodeInfo(ctxt, &node_info);
10054 }
10055 return;
10056 }
10057
10058 /*
10059 * Parse the content of the element:
10060 */
10061 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010062 if (ctxt->instate == XML_PARSER_EOF)
10063 return;
Daniel Veillard73b013f2003-09-30 12:36:01 +000010064 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +000010065 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +000010066 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010067 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010068
10069 /*
10070 * end of parsing of this node.
10071 */
10072 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010073 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010074 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010075 if (nsNr != ctxt->nsNr)
10076 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010077 return;
10078 }
10079
10080 /*
10081 * parse the end of tag: '</' should be here.
10082 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010083 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010084 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010085 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010086 }
10087#ifdef LIBXML_SAX1_ENABLED
10088 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010089 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010090#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010091
10092 /*
10093 * Capture end position and add node
10094 */
10095 if ( ret != NULL && ctxt->record_info ) {
10096 node_info.end_pos = ctxt->input->consumed +
10097 (CUR_PTR - ctxt->input->base);
10098 node_info.end_line = ctxt->input->line;
10099 node_info.node = ret;
10100 xmlParserAddNodeInfo(ctxt, &node_info);
10101 }
10102}
10103
10104/**
10105 * xmlParseVersionNum:
10106 * @ctxt: an XML parser context
10107 *
10108 * parse the XML version value.
10109 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010110 * [26] VersionNum ::= '1.' [0-9]+
10111 *
10112 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010113 *
10114 * Returns the string giving the XML version number, or NULL
10115 */
10116xmlChar *
10117xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10118 xmlChar *buf = NULL;
10119 int len = 0;
10120 int size = 10;
10121 xmlChar cur;
10122
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010123 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010124 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010125 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010126 return(NULL);
10127 }
10128 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010129 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010130 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010131 return(NULL);
10132 }
10133 buf[len++] = cur;
10134 NEXT;
10135 cur=CUR;
10136 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010137 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010138 return(NULL);
10139 }
10140 buf[len++] = cur;
10141 NEXT;
10142 cur=CUR;
10143 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010144 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010145 xmlChar *tmp;
10146
Owen Taylor3473f882001-02-23 17:55:21 +000010147 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010148 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10149 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010150 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010151 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010152 return(NULL);
10153 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010154 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010155 }
10156 buf[len++] = cur;
10157 NEXT;
10158 cur=CUR;
10159 }
10160 buf[len] = 0;
10161 return(buf);
10162}
10163
10164/**
10165 * xmlParseVersionInfo:
10166 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010167 *
Owen Taylor3473f882001-02-23 17:55:21 +000010168 * parse the XML version.
10169 *
10170 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010171 *
Owen Taylor3473f882001-02-23 17:55:21 +000010172 * [25] Eq ::= S? '=' S?
10173 *
10174 * Returns the version string, e.g. "1.0"
10175 */
10176
10177xmlChar *
10178xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10179 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010180
Daniel Veillarda07050d2003-10-19 14:46:32 +000010181 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010182 SKIP(7);
10183 SKIP_BLANKS;
10184 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010185 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010186 return(NULL);
10187 }
10188 NEXT;
10189 SKIP_BLANKS;
10190 if (RAW == '"') {
10191 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010192 version = xmlParseVersionNum(ctxt);
10193 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010194 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010195 } else
10196 NEXT;
10197 } else if (RAW == '\''){
10198 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010199 version = xmlParseVersionNum(ctxt);
10200 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010201 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010202 } else
10203 NEXT;
10204 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010205 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010206 }
10207 }
10208 return(version);
10209}
10210
10211/**
10212 * xmlParseEncName:
10213 * @ctxt: an XML parser context
10214 *
10215 * parse the XML encoding name
10216 *
10217 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10218 *
10219 * Returns the encoding name value or NULL
10220 */
10221xmlChar *
10222xmlParseEncName(xmlParserCtxtPtr ctxt) {
10223 xmlChar *buf = NULL;
10224 int len = 0;
10225 int size = 10;
10226 xmlChar cur;
10227
10228 cur = CUR;
10229 if (((cur >= 'a') && (cur <= 'z')) ||
10230 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010231 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010232 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010233 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010234 return(NULL);
10235 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010236
Owen Taylor3473f882001-02-23 17:55:21 +000010237 buf[len++] = cur;
10238 NEXT;
10239 cur = CUR;
10240 while (((cur >= 'a') && (cur <= 'z')) ||
10241 ((cur >= 'A') && (cur <= 'Z')) ||
10242 ((cur >= '0') && (cur <= '9')) ||
10243 (cur == '.') || (cur == '_') ||
10244 (cur == '-')) {
10245 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010246 xmlChar *tmp;
10247
Owen Taylor3473f882001-02-23 17:55:21 +000010248 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010249 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10250 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010251 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010252 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010253 return(NULL);
10254 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010255 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010256 }
10257 buf[len++] = cur;
10258 NEXT;
10259 cur = CUR;
10260 if (cur == 0) {
10261 SHRINK;
10262 GROW;
10263 cur = CUR;
10264 }
10265 }
10266 buf[len] = 0;
10267 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010268 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010269 }
10270 return(buf);
10271}
10272
10273/**
10274 * xmlParseEncodingDecl:
10275 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010276 *
Owen Taylor3473f882001-02-23 17:55:21 +000010277 * parse the XML encoding declaration
10278 *
10279 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10280 *
10281 * this setups the conversion filters.
10282 *
10283 * Returns the encoding value or NULL
10284 */
10285
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010286const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010287xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10288 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010289
10290 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010291 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010292 SKIP(8);
10293 SKIP_BLANKS;
10294 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010295 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010296 return(NULL);
10297 }
10298 NEXT;
10299 SKIP_BLANKS;
10300 if (RAW == '"') {
10301 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010302 encoding = xmlParseEncName(ctxt);
10303 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010304 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010305 } else
10306 NEXT;
10307 } else if (RAW == '\''){
10308 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010309 encoding = xmlParseEncName(ctxt);
10310 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010311 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010312 } else
10313 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010314 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010315 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010316 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010317
10318 /*
10319 * Non standard parsing, allowing the user to ignore encoding
10320 */
10321 if (ctxt->options & XML_PARSE_IGNORE_ENC)
10322 return(encoding);
10323
Daniel Veillard6b621b82003-08-11 15:03:34 +000010324 /*
10325 * UTF-16 encoding stwich has already taken place at this stage,
10326 * more over the little-endian/big-endian selection is already done
10327 */
10328 if ((encoding != NULL) &&
10329 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10330 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010331 /*
10332 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010333 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010334 * document is apparently UTF-8 compatible, then raise an
10335 * encoding mismatch fatal error
10336 */
10337 if ((ctxt->encoding == NULL) &&
10338 (ctxt->input->buf != NULL) &&
10339 (ctxt->input->buf->encoder == NULL)) {
10340 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10341 "Document labelled UTF-16 but has UTF-8 content\n");
10342 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010343 if (ctxt->encoding != NULL)
10344 xmlFree((xmlChar *) ctxt->encoding);
10345 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010346 }
10347 /*
10348 * UTF-8 encoding is handled natively
10349 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010350 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010351 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10352 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010353 if (ctxt->encoding != NULL)
10354 xmlFree((xmlChar *) ctxt->encoding);
10355 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010356 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010357 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010358 xmlCharEncodingHandlerPtr handler;
10359
10360 if (ctxt->input->encoding != NULL)
10361 xmlFree((xmlChar *) ctxt->input->encoding);
10362 ctxt->input->encoding = encoding;
10363
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010364 handler = xmlFindCharEncodingHandler((const char *) encoding);
10365 if (handler != NULL) {
10366 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +000010367 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010368 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010369 "Unsupported encoding %s\n", encoding);
10370 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010371 }
10372 }
10373 }
10374 return(encoding);
10375}
10376
10377/**
10378 * xmlParseSDDecl:
10379 * @ctxt: an XML parser context
10380 *
10381 * parse the XML standalone declaration
10382 *
10383 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010384 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010385 *
10386 * [ VC: Standalone Document Declaration ]
10387 * TODO The standalone document declaration must have the value "no"
10388 * if any external markup declarations contain declarations of:
10389 * - attributes with default values, if elements to which these
10390 * attributes apply appear in the document without specifications
10391 * of values for these attributes, or
10392 * - entities (other than amp, lt, gt, apos, quot), if references
10393 * to those entities appear in the document, or
10394 * - attributes with values subject to normalization, where the
10395 * attribute appears in the document with a value which will change
10396 * as a result of normalization, or
10397 * - element types with element content, if white space occurs directly
10398 * within any instance of those types.
10399 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010400 * Returns:
10401 * 1 if standalone="yes"
10402 * 0 if standalone="no"
10403 * -2 if standalone attribute is missing or invalid
10404 * (A standalone value of -2 means that the XML declaration was found,
10405 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010406 */
10407
10408int
10409xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010410 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010411
10412 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010413 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010414 SKIP(10);
10415 SKIP_BLANKS;
10416 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010417 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010418 return(standalone);
10419 }
10420 NEXT;
10421 SKIP_BLANKS;
10422 if (RAW == '\''){
10423 NEXT;
10424 if ((RAW == 'n') && (NXT(1) == 'o')) {
10425 standalone = 0;
10426 SKIP(2);
10427 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10428 (NXT(2) == 's')) {
10429 standalone = 1;
10430 SKIP(3);
10431 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010432 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010433 }
10434 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010435 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010436 } else
10437 NEXT;
10438 } else if (RAW == '"'){
10439 NEXT;
10440 if ((RAW == 'n') && (NXT(1) == 'o')) {
10441 standalone = 0;
10442 SKIP(2);
10443 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10444 (NXT(2) == 's')) {
10445 standalone = 1;
10446 SKIP(3);
10447 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010448 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010449 }
10450 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010451 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010452 } else
10453 NEXT;
10454 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010455 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010456 }
10457 }
10458 return(standalone);
10459}
10460
10461/**
10462 * xmlParseXMLDecl:
10463 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010464 *
Owen Taylor3473f882001-02-23 17:55:21 +000010465 * parse an XML declaration header
10466 *
10467 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10468 */
10469
10470void
10471xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10472 xmlChar *version;
10473
10474 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010475 * This value for standalone indicates that the document has an
10476 * XML declaration but it does not have a standalone attribute.
10477 * It will be overwritten later if a standalone attribute is found.
10478 */
10479 ctxt->input->standalone = -2;
10480
10481 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010482 * We know that '<?xml' is here.
10483 */
10484 SKIP(5);
10485
William M. Brack76e95df2003-10-18 16:20:14 +000010486 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010487 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10488 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010489 }
10490 SKIP_BLANKS;
10491
10492 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010493 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010494 */
10495 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010496 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010497 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010498 } else {
10499 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10500 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010501 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010502 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010503 if (ctxt->options & XML_PARSE_OLD10) {
10504 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10505 "Unsupported version '%s'\n",
10506 version);
10507 } else {
10508 if ((version[0] == '1') && ((version[1] == '.'))) {
10509 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10510 "Unsupported version '%s'\n",
10511 version, NULL);
10512 } else {
10513 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10514 "Unsupported version '%s'\n",
10515 version);
10516 }
10517 }
Daniel Veillard19840942001-11-29 16:11:38 +000010518 }
10519 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010520 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010521 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010522 }
Owen Taylor3473f882001-02-23 17:55:21 +000010523
10524 /*
10525 * We may have the encoding declaration
10526 */
William M. Brack76e95df2003-10-18 16:20:14 +000010527 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010528 if ((RAW == '?') && (NXT(1) == '>')) {
10529 SKIP(2);
10530 return;
10531 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010532 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010533 }
10534 xmlParseEncodingDecl(ctxt);
10535 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10536 /*
10537 * The XML REC instructs us to stop parsing right here
10538 */
10539 return;
10540 }
10541
10542 /*
10543 * We may have the standalone status.
10544 */
William M. Brack76e95df2003-10-18 16:20:14 +000010545 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010546 if ((RAW == '?') && (NXT(1) == '>')) {
10547 SKIP(2);
10548 return;
10549 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010550 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010551 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010552
10553 /*
10554 * We can grow the input buffer freely at that point
10555 */
10556 GROW;
10557
Owen Taylor3473f882001-02-23 17:55:21 +000010558 SKIP_BLANKS;
10559 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10560
10561 SKIP_BLANKS;
10562 if ((RAW == '?') && (NXT(1) == '>')) {
10563 SKIP(2);
10564 } else if (RAW == '>') {
10565 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010566 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010567 NEXT;
10568 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010569 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010570 MOVETO_ENDTAG(CUR_PTR);
10571 NEXT;
10572 }
10573}
10574
10575/**
10576 * xmlParseMisc:
10577 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010578 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010579 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010580 *
10581 * [27] Misc ::= Comment | PI | S
10582 */
10583
10584void
10585xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010586 while ((ctxt->instate != XML_PARSER_EOF) &&
10587 (((RAW == '<') && (NXT(1) == '?')) ||
10588 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10589 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010590 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010591 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010592 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010593 NEXT;
10594 } else
10595 xmlParseComment(ctxt);
10596 }
10597}
10598
10599/**
10600 * xmlParseDocument:
10601 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010602 *
Owen Taylor3473f882001-02-23 17:55:21 +000010603 * parse an XML document (and build a tree if using the standard SAX
10604 * interface).
10605 *
10606 * [1] document ::= prolog element Misc*
10607 *
10608 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10609 *
10610 * Returns 0, -1 in case of error. the parser context is augmented
10611 * as a result of the parsing.
10612 */
10613
10614int
10615xmlParseDocument(xmlParserCtxtPtr ctxt) {
10616 xmlChar start[4];
10617 xmlCharEncoding enc;
10618
10619 xmlInitParser();
10620
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010621 if ((ctxt == NULL) || (ctxt->input == NULL))
10622 return(-1);
10623
Owen Taylor3473f882001-02-23 17:55:21 +000010624 GROW;
10625
10626 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010627 * SAX: detecting the level.
10628 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010629 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010630
10631 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010632 * SAX: beginning of the document processing.
10633 */
10634 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10635 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010636 if (ctxt->instate == XML_PARSER_EOF)
10637 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010638
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010639 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010640 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010641 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010642 * Get the 4 first bytes and decode the charset
10643 * if enc != XML_CHAR_ENCODING_NONE
10644 * plug some encoding conversion routines.
10645 */
10646 start[0] = RAW;
10647 start[1] = NXT(1);
10648 start[2] = NXT(2);
10649 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010650 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010651 if (enc != XML_CHAR_ENCODING_NONE) {
10652 xmlSwitchEncoding(ctxt, enc);
10653 }
Owen Taylor3473f882001-02-23 17:55:21 +000010654 }
10655
10656
10657 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010658 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010659 }
10660
10661 /*
10662 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010663 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010664 * than just the first line, unless the amount of data is really
10665 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010666 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010667 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10668 GROW;
10669 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010670 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010671
10672 /*
10673 * Note that we will switch encoding on the fly.
10674 */
10675 xmlParseXMLDecl(ctxt);
10676 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10677 /*
10678 * The XML REC instructs us to stop parsing right here
10679 */
10680 return(-1);
10681 }
10682 ctxt->standalone = ctxt->input->standalone;
10683 SKIP_BLANKS;
10684 } else {
10685 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10686 }
10687 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10688 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010689 if (ctxt->instate == XML_PARSER_EOF)
10690 return(-1);
Daniel Veillard63588f42013-05-10 14:01:46 +080010691 if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
10692 (ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
10693 ctxt->myDoc->compression = ctxt->input->buf->compressed;
10694 }
Owen Taylor3473f882001-02-23 17:55:21 +000010695
10696 /*
10697 * The Misc part of the Prolog
10698 */
10699 GROW;
10700 xmlParseMisc(ctxt);
10701
10702 /*
10703 * Then possibly doc type declaration(s) and more Misc
10704 * (doctypedecl Misc*)?
10705 */
10706 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010707 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010708
10709 ctxt->inSubset = 1;
10710 xmlParseDocTypeDecl(ctxt);
10711 if (RAW == '[') {
10712 ctxt->instate = XML_PARSER_DTD;
10713 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010714 if (ctxt->instate == XML_PARSER_EOF)
10715 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010716 }
10717
10718 /*
10719 * Create and update the external subset.
10720 */
10721 ctxt->inSubset = 2;
10722 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10723 (!ctxt->disableSAX))
10724 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10725 ctxt->extSubSystem, ctxt->extSubURI);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010726 if (ctxt->instate == XML_PARSER_EOF)
10727 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010728 ctxt->inSubset = 0;
10729
Daniel Veillardac4118d2008-01-11 05:27:32 +000010730 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010731
10732 ctxt->instate = XML_PARSER_PROLOG;
10733 xmlParseMisc(ctxt);
10734 }
10735
10736 /*
10737 * Time to start parsing the tree itself
10738 */
10739 GROW;
10740 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010741 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10742 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010743 } else {
10744 ctxt->instate = XML_PARSER_CONTENT;
10745 xmlParseElement(ctxt);
10746 ctxt->instate = XML_PARSER_EPILOG;
10747
10748
10749 /*
10750 * The Misc part at the end
10751 */
10752 xmlParseMisc(ctxt);
10753
Daniel Veillard561b7f82002-03-20 21:55:57 +000010754 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010755 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010756 }
10757 ctxt->instate = XML_PARSER_EOF;
10758 }
10759
10760 /*
10761 * SAX: end of the document processing.
10762 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010763 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010764 ctxt->sax->endDocument(ctxt->userData);
10765
Daniel Veillard5997aca2002-03-18 18:36:20 +000010766 /*
10767 * Remove locally kept entity definitions if the tree was not built
10768 */
10769 if ((ctxt->myDoc != NULL) &&
10770 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10771 xmlFreeDoc(ctxt->myDoc);
10772 ctxt->myDoc = NULL;
10773 }
10774
Daniel Veillardae0765b2008-07-31 19:54:59 +000010775 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10776 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10777 if (ctxt->valid)
10778 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10779 if (ctxt->nsWellFormed)
10780 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10781 if (ctxt->options & XML_PARSE_OLD10)
10782 ctxt->myDoc->properties |= XML_DOC_OLD10;
10783 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010784 if (! ctxt->wellFormed) {
10785 ctxt->valid = 0;
10786 return(-1);
10787 }
Owen Taylor3473f882001-02-23 17:55:21 +000010788 return(0);
10789}
10790
10791/**
10792 * xmlParseExtParsedEnt:
10793 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010794 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010795 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010796 * An external general parsed entity is well-formed if it matches the
10797 * production labeled extParsedEnt.
10798 *
10799 * [78] extParsedEnt ::= TextDecl? content
10800 *
10801 * Returns 0, -1 in case of error. the parser context is augmented
10802 * as a result of the parsing.
10803 */
10804
10805int
10806xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10807 xmlChar start[4];
10808 xmlCharEncoding enc;
10809
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010810 if ((ctxt == NULL) || (ctxt->input == NULL))
10811 return(-1);
10812
Owen Taylor3473f882001-02-23 17:55:21 +000010813 xmlDefaultSAXHandlerInit();
10814
Daniel Veillard309f81d2003-09-23 09:02:53 +000010815 xmlDetectSAX2(ctxt);
10816
Owen Taylor3473f882001-02-23 17:55:21 +000010817 GROW;
10818
10819 /*
10820 * SAX: beginning of the document processing.
10821 */
10822 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10823 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10824
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010825 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010826 * Get the 4 first bytes and decode the charset
10827 * if enc != XML_CHAR_ENCODING_NONE
10828 * plug some encoding conversion routines.
10829 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010830 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10831 start[0] = RAW;
10832 start[1] = NXT(1);
10833 start[2] = NXT(2);
10834 start[3] = NXT(3);
10835 enc = xmlDetectCharEncoding(start, 4);
10836 if (enc != XML_CHAR_ENCODING_NONE) {
10837 xmlSwitchEncoding(ctxt, enc);
10838 }
Owen Taylor3473f882001-02-23 17:55:21 +000010839 }
10840
10841
10842 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010843 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010844 }
10845
10846 /*
10847 * Check for the XMLDecl in the Prolog.
10848 */
10849 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010850 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010851
10852 /*
10853 * Note that we will switch encoding on the fly.
10854 */
10855 xmlParseXMLDecl(ctxt);
10856 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10857 /*
10858 * The XML REC instructs us to stop parsing right here
10859 */
10860 return(-1);
10861 }
10862 SKIP_BLANKS;
10863 } else {
10864 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10865 }
10866 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10867 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010868 if (ctxt->instate == XML_PARSER_EOF)
10869 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +000010870
10871 /*
10872 * Doing validity checking on chunk doesn't make sense
10873 */
10874 ctxt->instate = XML_PARSER_CONTENT;
10875 ctxt->validate = 0;
10876 ctxt->loadsubset = 0;
10877 ctxt->depth = 0;
10878
10879 xmlParseContent(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080010880 if (ctxt->instate == XML_PARSER_EOF)
10881 return(-1);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010882
Owen Taylor3473f882001-02-23 17:55:21 +000010883 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010884 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010885 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010886 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010887 }
10888
10889 /*
10890 * SAX: end of the document processing.
10891 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010892 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010893 ctxt->sax->endDocument(ctxt->userData);
10894
10895 if (! ctxt->wellFormed) return(-1);
10896 return(0);
10897}
10898
Daniel Veillard73b013f2003-09-30 12:36:01 +000010899#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010900/************************************************************************
10901 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010902 * Progressive parsing interfaces *
Owen Taylor3473f882001-02-23 17:55:21 +000010903 * *
10904 ************************************************************************/
10905
10906/**
10907 * xmlParseLookupSequence:
10908 * @ctxt: an XML parser context
10909 * @first: the first char to lookup
10910 * @next: the next char to lookup or zero
10911 * @third: the next char to lookup or zero
10912 *
10913 * Try to find if a sequence (first, next, third) or just (first next) or
10914 * (first) is available in the input stream.
10915 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10916 * to avoid rescanning sequences of bytes, it DOES change the state of the
10917 * parser, do not use liberally.
10918 *
10919 * Returns the index to the current parsing point if the full sequence
10920 * is available, -1 otherwise.
10921 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010922static int
Owen Taylor3473f882001-02-23 17:55:21 +000010923xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10924 xmlChar next, xmlChar third) {
10925 int base, len;
10926 xmlParserInputPtr in;
10927 const xmlChar *buf;
10928
10929 in = ctxt->input;
10930 if (in == NULL) return(-1);
10931 base = in->cur - in->base;
10932 if (base < 0) return(-1);
10933 if (ctxt->checkIndex > base)
10934 base = ctxt->checkIndex;
10935 if (in->buf == NULL) {
10936 buf = in->base;
10937 len = in->length;
10938 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010939 buf = xmlBufContent(in->buf->buffer);
10940 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010941 }
10942 /* take into account the sequence length */
10943 if (third) len -= 2;
10944 else if (next) len --;
10945 for (;base < len;base++) {
10946 if (buf[base] == first) {
10947 if (third != 0) {
10948 if ((buf[base + 1] != next) ||
10949 (buf[base + 2] != third)) continue;
10950 } else if (next != 0) {
10951 if (buf[base + 1] != next) continue;
10952 }
10953 ctxt->checkIndex = 0;
10954#ifdef DEBUG_PUSH
10955 if (next == 0)
10956 xmlGenericError(xmlGenericErrorContext,
10957 "PP: lookup '%c' found at %d\n",
10958 first, base);
10959 else if (third == 0)
10960 xmlGenericError(xmlGenericErrorContext,
10961 "PP: lookup '%c%c' found at %d\n",
10962 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010963 else
Owen Taylor3473f882001-02-23 17:55:21 +000010964 xmlGenericError(xmlGenericErrorContext,
10965 "PP: lookup '%c%c%c' found at %d\n",
10966 first, next, third, base);
10967#endif
10968 return(base - (in->cur - in->base));
10969 }
10970 }
10971 ctxt->checkIndex = base;
10972#ifdef DEBUG_PUSH
10973 if (next == 0)
10974 xmlGenericError(xmlGenericErrorContext,
10975 "PP: lookup '%c' failed\n", first);
10976 else if (third == 0)
10977 xmlGenericError(xmlGenericErrorContext,
10978 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010979 else
Owen Taylor3473f882001-02-23 17:55:21 +000010980 xmlGenericError(xmlGenericErrorContext,
10981 "PP: lookup '%c%c%c' failed\n", first, next, third);
10982#endif
10983 return(-1);
10984}
10985
10986/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010987 * xmlParseGetLasts:
10988 * @ctxt: an XML parser context
10989 * @lastlt: pointer to store the last '<' from the input
10990 * @lastgt: pointer to store the last '>' from the input
10991 *
10992 * Lookup the last < and > in the current chunk
10993 */
10994static void
10995xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10996 const xmlChar **lastgt) {
10997 const xmlChar *tmp;
10998
10999 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
11000 xmlGenericError(xmlGenericErrorContext,
11001 "Internal error: xmlParseGetLasts\n");
11002 return;
11003 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011004 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011005 tmp = ctxt->input->end;
11006 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000011007 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000011008 if (tmp < ctxt->input->base) {
11009 *lastlt = NULL;
11010 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000011011 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011012 *lastlt = tmp;
11013 tmp++;
11014 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
11015 if (*tmp == '\'') {
11016 tmp++;
11017 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
11018 if (tmp < ctxt->input->end) tmp++;
11019 } else if (*tmp == '"') {
11020 tmp++;
11021 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
11022 if (tmp < ctxt->input->end) tmp++;
11023 } else
11024 tmp++;
11025 }
11026 if (tmp < ctxt->input->end)
11027 *lastgt = tmp;
11028 else {
11029 tmp = *lastlt;
11030 tmp--;
11031 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
11032 if (tmp >= ctxt->input->base)
11033 *lastgt = tmp;
11034 else
11035 *lastgt = NULL;
11036 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011037 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011038 } else {
11039 *lastlt = NULL;
11040 *lastgt = NULL;
11041 }
11042}
11043/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011044 * xmlCheckCdataPush:
11045 * @cur: pointer to the bock of characters
11046 * @len: length of the block in bytes
11047 *
11048 * Check that the block of characters is okay as SCdata content [20]
11049 *
11050 * Returns the number of bytes to pass if okay, a negative index where an
11051 * UTF-8 error occured otherwise
11052 */
11053static int
11054xmlCheckCdataPush(const xmlChar *utf, int len) {
11055 int ix;
11056 unsigned char c;
11057 int codepoint;
11058
11059 if ((utf == NULL) || (len <= 0))
11060 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011061
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011062 for (ix = 0; ix < len;) { /* string is 0-terminated */
11063 c = utf[ix];
11064 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11065 if (c >= 0x20)
11066 ix++;
11067 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11068 ix++;
11069 else
11070 return(-ix);
11071 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11072 if (ix + 2 > len) return(ix);
11073 if ((utf[ix+1] & 0xc0 ) != 0x80)
11074 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011075 codepoint = (utf[ix] & 0x1f) << 6;
11076 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011077 if (!xmlIsCharQ(codepoint))
11078 return(-ix);
11079 ix += 2;
11080 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11081 if (ix + 3 > len) return(ix);
11082 if (((utf[ix+1] & 0xc0) != 0x80) ||
11083 ((utf[ix+2] & 0xc0) != 0x80))
11084 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011085 codepoint = (utf[ix] & 0xf) << 12;
11086 codepoint |= (utf[ix+1] & 0x3f) << 6;
11087 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011088 if (!xmlIsCharQ(codepoint))
11089 return(-ix);
11090 ix += 3;
11091 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11092 if (ix + 4 > len) return(ix);
11093 if (((utf[ix+1] & 0xc0) != 0x80) ||
11094 ((utf[ix+2] & 0xc0) != 0x80) ||
11095 ((utf[ix+3] & 0xc0) != 0x80))
11096 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011097 codepoint = (utf[ix] & 0x7) << 18;
11098 codepoint |= (utf[ix+1] & 0x3f) << 12;
11099 codepoint |= (utf[ix+2] & 0x3f) << 6;
11100 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011101 if (!xmlIsCharQ(codepoint))
11102 return(-ix);
11103 ix += 4;
11104 } else /* unknown encoding */
11105 return(-ix);
11106 }
11107 return(ix);
11108}
11109
11110/**
Owen Taylor3473f882001-02-23 17:55:21 +000011111 * xmlParseTryOrFinish:
11112 * @ctxt: an XML parser context
11113 * @terminate: last chunk indicator
11114 *
11115 * Try to progress on parsing
11116 *
11117 * Returns zero if no parsing was possible
11118 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011119static int
Owen Taylor3473f882001-02-23 17:55:21 +000011120xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11121 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011122 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011123 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011124 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011125
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011126 if (ctxt->input == NULL)
11127 return(0);
11128
Owen Taylor3473f882001-02-23 17:55:21 +000011129#ifdef DEBUG_PUSH
11130 switch (ctxt->instate) {
11131 case XML_PARSER_EOF:
11132 xmlGenericError(xmlGenericErrorContext,
11133 "PP: try EOF\n"); break;
11134 case XML_PARSER_START:
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: try START\n"); break;
11137 case XML_PARSER_MISC:
11138 xmlGenericError(xmlGenericErrorContext,
11139 "PP: try MISC\n");break;
11140 case XML_PARSER_COMMENT:
11141 xmlGenericError(xmlGenericErrorContext,
11142 "PP: try COMMENT\n");break;
11143 case XML_PARSER_PROLOG:
11144 xmlGenericError(xmlGenericErrorContext,
11145 "PP: try PROLOG\n");break;
11146 case XML_PARSER_START_TAG:
11147 xmlGenericError(xmlGenericErrorContext,
11148 "PP: try START_TAG\n");break;
11149 case XML_PARSER_CONTENT:
11150 xmlGenericError(xmlGenericErrorContext,
11151 "PP: try CONTENT\n");break;
11152 case XML_PARSER_CDATA_SECTION:
11153 xmlGenericError(xmlGenericErrorContext,
11154 "PP: try CDATA_SECTION\n");break;
11155 case XML_PARSER_END_TAG:
11156 xmlGenericError(xmlGenericErrorContext,
11157 "PP: try END_TAG\n");break;
11158 case XML_PARSER_ENTITY_DECL:
11159 xmlGenericError(xmlGenericErrorContext,
11160 "PP: try ENTITY_DECL\n");break;
11161 case XML_PARSER_ENTITY_VALUE:
11162 xmlGenericError(xmlGenericErrorContext,
11163 "PP: try ENTITY_VALUE\n");break;
11164 case XML_PARSER_ATTRIBUTE_VALUE:
11165 xmlGenericError(xmlGenericErrorContext,
11166 "PP: try ATTRIBUTE_VALUE\n");break;
11167 case XML_PARSER_DTD:
11168 xmlGenericError(xmlGenericErrorContext,
11169 "PP: try DTD\n");break;
11170 case XML_PARSER_EPILOG:
11171 xmlGenericError(xmlGenericErrorContext,
11172 "PP: try EPILOG\n");break;
11173 case XML_PARSER_PI:
11174 xmlGenericError(xmlGenericErrorContext,
11175 "PP: try PI\n");break;
11176 case XML_PARSER_IGNORE:
11177 xmlGenericError(xmlGenericErrorContext,
11178 "PP: try IGNORE\n");break;
11179 }
11180#endif
11181
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011182 if ((ctxt->input != NULL) &&
11183 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011184 xmlSHRINK(ctxt);
11185 ctxt->checkIndex = 0;
11186 }
11187 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011188
Daniel Veillarde50ba812013-04-11 15:54:51 +080011189 while (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard14412512005-01-21 23:53:26 +000011190 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011191 return(0);
11192
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011193
Owen Taylor3473f882001-02-23 17:55:21 +000011194 /*
11195 * Pop-up of finished entities.
11196 */
11197 while ((RAW == 0) && (ctxt->inputNr > 1))
11198 xmlPopInput(ctxt);
11199
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011200 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011201 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011202 avail = ctxt->input->length -
11203 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011204 else {
11205 /*
11206 * If we are operating on converted input, try to flush
11207 * remainng chars to avoid them stalling in the non-converted
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011208 * buffer. But do not do this in document start where
11209 * encoding="..." may not have been read and we work on a
11210 * guessed encoding.
Daniel Veillard158a4d22002-02-20 22:17:58 +000011211 */
Daniel Veillardbf058dc2013-02-13 18:19:42 +080011212 if ((ctxt->instate != XML_PARSER_START) &&
11213 (ctxt->input->buf->raw != NULL) &&
11214 (xmlBufIsEmpty(ctxt->input->buf->raw) == 0)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011215 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11216 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011217 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011218
11219 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011220 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11221 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011222 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011223 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011224 (ctxt->input->cur - ctxt->input->base);
11225 }
Owen Taylor3473f882001-02-23 17:55:21 +000011226 if (avail < 1)
11227 goto done;
11228 switch (ctxt->instate) {
11229 case XML_PARSER_EOF:
11230 /*
11231 * Document parsing is done !
11232 */
11233 goto done;
11234 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011235 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11236 xmlChar start[4];
11237 xmlCharEncoding enc;
11238
11239 /*
11240 * Very first chars read from the document flow.
11241 */
11242 if (avail < 4)
11243 goto done;
11244
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011245 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011246 * Get the 4 first bytes and decode the charset
11247 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011248 * plug some encoding conversion routines,
11249 * else xmlSwitchEncoding will set to (default)
11250 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011251 */
11252 start[0] = RAW;
11253 start[1] = NXT(1);
11254 start[2] = NXT(2);
11255 start[3] = NXT(3);
11256 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011257 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011258 break;
11259 }
Owen Taylor3473f882001-02-23 17:55:21 +000011260
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011261 if (avail < 2)
11262 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011263 cur = ctxt->input->cur[0];
11264 next = ctxt->input->cur[1];
11265 if (cur == 0) {
11266 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11267 ctxt->sax->setDocumentLocator(ctxt->userData,
11268 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011269 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011270 ctxt->instate = XML_PARSER_EOF;
11271#ifdef DEBUG_PUSH
11272 xmlGenericError(xmlGenericErrorContext,
11273 "PP: entering EOF\n");
11274#endif
11275 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11276 ctxt->sax->endDocument(ctxt->userData);
11277 goto done;
11278 }
11279 if ((cur == '<') && (next == '?')) {
11280 /* PI or XML decl */
11281 if (avail < 5) return(ret);
11282 if ((!terminate) &&
11283 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11284 return(ret);
11285 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11286 ctxt->sax->setDocumentLocator(ctxt->userData,
11287 &xmlDefaultSAXLocator);
11288 if ((ctxt->input->cur[2] == 'x') &&
11289 (ctxt->input->cur[3] == 'm') &&
11290 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011291 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011292 ret += 5;
11293#ifdef DEBUG_PUSH
11294 xmlGenericError(xmlGenericErrorContext,
11295 "PP: Parsing XML Decl\n");
11296#endif
11297 xmlParseXMLDecl(ctxt);
11298 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11299 /*
11300 * The XML REC instructs us to stop parsing right
11301 * here
11302 */
11303 ctxt->instate = XML_PARSER_EOF;
11304 return(0);
11305 }
11306 ctxt->standalone = ctxt->input->standalone;
11307 if ((ctxt->encoding == NULL) &&
11308 (ctxt->input->encoding != NULL))
11309 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11310 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11311 (!ctxt->disableSAX))
11312 ctxt->sax->startDocument(ctxt->userData);
11313 ctxt->instate = XML_PARSER_MISC;
11314#ifdef DEBUG_PUSH
11315 xmlGenericError(xmlGenericErrorContext,
11316 "PP: entering MISC\n");
11317#endif
11318 } else {
11319 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11320 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11321 (!ctxt->disableSAX))
11322 ctxt->sax->startDocument(ctxt->userData);
11323 ctxt->instate = XML_PARSER_MISC;
11324#ifdef DEBUG_PUSH
11325 xmlGenericError(xmlGenericErrorContext,
11326 "PP: entering MISC\n");
11327#endif
11328 }
11329 } else {
11330 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11331 ctxt->sax->setDocumentLocator(ctxt->userData,
11332 &xmlDefaultSAXLocator);
11333 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011334 if (ctxt->version == NULL) {
11335 xmlErrMemory(ctxt, NULL);
11336 break;
11337 }
Owen Taylor3473f882001-02-23 17:55:21 +000011338 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11339 (!ctxt->disableSAX))
11340 ctxt->sax->startDocument(ctxt->userData);
11341 ctxt->instate = XML_PARSER_MISC;
11342#ifdef DEBUG_PUSH
11343 xmlGenericError(xmlGenericErrorContext,
11344 "PP: entering MISC\n");
11345#endif
11346 }
11347 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011348 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011349 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011350 const xmlChar *prefix = NULL;
11351 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011352 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011353
11354 if ((avail < 2) && (ctxt->inputNr == 1))
11355 goto done;
11356 cur = ctxt->input->cur[0];
11357 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011358 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000011359 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011360 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11361 ctxt->sax->endDocument(ctxt->userData);
11362 goto done;
11363 }
11364 if (!terminate) {
11365 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011366 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011367 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011368 goto done;
11369 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11370 goto done;
11371 }
11372 }
11373 if (ctxt->spaceNr == 0)
11374 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011375 else if (*ctxt->space == -2)
11376 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011377 else
11378 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011379#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011380 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011381#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011382 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011383#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011384 else
11385 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011386#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011387 if (ctxt->instate == XML_PARSER_EOF)
11388 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011389 if (name == NULL) {
11390 spacePop(ctxt);
11391 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011392 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11393 ctxt->sax->endDocument(ctxt->userData);
11394 goto done;
11395 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011396#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011397 /*
11398 * [ VC: Root Element Type ]
11399 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011400 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011401 */
11402 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11403 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11404 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011405#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011406
11407 /*
11408 * Check for an Empty Element.
11409 */
11410 if ((RAW == '/') && (NXT(1) == '>')) {
11411 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011412
11413 if (ctxt->sax2) {
11414 if ((ctxt->sax != NULL) &&
11415 (ctxt->sax->endElementNs != NULL) &&
11416 (!ctxt->disableSAX))
11417 ctxt->sax->endElementNs(ctxt->userData, name,
11418 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011419 if (ctxt->nsNr - nsNr > 0)
11420 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011421#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011422 } else {
11423 if ((ctxt->sax != NULL) &&
11424 (ctxt->sax->endElement != NULL) &&
11425 (!ctxt->disableSAX))
11426 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011427#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011428 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011429 if (ctxt->instate == XML_PARSER_EOF)
11430 goto done;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011431 spacePop(ctxt);
11432 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011433 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011434 } else {
11435 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011436 }
Daniel Veillard65686452012-07-19 18:25:01 +080011437 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011438 break;
11439 }
11440 if (RAW == '>') {
11441 NEXT;
11442 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011443 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011444 "Couldn't find end of Start Tag %s\n",
11445 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011446 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011447 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011448 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011449 if (ctxt->sax2)
11450 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011451#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011452 else
11453 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011454#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011455
Daniel Veillarda880b122003-04-21 21:36:41 +000011456 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011457 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011458 break;
11459 }
11460 case XML_PARSER_CONTENT: {
11461 const xmlChar *test;
11462 unsigned int cons;
11463 if ((avail < 2) && (ctxt->inputNr == 1))
11464 goto done;
11465 cur = ctxt->input->cur[0];
11466 next = ctxt->input->cur[1];
11467
11468 test = CUR_PTR;
11469 cons = ctxt->input->consumed;
11470 if ((cur == '<') && (next == '/')) {
11471 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011472 break;
11473 } else if ((cur == '<') && (next == '?')) {
11474 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011475 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11476 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011477 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011478 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011479 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011480 ctxt->instate = XML_PARSER_CONTENT;
11481 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011482 } else if ((cur == '<') && (next != '!')) {
11483 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011484 break;
11485 } else if ((cur == '<') && (next == '!') &&
11486 (ctxt->input->cur[2] == '-') &&
11487 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011488 int term;
11489
11490 if (avail < 4)
11491 goto done;
11492 ctxt->input->cur += 4;
11493 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11494 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011495 if ((!terminate) && (term < 0)) {
11496 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011497 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011498 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011499 xmlParseComment(ctxt);
11500 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011501 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011502 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11503 (ctxt->input->cur[2] == '[') &&
11504 (ctxt->input->cur[3] == 'C') &&
11505 (ctxt->input->cur[4] == 'D') &&
11506 (ctxt->input->cur[5] == 'A') &&
11507 (ctxt->input->cur[6] == 'T') &&
11508 (ctxt->input->cur[7] == 'A') &&
11509 (ctxt->input->cur[8] == '[')) {
11510 SKIP(9);
11511 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011512 break;
11513 } else if ((cur == '<') && (next == '!') &&
11514 (avail < 9)) {
11515 goto done;
11516 } else if (cur == '&') {
11517 if ((!terminate) &&
11518 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11519 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011520 xmlParseReference(ctxt);
11521 } else {
11522 /* TODO Avoid the extra copy, handle directly !!! */
11523 /*
11524 * Goal of the following test is:
11525 * - minimize calls to the SAX 'character' callback
11526 * when they are mergeable
11527 * - handle an problem for isBlank when we only parse
11528 * a sequence of blank chars and the next one is
11529 * not available to check against '<' presence.
11530 * - tries to homogenize the differences in SAX
11531 * callbacks between the push and pull versions
11532 * of the parser.
11533 */
11534 if ((ctxt->inputNr == 1) &&
11535 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11536 if (!terminate) {
11537 if (ctxt->progressive) {
11538 if ((lastlt == NULL) ||
11539 (ctxt->input->cur > lastlt))
11540 goto done;
11541 } else if (xmlParseLookupSequence(ctxt,
11542 '<', 0, 0) < 0) {
11543 goto done;
11544 }
11545 }
11546 }
11547 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011548 xmlParseCharData(ctxt, 0);
11549 }
11550 /*
11551 * Pop-up of finished entities.
11552 */
11553 while ((RAW == 0) && (ctxt->inputNr > 1))
11554 xmlPopInput(ctxt);
11555 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011556 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11557 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011558 ctxt->instate = XML_PARSER_EOF;
11559 break;
11560 }
11561 break;
11562 }
11563 case XML_PARSER_END_TAG:
11564 if (avail < 2)
11565 goto done;
11566 if (!terminate) {
11567 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011568 /* > can be found unescaped in attribute values */
11569 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011570 goto done;
11571 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11572 goto done;
11573 }
11574 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011575 if (ctxt->sax2) {
11576 xmlParseEndTag2(ctxt,
11577 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11578 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011579 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011580 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011581 }
11582#ifdef LIBXML_SAX1_ENABLED
11583 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011584 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011585#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011586 if (ctxt->instate == XML_PARSER_EOF) {
11587 /* Nothing */
11588 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011589 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011590 } else {
11591 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011592 }
11593 break;
11594 case XML_PARSER_CDATA_SECTION: {
11595 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011596 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011597 * cdataBlock merge back contiguous callbacks.
11598 */
11599 int base;
11600
11601 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11602 if (base < 0) {
11603 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011604 int tmp;
11605
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011606 tmp = xmlCheckCdataPush(ctxt->input->cur,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011607 XML_PARSER_BIG_BUFFER_SIZE);
11608 if (tmp < 0) {
11609 tmp = -tmp;
11610 ctxt->input->cur += tmp;
11611 goto encoding_error;
11612 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011613 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11614 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011615 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011616 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011617 else if (ctxt->sax->characters != NULL)
11618 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011619 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011620 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011621 if (ctxt->instate == XML_PARSER_EOF)
11622 goto done;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011623 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011624 ctxt->checkIndex = 0;
11625 }
11626 goto done;
11627 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011628 int tmp;
11629
11630 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11631 if ((tmp < 0) || (tmp != base)) {
11632 tmp = -tmp;
11633 ctxt->input->cur += tmp;
11634 goto encoding_error;
11635 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011636 if ((ctxt->sax != NULL) && (base == 0) &&
11637 (ctxt->sax->cdataBlock != NULL) &&
11638 (!ctxt->disableSAX)) {
11639 /*
11640 * Special case to provide identical behaviour
11641 * between pull and push parsers on enpty CDATA
11642 * sections
11643 */
11644 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11645 (!strncmp((const char *)&ctxt->input->cur[-9],
11646 "<![CDATA[", 9)))
11647 ctxt->sax->cdataBlock(ctxt->userData,
11648 BAD_CAST "", 0);
11649 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011650 (!ctxt->disableSAX)) {
11651 if (ctxt->sax->cdataBlock != NULL)
11652 ctxt->sax->cdataBlock(ctxt->userData,
11653 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011654 else if (ctxt->sax->characters != NULL)
11655 ctxt->sax->characters(ctxt->userData,
11656 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011657 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080011658 if (ctxt->instate == XML_PARSER_EOF)
11659 goto done;
Daniel Veillard0b787f32004-03-26 17:29:53 +000011660 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011661 ctxt->checkIndex = 0;
11662 ctxt->instate = XML_PARSER_CONTENT;
11663#ifdef DEBUG_PUSH
11664 xmlGenericError(xmlGenericErrorContext,
11665 "PP: entering CONTENT\n");
11666#endif
11667 }
11668 break;
11669 }
Owen Taylor3473f882001-02-23 17:55:21 +000011670 case XML_PARSER_MISC:
11671 SKIP_BLANKS;
11672 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011673 avail = ctxt->input->length -
11674 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011675 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011676 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011677 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011678 if (avail < 2)
11679 goto done;
11680 cur = ctxt->input->cur[0];
11681 next = ctxt->input->cur[1];
11682 if ((cur == '<') && (next == '?')) {
11683 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011684 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11685 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011686 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011687 }
Owen Taylor3473f882001-02-23 17:55:21 +000011688#ifdef DEBUG_PUSH
11689 xmlGenericError(xmlGenericErrorContext,
11690 "PP: Parsing PI\n");
11691#endif
11692 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011693 if (ctxt->instate == XML_PARSER_EOF)
11694 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011695 ctxt->instate = XML_PARSER_MISC;
11696 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011697 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011698 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011699 (ctxt->input->cur[2] == '-') &&
11700 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011701 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011702 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11703 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011704 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011705 }
Owen Taylor3473f882001-02-23 17:55:21 +000011706#ifdef DEBUG_PUSH
11707 xmlGenericError(xmlGenericErrorContext,
11708 "PP: Parsing Comment\n");
11709#endif
11710 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011711 if (ctxt->instate == XML_PARSER_EOF)
11712 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011713 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011714 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011715 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011716 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011717 (ctxt->input->cur[2] == 'D') &&
11718 (ctxt->input->cur[3] == 'O') &&
11719 (ctxt->input->cur[4] == 'C') &&
11720 (ctxt->input->cur[5] == 'T') &&
11721 (ctxt->input->cur[6] == 'Y') &&
11722 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011723 (ctxt->input->cur[8] == 'E')) {
11724 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011725 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11726 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011727 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011728 }
Owen Taylor3473f882001-02-23 17:55:21 +000011729#ifdef DEBUG_PUSH
11730 xmlGenericError(xmlGenericErrorContext,
11731 "PP: Parsing internal subset\n");
11732#endif
11733 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011734 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011735 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011736 xmlParseDocTypeDecl(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011737 if (ctxt->instate == XML_PARSER_EOF)
11738 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011739 if (RAW == '[') {
11740 ctxt->instate = XML_PARSER_DTD;
11741#ifdef DEBUG_PUSH
11742 xmlGenericError(xmlGenericErrorContext,
11743 "PP: entering DTD\n");
11744#endif
11745 } else {
11746 /*
11747 * Create and update the external subset.
11748 */
11749 ctxt->inSubset = 2;
11750 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11751 (ctxt->sax->externalSubset != NULL))
11752 ctxt->sax->externalSubset(ctxt->userData,
11753 ctxt->intSubName, ctxt->extSubSystem,
11754 ctxt->extSubURI);
11755 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011756 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011757 ctxt->instate = XML_PARSER_PROLOG;
11758#ifdef DEBUG_PUSH
11759 xmlGenericError(xmlGenericErrorContext,
11760 "PP: entering PROLOG\n");
11761#endif
11762 }
11763 } else if ((cur == '<') && (next == '!') &&
11764 (avail < 9)) {
11765 goto done;
11766 } else {
11767 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011768 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011769 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011770#ifdef DEBUG_PUSH
11771 xmlGenericError(xmlGenericErrorContext,
11772 "PP: entering START_TAG\n");
11773#endif
11774 }
11775 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011776 case XML_PARSER_PROLOG:
11777 SKIP_BLANKS;
11778 if (ctxt->input->buf == NULL)
11779 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11780 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011781 avail = xmlBufUse(ctxt->input->buf->buffer) -
11782 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011783 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011784 goto done;
11785 cur = ctxt->input->cur[0];
11786 next = ctxt->input->cur[1];
11787 if ((cur == '<') && (next == '?')) {
11788 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011789 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11790 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011791 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011792 }
Owen Taylor3473f882001-02-23 17:55:21 +000011793#ifdef DEBUG_PUSH
11794 xmlGenericError(xmlGenericErrorContext,
11795 "PP: Parsing PI\n");
11796#endif
11797 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011798 if (ctxt->instate == XML_PARSER_EOF)
11799 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011800 ctxt->instate = XML_PARSER_PROLOG;
11801 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011802 } else if ((cur == '<') && (next == '!') &&
11803 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11804 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011805 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11806 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011807 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011808 }
Owen Taylor3473f882001-02-23 17:55:21 +000011809#ifdef DEBUG_PUSH
11810 xmlGenericError(xmlGenericErrorContext,
11811 "PP: Parsing Comment\n");
11812#endif
11813 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011814 if (ctxt->instate == XML_PARSER_EOF)
11815 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011816 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011817 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011818 } else if ((cur == '<') && (next == '!') &&
11819 (avail < 4)) {
11820 goto done;
11821 } else {
11822 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011823 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011824 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011825 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011826#ifdef DEBUG_PUSH
11827 xmlGenericError(xmlGenericErrorContext,
11828 "PP: entering START_TAG\n");
11829#endif
11830 }
11831 break;
11832 case XML_PARSER_EPILOG:
11833 SKIP_BLANKS;
11834 if (ctxt->input->buf == NULL)
11835 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11836 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011837 avail = xmlBufUse(ctxt->input->buf->buffer) -
11838 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011839 if (avail < 2)
11840 goto done;
11841 cur = ctxt->input->cur[0];
11842 next = ctxt->input->cur[1];
11843 if ((cur == '<') && (next == '?')) {
11844 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011845 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11846 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011847 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011848 }
Owen Taylor3473f882001-02-23 17:55:21 +000011849#ifdef DEBUG_PUSH
11850 xmlGenericError(xmlGenericErrorContext,
11851 "PP: Parsing PI\n");
11852#endif
11853 xmlParsePI(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011854 if (ctxt->instate == XML_PARSER_EOF)
11855 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011856 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011857 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011858 } else if ((cur == '<') && (next == '!') &&
11859 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11860 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011861 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11862 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011863 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011864 }
Owen Taylor3473f882001-02-23 17:55:21 +000011865#ifdef DEBUG_PUSH
11866 xmlGenericError(xmlGenericErrorContext,
11867 "PP: Parsing Comment\n");
11868#endif
11869 xmlParseComment(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080011870 if (ctxt->instate == XML_PARSER_EOF)
11871 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011872 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011873 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011874 } else if ((cur == '<') && (next == '!') &&
11875 (avail < 4)) {
11876 goto done;
11877 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011878 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011879 ctxt->instate = XML_PARSER_EOF;
11880#ifdef DEBUG_PUSH
11881 xmlGenericError(xmlGenericErrorContext,
11882 "PP: entering EOF\n");
11883#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011884 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011885 ctxt->sax->endDocument(ctxt->userData);
11886 goto done;
11887 }
11888 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011889 case XML_PARSER_DTD: {
11890 /*
11891 * Sorry but progressive parsing of the internal subset
11892 * is not expected to be supported. We first check that
11893 * the full content of the internal subset is available and
11894 * the parsing is launched only at that point.
11895 * Internal subset ends up with "']' S? '>'" in an unescaped
11896 * section and not in a ']]>' sequence which are conditional
11897 * sections (whoever argued to keep that crap in XML deserve
11898 * a place in hell !).
11899 */
11900 int base, i;
11901 xmlChar *buf;
11902 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011903 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011904
11905 base = ctxt->input->cur - ctxt->input->base;
11906 if (base < 0) return(0);
11907 if (ctxt->checkIndex > base)
11908 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011909 buf = xmlBufContent(ctxt->input->buf->buffer);
11910 use = xmlBufUse(ctxt->input->buf->buffer);
11911 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011912 if (quote != 0) {
11913 if (buf[base] == quote)
11914 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011915 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011916 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011917 if ((quote == 0) && (buf[base] == '<')) {
11918 int found = 0;
11919 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011920 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011921 (buf[base + 1] == '!') &&
11922 (buf[base + 2] == '-') &&
11923 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011924 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011925 if ((buf[base] == '-') &&
11926 (buf[base + 1] == '-') &&
11927 (buf[base + 2] == '>')) {
11928 found = 1;
11929 base += 2;
11930 break;
11931 }
11932 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011933 if (!found) {
11934#if 0
11935 fprintf(stderr, "unfinished comment\n");
11936#endif
11937 break; /* for */
11938 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011939 continue;
11940 }
11941 }
Owen Taylor3473f882001-02-23 17:55:21 +000011942 if (buf[base] == '"') {
11943 quote = '"';
11944 continue;
11945 }
11946 if (buf[base] == '\'') {
11947 quote = '\'';
11948 continue;
11949 }
11950 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011951#if 0
11952 fprintf(stderr, "%c%c%c%c: ", buf[base],
11953 buf[base + 1], buf[base + 2], buf[base + 3]);
11954#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011955 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011956 break;
11957 if (buf[base + 1] == ']') {
11958 /* conditional crap, skip both ']' ! */
11959 base++;
11960 continue;
11961 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011962 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011963 if (buf[base + i] == '>') {
11964#if 0
11965 fprintf(stderr, "found\n");
11966#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011967 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011968 }
11969 if (!IS_BLANK_CH(buf[base + i])) {
11970#if 0
11971 fprintf(stderr, "not found\n");
11972#endif
11973 goto not_end_of_int_subset;
11974 }
Owen Taylor3473f882001-02-23 17:55:21 +000011975 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011976#if 0
11977 fprintf(stderr, "end of stream\n");
11978#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011979 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011980
Owen Taylor3473f882001-02-23 17:55:21 +000011981 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011982not_end_of_int_subset:
11983 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011984 }
11985 /*
11986 * We didn't found the end of the Internal subset
11987 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011988 if (quote == 0)
11989 ctxt->checkIndex = base;
11990 else
11991 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011992#ifdef DEBUG_PUSH
11993 if (next == 0)
11994 xmlGenericError(xmlGenericErrorContext,
11995 "PP: lookup of int subset end filed\n");
11996#endif
11997 goto done;
11998
11999found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012000 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000012001 xmlParseInternalSubset(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012002 if (ctxt->instate == XML_PARSER_EOF)
12003 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012004 ctxt->inSubset = 2;
12005 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12006 (ctxt->sax->externalSubset != NULL))
12007 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12008 ctxt->extSubSystem, ctxt->extSubURI);
12009 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000012010 xmlCleanSpecialAttr(ctxt);
Daniel Veillarde50ba812013-04-11 15:54:51 +080012011 if (ctxt->instate == XML_PARSER_EOF)
12012 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000012013 ctxt->instate = XML_PARSER_PROLOG;
12014 ctxt->checkIndex = 0;
12015#ifdef DEBUG_PUSH
12016 xmlGenericError(xmlGenericErrorContext,
12017 "PP: entering PROLOG\n");
12018#endif
12019 break;
12020 }
12021 case XML_PARSER_COMMENT:
12022 xmlGenericError(xmlGenericErrorContext,
12023 "PP: internal error, state == COMMENT\n");
12024 ctxt->instate = XML_PARSER_CONTENT;
12025#ifdef DEBUG_PUSH
12026 xmlGenericError(xmlGenericErrorContext,
12027 "PP: entering CONTENT\n");
12028#endif
12029 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000012030 case XML_PARSER_IGNORE:
12031 xmlGenericError(xmlGenericErrorContext,
12032 "PP: internal error, state == IGNORE");
12033 ctxt->instate = XML_PARSER_DTD;
12034#ifdef DEBUG_PUSH
12035 xmlGenericError(xmlGenericErrorContext,
12036 "PP: entering DTD\n");
12037#endif
12038 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012039 case XML_PARSER_PI:
12040 xmlGenericError(xmlGenericErrorContext,
12041 "PP: internal error, state == PI\n");
12042 ctxt->instate = XML_PARSER_CONTENT;
12043#ifdef DEBUG_PUSH
12044 xmlGenericError(xmlGenericErrorContext,
12045 "PP: entering CONTENT\n");
12046#endif
12047 break;
12048 case XML_PARSER_ENTITY_DECL:
12049 xmlGenericError(xmlGenericErrorContext,
12050 "PP: internal error, state == ENTITY_DECL\n");
12051 ctxt->instate = XML_PARSER_DTD;
12052#ifdef DEBUG_PUSH
12053 xmlGenericError(xmlGenericErrorContext,
12054 "PP: entering DTD\n");
12055#endif
12056 break;
12057 case XML_PARSER_ENTITY_VALUE:
12058 xmlGenericError(xmlGenericErrorContext,
12059 "PP: internal error, state == ENTITY_VALUE\n");
12060 ctxt->instate = XML_PARSER_CONTENT;
12061#ifdef DEBUG_PUSH
12062 xmlGenericError(xmlGenericErrorContext,
12063 "PP: entering DTD\n");
12064#endif
12065 break;
12066 case XML_PARSER_ATTRIBUTE_VALUE:
12067 xmlGenericError(xmlGenericErrorContext,
12068 "PP: internal error, state == ATTRIBUTE_VALUE\n");
12069 ctxt->instate = XML_PARSER_START_TAG;
12070#ifdef DEBUG_PUSH
12071 xmlGenericError(xmlGenericErrorContext,
12072 "PP: entering START_TAG\n");
12073#endif
12074 break;
12075 case XML_PARSER_SYSTEM_LITERAL:
12076 xmlGenericError(xmlGenericErrorContext,
12077 "PP: internal error, state == SYSTEM_LITERAL\n");
12078 ctxt->instate = XML_PARSER_START_TAG;
12079#ifdef DEBUG_PUSH
12080 xmlGenericError(xmlGenericErrorContext,
12081 "PP: entering START_TAG\n");
12082#endif
12083 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000012084 case XML_PARSER_PUBLIC_LITERAL:
12085 xmlGenericError(xmlGenericErrorContext,
12086 "PP: internal error, state == PUBLIC_LITERAL\n");
12087 ctxt->instate = XML_PARSER_START_TAG;
12088#ifdef DEBUG_PUSH
12089 xmlGenericError(xmlGenericErrorContext,
12090 "PP: entering START_TAG\n");
12091#endif
12092 break;
Owen Taylor3473f882001-02-23 17:55:21 +000012093 }
12094 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012095done:
Owen Taylor3473f882001-02-23 17:55:21 +000012096#ifdef DEBUG_PUSH
12097 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
12098#endif
12099 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000012100encoding_error:
12101 {
12102 char buffer[150];
12103
12104 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12105 ctxt->input->cur[0], ctxt->input->cur[1],
12106 ctxt->input->cur[2], ctxt->input->cur[3]);
12107 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12108 "Input is not proper UTF-8, indicate encoding !\n%s",
12109 BAD_CAST buffer, NULL);
12110 }
12111 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012112}
12113
12114/**
Daniel Veillard65686452012-07-19 18:25:01 +080012115 * xmlParseCheckTransition:
12116 * @ctxt: an XML parser context
12117 * @chunk: a char array
12118 * @size: the size in byte of the chunk
12119 *
12120 * Check depending on the current parser state if the chunk given must be
12121 * processed immediately or one need more data to advance on parsing.
12122 *
12123 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12124 */
12125static int
12126xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12127 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12128 return(-1);
12129 if (ctxt->instate == XML_PARSER_START_TAG) {
12130 if (memchr(chunk, '>', size) != NULL)
12131 return(1);
12132 return(0);
12133 }
12134 if (ctxt->progressive == XML_PARSER_COMMENT) {
12135 if (memchr(chunk, '>', size) != NULL)
12136 return(1);
12137 return(0);
12138 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012139 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12140 if (memchr(chunk, '>', size) != NULL)
12141 return(1);
12142 return(0);
12143 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012144 if (ctxt->progressive == XML_PARSER_PI) {
12145 if (memchr(chunk, '>', size) != NULL)
12146 return(1);
12147 return(0);
12148 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012149 if (ctxt->instate == XML_PARSER_END_TAG) {
12150 if (memchr(chunk, '>', size) != NULL)
12151 return(1);
12152 return(0);
12153 }
12154 if ((ctxt->progressive == XML_PARSER_DTD) ||
12155 (ctxt->instate == XML_PARSER_DTD)) {
Dan Winshipcf8f0422012-12-21 11:13:31 +080012156 if (memchr(chunk, '>', size) != NULL)
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012157 return(1);
12158 return(0);
12159 }
Daniel Veillard65686452012-07-19 18:25:01 +080012160 return(1);
12161}
12162
12163/**
Owen Taylor3473f882001-02-23 17:55:21 +000012164 * xmlParseChunk:
12165 * @ctxt: an XML parser context
12166 * @chunk: an char array
12167 * @size: the size in byte of the chunk
12168 * @terminate: last chunk indicator
12169 *
12170 * Parse a Chunk of memory
12171 *
12172 * Returns zero if no error, the xmlParserErrors otherwise.
12173 */
12174int
12175xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12176 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012177 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012178 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012179 size_t old_avail = 0;
12180 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012181
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012182 if (ctxt == NULL)
12183 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012184 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012185 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012186 if (ctxt->instate == XML_PARSER_EOF)
12187 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012188 if (ctxt->instate == XML_PARSER_START)
12189 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012190 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12191 (chunk[size - 1] == '\r')) {
12192 end_in_lf = 1;
12193 size--;
12194 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012195
12196xmldecl_done:
12197
Owen Taylor3473f882001-02-23 17:55:21 +000012198 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12199 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012200 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12201 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012202 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012203
Daniel Veillard65686452012-07-19 18:25:01 +080012204 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012205 /*
12206 * Specific handling if we autodetected an encoding, we should not
12207 * push more than the first line ... which depend on the encoding
12208 * And only push the rest once the final encoding was detected
12209 */
12210 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12211 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012212 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012213
12214 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12215 BAD_CAST "UTF-16")) ||
12216 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12217 BAD_CAST "UTF16")))
12218 len = 90;
12219 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12220 BAD_CAST "UCS-4")) ||
12221 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12222 BAD_CAST "UCS4")))
12223 len = 180;
12224
12225 if (ctxt->input->buf->rawconsumed < len)
12226 len -= ctxt->input->buf->rawconsumed;
12227
Raul Hudeaba9716a2010-03-15 10:13:29 +010012228 /*
12229 * Change size for reading the initial declaration only
12230 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12231 * will blindly copy extra bytes from memory.
12232 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012233 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012234 remain = size - len;
12235 size = len;
12236 } else {
12237 remain = 0;
12238 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012239 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012240 res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
William M. Bracka3215c72004-07-31 16:24:01 +000012241 if (res < 0) {
12242 ctxt->errNo = XML_PARSER_EOF;
12243 ctxt->disableSAX = 1;
12244 return (XML_PARSER_EOF);
12245 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012246 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012247#ifdef DEBUG_PUSH
12248 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12249#endif
12250
Owen Taylor3473f882001-02-23 17:55:21 +000012251 } else if (ctxt->instate != XML_PARSER_EOF) {
12252 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12253 xmlParserInputBufferPtr in = ctxt->input->buf;
12254 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12255 (in->raw != NULL)) {
12256 int nbchars;
Daniel Veillardde0cc202013-02-12 16:55:34 +080012257 size_t base = xmlBufGetInputBase(in->buffer, ctxt->input);
12258 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012259
Daniel Veillardbf058dc2013-02-13 18:19:42 +080012260 nbchars = xmlCharEncInput(in, terminate);
Owen Taylor3473f882001-02-23 17:55:21 +000012261 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012262 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012263 xmlGenericError(xmlGenericErrorContext,
12264 "xmlParseChunk: encoder error\n");
12265 return(XML_ERR_INVALID_ENCODING);
12266 }
Daniel Veillardde0cc202013-02-12 16:55:34 +080012267 xmlBufSetInputBaseCur(in->buffer, ctxt->input, base, current);
Owen Taylor3473f882001-02-23 17:55:21 +000012268 }
12269 }
12270 }
Daniel Veillard65686452012-07-19 18:25:01 +080012271 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012272 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012273 } else {
12274 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12275 avail = xmlBufUse(ctxt->input->buf->buffer);
12276 /*
12277 * Depending on the current state it may not be such
12278 * a good idea to try parsing if there is nothing in the chunk
12279 * which would be worth doing a parser state transition and we
12280 * need to wait for more data
12281 */
12282 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12283 (old_avail == 0) || (avail == 0) ||
12284 (xmlParseCheckTransition(ctxt,
12285 (const char *)&ctxt->input->base[old_avail],
12286 avail - old_avail)))
12287 xmlParseTryOrFinish(ctxt, terminate);
12288 }
Daniel Veillarde50ba812013-04-11 15:54:51 +080012289 if (ctxt->instate == XML_PARSER_EOF)
12290 return(ctxt->errNo);
12291
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012292 if ((ctxt->input != NULL) &&
12293 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12294 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12295 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12296 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12297 ctxt->instate = XML_PARSER_EOF;
12298 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012299 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12300 return(ctxt->errNo);
12301
12302 if (remain != 0) {
12303 chunk += size;
12304 size = remain;
12305 remain = 0;
12306 goto xmldecl_done;
12307 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012308 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12309 (ctxt->input->buf != NULL)) {
Daniel Veillardde0cc202013-02-12 16:55:34 +080012310 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
12311 ctxt->input);
12312 size_t current = ctxt->input->cur - ctxt->input->base;
12313
Daniel Veillarda617e242006-01-09 14:38:44 +000012314 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
Daniel Veillardde0cc202013-02-12 16:55:34 +080012315
12316 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
12317 base, current);
Daniel Veillarda617e242006-01-09 14:38:44 +000012318 }
Owen Taylor3473f882001-02-23 17:55:21 +000012319 if (terminate) {
12320 /*
12321 * Check for termination
12322 */
Daniel Veillard65686452012-07-19 18:25:01 +080012323 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012324
12325 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012326 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012327 cur_avail = ctxt->input->length -
12328 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012329 else
Daniel Veillard65686452012-07-19 18:25:01 +080012330 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12331 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012332 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012333
Owen Taylor3473f882001-02-23 17:55:21 +000012334 if ((ctxt->instate != XML_PARSER_EOF) &&
12335 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012336 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012337 }
Daniel Veillard65686452012-07-19 18:25:01 +080012338 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012339 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012340 }
Owen Taylor3473f882001-02-23 17:55:21 +000012341 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012342 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012343 ctxt->sax->endDocument(ctxt->userData);
12344 }
12345 ctxt->instate = XML_PARSER_EOF;
12346 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012347 if (ctxt->wellFormed == 0)
12348 return((xmlParserErrors) ctxt->errNo);
12349 else
12350 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012351}
12352
12353/************************************************************************
12354 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012355 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012356 * *
12357 ************************************************************************/
12358
12359/**
Owen Taylor3473f882001-02-23 17:55:21 +000012360 * xmlCreatePushParserCtxt:
12361 * @sax: a SAX handler
12362 * @user_data: The user data returned on SAX callbacks
12363 * @chunk: a pointer to an array of chars
12364 * @size: number of chars in the array
12365 * @filename: an optional file name or URI
12366 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012367 * Create a parser context for using the XML parser in push mode.
12368 * If @buffer and @size are non-NULL, the data is used to detect
12369 * the encoding. The remaining characters will be parsed so they
12370 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012371 * To allow content encoding detection, @size should be >= 4
12372 * The value of @filename is used for fetching external entities
12373 * and error/warning reports.
12374 *
12375 * Returns the new parser context or NULL
12376 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012377
Owen Taylor3473f882001-02-23 17:55:21 +000012378xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012379xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012380 const char *chunk, int size, const char *filename) {
12381 xmlParserCtxtPtr ctxt;
12382 xmlParserInputPtr inputStream;
12383 xmlParserInputBufferPtr buf;
12384 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12385
12386 /*
12387 * plug some encoding conversion routines
12388 */
12389 if ((chunk != NULL) && (size >= 4))
12390 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12391
12392 buf = xmlAllocParserInputBuffer(enc);
12393 if (buf == NULL) return(NULL);
12394
12395 ctxt = xmlNewParserCtxt();
12396 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012397 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012398 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012399 return(NULL);
12400 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012401 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012402 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12403 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012404 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012405 xmlFreeParserInputBuffer(buf);
12406 xmlFreeParserCtxt(ctxt);
12407 return(NULL);
12408 }
Owen Taylor3473f882001-02-23 17:55:21 +000012409 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012410#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012411 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012412#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012413 xmlFree(ctxt->sax);
12414 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12415 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012416 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012417 xmlFreeParserInputBuffer(buf);
12418 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012419 return(NULL);
12420 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012421 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12422 if (sax->initialized == XML_SAX2_MAGIC)
12423 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12424 else
12425 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012426 if (user_data != NULL)
12427 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012428 }
Owen Taylor3473f882001-02-23 17:55:21 +000012429 if (filename == NULL) {
12430 ctxt->directory = NULL;
12431 } else {
12432 ctxt->directory = xmlParserGetDirectory(filename);
12433 }
12434
12435 inputStream = xmlNewInputStream(ctxt);
12436 if (inputStream == NULL) {
12437 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012438 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012439 return(NULL);
12440 }
12441
12442 if (filename == NULL)
12443 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012444 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012445 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012446 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012447 if (inputStream->filename == NULL) {
12448 xmlFreeParserCtxt(ctxt);
12449 xmlFreeParserInputBuffer(buf);
12450 return(NULL);
12451 }
12452 }
Owen Taylor3473f882001-02-23 17:55:21 +000012453 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012454 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012455 inputPush(ctxt, inputStream);
12456
William M. Brack3a1cd212005-02-11 14:35:54 +000012457 /*
12458 * If the caller didn't provide an initial 'chunk' for determining
12459 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12460 * that it can be automatically determined later
12461 */
12462 if ((size == 0) || (chunk == NULL)) {
12463 ctxt->charset = XML_CHAR_ENCODING_NONE;
12464 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012465 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12466 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012467
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012468 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012469
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012470 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012471#ifdef DEBUG_PUSH
12472 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12473#endif
12474 }
12475
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012476 if (enc != XML_CHAR_ENCODING_NONE) {
12477 xmlSwitchEncoding(ctxt, enc);
12478 }
12479
Owen Taylor3473f882001-02-23 17:55:21 +000012480 return(ctxt);
12481}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012482#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012483
12484/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012485 * xmlStopParser:
12486 * @ctxt: an XML parser context
12487 *
12488 * Blocks further parser processing
12489 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012490void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012491xmlStopParser(xmlParserCtxtPtr ctxt) {
12492 if (ctxt == NULL)
12493 return;
12494 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarde50ba812013-04-11 15:54:51 +080012495 ctxt->errNo = XML_ERR_USER_STOP;
Daniel Veillard39e5c892005-07-03 22:48:50 +000012496 ctxt->disableSAX = 1;
12497 if (ctxt->input != NULL) {
12498 ctxt->input->cur = BAD_CAST"";
12499 ctxt->input->base = ctxt->input->cur;
12500 }
12501}
12502
12503/**
Owen Taylor3473f882001-02-23 17:55:21 +000012504 * xmlCreateIOParserCtxt:
12505 * @sax: a SAX handler
12506 * @user_data: The user data returned on SAX callbacks
12507 * @ioread: an I/O read function
12508 * @ioclose: an I/O close function
12509 * @ioctx: an I/O handler
12510 * @enc: the charset encoding if known
12511 *
12512 * Create a parser context for using the XML parser with an existing
12513 * I/O stream
12514 *
12515 * Returns the new parser context or NULL
12516 */
12517xmlParserCtxtPtr
12518xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12519 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12520 void *ioctx, xmlCharEncoding enc) {
12521 xmlParserCtxtPtr ctxt;
12522 xmlParserInputPtr inputStream;
12523 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012524
Daniel Veillard42595322004-11-08 10:52:06 +000012525 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012526
12527 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012528 if (buf == NULL) {
12529 if (ioclose != NULL)
12530 ioclose(ioctx);
12531 return (NULL);
12532 }
Owen Taylor3473f882001-02-23 17:55:21 +000012533
12534 ctxt = xmlNewParserCtxt();
12535 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012536 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012537 return(NULL);
12538 }
12539 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012540#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012541 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012542#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012543 xmlFree(ctxt->sax);
12544 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12545 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012546 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012547 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012548 return(NULL);
12549 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012550 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12551 if (sax->initialized == XML_SAX2_MAGIC)
12552 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12553 else
12554 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012555 if (user_data != NULL)
12556 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012557 }
Owen Taylor3473f882001-02-23 17:55:21 +000012558
12559 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12560 if (inputStream == NULL) {
12561 xmlFreeParserCtxt(ctxt);
12562 return(NULL);
12563 }
12564 inputPush(ctxt, inputStream);
12565
12566 return(ctxt);
12567}
12568
Daniel Veillard4432df22003-09-28 18:58:27 +000012569#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012570/************************************************************************
12571 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012572 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012573 * *
12574 ************************************************************************/
12575
12576/**
12577 * xmlIOParseDTD:
12578 * @sax: the SAX handler block or NULL
12579 * @input: an Input Buffer
12580 * @enc: the charset encoding if known
12581 *
12582 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012583 *
Owen Taylor3473f882001-02-23 17:55:21 +000012584 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012585 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012586 */
12587
12588xmlDtdPtr
12589xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12590 xmlCharEncoding enc) {
12591 xmlDtdPtr ret = NULL;
12592 xmlParserCtxtPtr ctxt;
12593 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012594 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012595
12596 if (input == NULL)
12597 return(NULL);
12598
12599 ctxt = xmlNewParserCtxt();
12600 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012601 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012602 return(NULL);
12603 }
12604
12605 /*
12606 * Set-up the SAX context
12607 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012608 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012609 if (ctxt->sax != NULL)
12610 xmlFree(ctxt->sax);
12611 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012612 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012613 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012614 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012615
12616 /*
12617 * generate a parser input from the I/O handler
12618 */
12619
Daniel Veillard43caefb2003-12-07 19:32:22 +000012620 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012621 if (pinput == NULL) {
12622 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012623 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012624 xmlFreeParserCtxt(ctxt);
12625 return(NULL);
12626 }
12627
12628 /*
12629 * plug some encoding conversion routines here.
12630 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012631 if (xmlPushInput(ctxt, pinput) < 0) {
12632 if (sax != NULL) ctxt->sax = NULL;
12633 xmlFreeParserCtxt(ctxt);
12634 return(NULL);
12635 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012636 if (enc != XML_CHAR_ENCODING_NONE) {
12637 xmlSwitchEncoding(ctxt, enc);
12638 }
Owen Taylor3473f882001-02-23 17:55:21 +000012639
12640 pinput->filename = NULL;
12641 pinput->line = 1;
12642 pinput->col = 1;
12643 pinput->base = ctxt->input->cur;
12644 pinput->cur = ctxt->input->cur;
12645 pinput->free = NULL;
12646
12647 /*
12648 * let's parse that entity knowing it's an external subset.
12649 */
12650 ctxt->inSubset = 2;
12651 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012652 if (ctxt->myDoc == NULL) {
12653 xmlErrMemory(ctxt, "New Doc failed");
12654 return(NULL);
12655 }
12656 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012657 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12658 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012659
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012660 if ((enc == XML_CHAR_ENCODING_NONE) &&
12661 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012662 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012663 * Get the 4 first bytes and decode the charset
12664 * if enc != XML_CHAR_ENCODING_NONE
12665 * plug some encoding conversion routines.
12666 */
12667 start[0] = RAW;
12668 start[1] = NXT(1);
12669 start[2] = NXT(2);
12670 start[3] = NXT(3);
12671 enc = xmlDetectCharEncoding(start, 4);
12672 if (enc != XML_CHAR_ENCODING_NONE) {
12673 xmlSwitchEncoding(ctxt, enc);
12674 }
12675 }
12676
Owen Taylor3473f882001-02-23 17:55:21 +000012677 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12678
12679 if (ctxt->myDoc != NULL) {
12680 if (ctxt->wellFormed) {
12681 ret = ctxt->myDoc->extSubset;
12682 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012683 if (ret != NULL) {
12684 xmlNodePtr tmp;
12685
12686 ret->doc = NULL;
12687 tmp = ret->children;
12688 while (tmp != NULL) {
12689 tmp->doc = NULL;
12690 tmp = tmp->next;
12691 }
12692 }
Owen Taylor3473f882001-02-23 17:55:21 +000012693 } else {
12694 ret = NULL;
12695 }
12696 xmlFreeDoc(ctxt->myDoc);
12697 ctxt->myDoc = NULL;
12698 }
12699 if (sax != NULL) ctxt->sax = NULL;
12700 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012701
Owen Taylor3473f882001-02-23 17:55:21 +000012702 return(ret);
12703}
12704
12705/**
12706 * xmlSAXParseDTD:
12707 * @sax: the SAX handler block
12708 * @ExternalID: a NAME* containing the External ID of the DTD
12709 * @SystemID: a NAME* containing the URL to the DTD
12710 *
12711 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012712 *
Owen Taylor3473f882001-02-23 17:55:21 +000012713 * Returns the resulting xmlDtdPtr or NULL in case of error.
12714 */
12715
12716xmlDtdPtr
12717xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12718 const xmlChar *SystemID) {
12719 xmlDtdPtr ret = NULL;
12720 xmlParserCtxtPtr ctxt;
12721 xmlParserInputPtr input = NULL;
12722 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012723 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012724
12725 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12726
12727 ctxt = xmlNewParserCtxt();
12728 if (ctxt == NULL) {
12729 return(NULL);
12730 }
12731
12732 /*
12733 * Set-up the SAX context
12734 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012735 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012736 if (ctxt->sax != NULL)
12737 xmlFree(ctxt->sax);
12738 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012739 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012740 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012741
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012742 /*
12743 * Canonicalise the system ID
12744 */
12745 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012746 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012747 xmlFreeParserCtxt(ctxt);
12748 return(NULL);
12749 }
Owen Taylor3473f882001-02-23 17:55:21 +000012750
12751 /*
12752 * Ask the Entity resolver to load the damn thing
12753 */
12754
12755 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012756 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12757 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012758 if (input == NULL) {
12759 if (sax != NULL) ctxt->sax = NULL;
12760 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012761 if (systemIdCanonic != NULL)
12762 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012763 return(NULL);
12764 }
12765
12766 /*
12767 * plug some encoding conversion routines here.
12768 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012769 if (xmlPushInput(ctxt, input) < 0) {
12770 if (sax != NULL) ctxt->sax = NULL;
12771 xmlFreeParserCtxt(ctxt);
12772 if (systemIdCanonic != NULL)
12773 xmlFree(systemIdCanonic);
12774 return(NULL);
12775 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012776 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12777 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12778 xmlSwitchEncoding(ctxt, enc);
12779 }
Owen Taylor3473f882001-02-23 17:55:21 +000012780
12781 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012782 input->filename = (char *) systemIdCanonic;
12783 else
12784 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012785 input->line = 1;
12786 input->col = 1;
12787 input->base = ctxt->input->cur;
12788 input->cur = ctxt->input->cur;
12789 input->free = NULL;
12790
12791 /*
12792 * let's parse that entity knowing it's an external subset.
12793 */
12794 ctxt->inSubset = 2;
12795 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012796 if (ctxt->myDoc == NULL) {
12797 xmlErrMemory(ctxt, "New Doc failed");
12798 if (sax != NULL) ctxt->sax = NULL;
12799 xmlFreeParserCtxt(ctxt);
12800 return(NULL);
12801 }
12802 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012803 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12804 ExternalID, SystemID);
12805 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12806
12807 if (ctxt->myDoc != NULL) {
12808 if (ctxt->wellFormed) {
12809 ret = ctxt->myDoc->extSubset;
12810 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012811 if (ret != NULL) {
12812 xmlNodePtr tmp;
12813
12814 ret->doc = NULL;
12815 tmp = ret->children;
12816 while (tmp != NULL) {
12817 tmp->doc = NULL;
12818 tmp = tmp->next;
12819 }
12820 }
Owen Taylor3473f882001-02-23 17:55:21 +000012821 } else {
12822 ret = NULL;
12823 }
12824 xmlFreeDoc(ctxt->myDoc);
12825 ctxt->myDoc = NULL;
12826 }
12827 if (sax != NULL) ctxt->sax = NULL;
12828 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012829
Owen Taylor3473f882001-02-23 17:55:21 +000012830 return(ret);
12831}
12832
Daniel Veillard4432df22003-09-28 18:58:27 +000012833
Owen Taylor3473f882001-02-23 17:55:21 +000012834/**
12835 * xmlParseDTD:
12836 * @ExternalID: a NAME* containing the External ID of the DTD
12837 * @SystemID: a NAME* containing the URL to the DTD
12838 *
12839 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012840 *
Owen Taylor3473f882001-02-23 17:55:21 +000012841 * Returns the resulting xmlDtdPtr or NULL in case of error.
12842 */
12843
12844xmlDtdPtr
12845xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12846 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12847}
Daniel Veillard4432df22003-09-28 18:58:27 +000012848#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012849
12850/************************************************************************
12851 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012852 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012853 * *
12854 ************************************************************************/
12855
12856/**
Owen Taylor3473f882001-02-23 17:55:21 +000012857 * xmlParseCtxtExternalEntity:
12858 * @ctx: the existing parsing context
12859 * @URL: the URL for the entity to load
12860 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012861 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012862 *
12863 * Parse an external general entity within an existing parsing context
12864 * An external general parsed entity is well-formed if it matches the
12865 * production labeled extParsedEnt.
12866 *
12867 * [78] extParsedEnt ::= TextDecl? content
12868 *
12869 * Returns 0 if the entity is well formed, -1 in case of args problem and
12870 * the parser error code otherwise
12871 */
12872
12873int
12874xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012875 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012876 xmlParserCtxtPtr ctxt;
12877 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012878 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012879 xmlSAXHandlerPtr oldsax = NULL;
12880 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012881 xmlChar start[4];
12882 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012883
Daniel Veillardce682bc2004-11-05 17:22:25 +000012884 if (ctx == NULL) return(-1);
12885
Daniel Veillard0161e632008-08-28 15:36:32 +000012886 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12887 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012888 return(XML_ERR_ENTITY_LOOP);
12889 }
12890
Daniel Veillardcda96922001-08-21 10:56:31 +000012891 if (lst != NULL)
12892 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012893 if ((URL == NULL) && (ID == NULL))
12894 return(-1);
12895 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12896 return(-1);
12897
Rob Richards798743a2009-06-19 13:54:25 -040012898 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012899 if (ctxt == NULL) {
12900 return(-1);
12901 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012902
Owen Taylor3473f882001-02-23 17:55:21 +000012903 oldsax = ctxt->sax;
12904 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012905 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012906 newDoc = xmlNewDoc(BAD_CAST "1.0");
12907 if (newDoc == NULL) {
12908 xmlFreeParserCtxt(ctxt);
12909 return(-1);
12910 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012911 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012912 if (ctx->myDoc->dict) {
12913 newDoc->dict = ctx->myDoc->dict;
12914 xmlDictReference(newDoc->dict);
12915 }
Owen Taylor3473f882001-02-23 17:55:21 +000012916 if (ctx->myDoc != NULL) {
12917 newDoc->intSubset = ctx->myDoc->intSubset;
12918 newDoc->extSubset = ctx->myDoc->extSubset;
12919 }
12920 if (ctx->myDoc->URL != NULL) {
12921 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12922 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012923 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12924 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012925 ctxt->sax = oldsax;
12926 xmlFreeParserCtxt(ctxt);
12927 newDoc->intSubset = NULL;
12928 newDoc->extSubset = NULL;
12929 xmlFreeDoc(newDoc);
12930 return(-1);
12931 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012932 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012933 nodePush(ctxt, newDoc->children);
12934 if (ctx->myDoc == NULL) {
12935 ctxt->myDoc = newDoc;
12936 } else {
12937 ctxt->myDoc = ctx->myDoc;
12938 newDoc->children->doc = ctx->myDoc;
12939 }
12940
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012941 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012942 * Get the 4 first bytes and decode the charset
12943 * if enc != XML_CHAR_ENCODING_NONE
12944 * plug some encoding conversion routines.
12945 */
12946 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012947 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12948 start[0] = RAW;
12949 start[1] = NXT(1);
12950 start[2] = NXT(2);
12951 start[3] = NXT(3);
12952 enc = xmlDetectCharEncoding(start, 4);
12953 if (enc != XML_CHAR_ENCODING_NONE) {
12954 xmlSwitchEncoding(ctxt, enc);
12955 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012956 }
12957
Owen Taylor3473f882001-02-23 17:55:21 +000012958 /*
12959 * Parse a possible text declaration first
12960 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012961 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012962 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012963 /*
12964 * An XML-1.0 document can't reference an entity not XML-1.0
12965 */
12966 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12967 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012968 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012969 "Version mismatch between document and entity\n");
12970 }
Owen Taylor3473f882001-02-23 17:55:21 +000012971 }
12972
12973 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012974 * If the user provided its own SAX callbacks then reuse the
12975 * useData callback field, otherwise the expected setup in a
12976 * DOM builder is to have userData == ctxt
12977 */
12978 if (ctx->userData == ctx)
12979 ctxt->userData = ctxt;
12980 else
12981 ctxt->userData = ctx->userData;
12982
12983 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012984 * Doing validity checking on chunk doesn't make sense
12985 */
12986 ctxt->instate = XML_PARSER_CONTENT;
12987 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012988 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012989 ctxt->loadsubset = ctx->loadsubset;
12990 ctxt->depth = ctx->depth + 1;
12991 ctxt->replaceEntities = ctx->replaceEntities;
12992 if (ctxt->validate) {
12993 ctxt->vctxt.error = ctx->vctxt.error;
12994 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012995 } else {
12996 ctxt->vctxt.error = NULL;
12997 ctxt->vctxt.warning = NULL;
12998 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012999 ctxt->vctxt.nodeTab = NULL;
13000 ctxt->vctxt.nodeNr = 0;
13001 ctxt->vctxt.nodeMax = 0;
13002 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013003 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13004 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013005 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13006 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13007 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013008 ctxt->dictNames = ctx->dictNames;
13009 ctxt->attsDefault = ctx->attsDefault;
13010 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000013011 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000013012
13013 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013014
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000013015 ctx->validate = ctxt->validate;
13016 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000013017 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013018 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013019 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013020 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013021 }
13022 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013023 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013024 }
13025
13026 if (!ctxt->wellFormed) {
13027 if (ctxt->errNo == 0)
13028 ret = 1;
13029 else
13030 ret = ctxt->errNo;
13031 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000013032 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013033 xmlNodePtr cur;
13034
13035 /*
13036 * Return the newly created nodeset after unlinking it from
13037 * they pseudo parent.
13038 */
13039 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000013040 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013041 while (cur != NULL) {
13042 cur->parent = NULL;
13043 cur = cur->next;
13044 }
13045 newDoc->children->children = NULL;
13046 }
13047 ret = 0;
13048 }
13049 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013050 ctxt->dict = NULL;
13051 ctxt->attsDefault = NULL;
13052 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013053 xmlFreeParserCtxt(ctxt);
13054 newDoc->intSubset = NULL;
13055 newDoc->extSubset = NULL;
13056 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000013057
Owen Taylor3473f882001-02-23 17:55:21 +000013058 return(ret);
13059}
13060
13061/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013062 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000013063 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013064 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000013065 * @sax: the SAX handler bloc (possibly NULL)
13066 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13067 * @depth: Used for loop detection, use 0
13068 * @URL: the URL for the entity to load
13069 * @ID: the System ID for the entity to load
13070 * @list: the return value for the set of parsed nodes
13071 *
Daniel Veillard257d9102001-05-08 10:41:44 +000013072 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000013073 *
13074 * Returns 0 if the entity is well formed, -1 in case of args problem and
13075 * the parser error code otherwise
13076 */
13077
Daniel Veillard7d515752003-09-26 19:12:37 +000013078static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013079xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
13080 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000013081 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013082 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000013083 xmlParserCtxtPtr ctxt;
13084 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013085 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013086 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000013087 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000013088 xmlChar start[4];
13089 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000013090
Daniel Veillard0161e632008-08-28 15:36:32 +000013091 if (((depth > 40) &&
13092 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13093 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000013094 return(XML_ERR_ENTITY_LOOP);
13095 }
13096
Owen Taylor3473f882001-02-23 17:55:21 +000013097 if (list != NULL)
13098 *list = NULL;
13099 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000013100 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000013101 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000013102 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013103
13104
Rob Richards9c0aa472009-03-26 18:10:19 +000013105 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000013106 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000013107 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013108 if (oldctxt != NULL) {
13109 ctxt->_private = oldctxt->_private;
13110 ctxt->loadsubset = oldctxt->loadsubset;
13111 ctxt->validate = oldctxt->validate;
13112 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013113 ctxt->record_info = oldctxt->record_info;
13114 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
13115 ctxt->node_seq.length = oldctxt->node_seq.length;
13116 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013117 } else {
13118 /*
13119 * Doing validity checking on chunk without context
13120 * doesn't make sense
13121 */
13122 ctxt->_private = NULL;
13123 ctxt->validate = 0;
13124 ctxt->external = 2;
13125 ctxt->loadsubset = 0;
13126 }
Owen Taylor3473f882001-02-23 17:55:21 +000013127 if (sax != NULL) {
13128 oldsax = ctxt->sax;
13129 ctxt->sax = sax;
13130 if (user_data != NULL)
13131 ctxt->userData = user_data;
13132 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013133 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013134 newDoc = xmlNewDoc(BAD_CAST "1.0");
13135 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013136 ctxt->node_seq.maximum = 0;
13137 ctxt->node_seq.length = 0;
13138 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013139 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013140 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013141 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013142 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013143 newDoc->intSubset = doc->intSubset;
13144 newDoc->extSubset = doc->extSubset;
13145 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013146 xmlDictReference(newDoc->dict);
13147
Owen Taylor3473f882001-02-23 17:55:21 +000013148 if (doc->URL != NULL) {
13149 newDoc->URL = xmlStrdup(doc->URL);
13150 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013151 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13152 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013153 if (sax != NULL)
13154 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013155 ctxt->node_seq.maximum = 0;
13156 ctxt->node_seq.length = 0;
13157 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013158 xmlFreeParserCtxt(ctxt);
13159 newDoc->intSubset = NULL;
13160 newDoc->extSubset = NULL;
13161 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013162 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013163 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013164 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013165 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013166 ctxt->myDoc = doc;
13167 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013168
Daniel Veillard0161e632008-08-28 15:36:32 +000013169 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013170 * Get the 4 first bytes and decode the charset
13171 * if enc != XML_CHAR_ENCODING_NONE
13172 * plug some encoding conversion routines.
13173 */
13174 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013175 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13176 start[0] = RAW;
13177 start[1] = NXT(1);
13178 start[2] = NXT(2);
13179 start[3] = NXT(3);
13180 enc = xmlDetectCharEncoding(start, 4);
13181 if (enc != XML_CHAR_ENCODING_NONE) {
13182 xmlSwitchEncoding(ctxt, enc);
13183 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013184 }
13185
Owen Taylor3473f882001-02-23 17:55:21 +000013186 /*
13187 * Parse a possible text declaration first
13188 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013189 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013190 xmlParseTextDecl(ctxt);
13191 }
13192
Owen Taylor3473f882001-02-23 17:55:21 +000013193 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013194 ctxt->depth = depth;
13195
13196 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013197
Daniel Veillard561b7f82002-03-20 21:55:57 +000013198 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013199 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013200 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013201 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013202 }
13203 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013204 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013205 }
13206
13207 if (!ctxt->wellFormed) {
13208 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013209 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013210 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013211 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013212 } else {
13213 if (list != NULL) {
13214 xmlNodePtr cur;
13215
13216 /*
13217 * Return the newly created nodeset after unlinking it from
13218 * they pseudo parent.
13219 */
13220 cur = newDoc->children->children;
13221 *list = cur;
13222 while (cur != NULL) {
13223 cur->parent = NULL;
13224 cur = cur->next;
13225 }
13226 newDoc->children->children = NULL;
13227 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013228 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013229 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013230
13231 /*
13232 * Record in the parent context the number of entities replacement
13233 * done when parsing that reference.
13234 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013235 if (oldctxt != NULL)
13236 oldctxt->nbentities += ctxt->nbentities;
13237
Daniel Veillard0161e632008-08-28 15:36:32 +000013238 /*
13239 * Also record the size of the entity parsed
13240 */
13241 if (ctxt->input != NULL) {
13242 oldctxt->sizeentities += ctxt->input->consumed;
13243 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13244 }
13245 /*
13246 * And record the last error if any
13247 */
13248 if (ctxt->lastError.code != XML_ERR_OK)
13249 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13250
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013251 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013252 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000013253 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13254 oldctxt->node_seq.length = ctxt->node_seq.length;
13255 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013256 ctxt->node_seq.maximum = 0;
13257 ctxt->node_seq.length = 0;
13258 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013259 xmlFreeParserCtxt(ctxt);
13260 newDoc->intSubset = NULL;
13261 newDoc->extSubset = NULL;
13262 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013263
Owen Taylor3473f882001-02-23 17:55:21 +000013264 return(ret);
13265}
13266
Daniel Veillard81273902003-09-30 00:43:48 +000013267#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013268/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013269 * xmlParseExternalEntity:
13270 * @doc: the document the chunk pertains to
13271 * @sax: the SAX handler bloc (possibly NULL)
13272 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13273 * @depth: Used for loop detection, use 0
13274 * @URL: the URL for the entity to load
13275 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013276 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013277 *
13278 * Parse an external general entity
13279 * An external general parsed entity is well-formed if it matches the
13280 * production labeled extParsedEnt.
13281 *
13282 * [78] extParsedEnt ::= TextDecl? content
13283 *
13284 * Returns 0 if the entity is well formed, -1 in case of args problem and
13285 * the parser error code otherwise
13286 */
13287
13288int
13289xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013290 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013291 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013292 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013293}
13294
13295/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013296 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013297 * @doc: the document the chunk pertains to
13298 * @sax: the SAX handler bloc (possibly NULL)
13299 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13300 * @depth: Used for loop detection, use 0
13301 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013302 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013303 *
13304 * Parse a well-balanced chunk of an XML document
13305 * called by the parser
13306 * The allowed sequence for the Well Balanced Chunk is the one defined by
13307 * the content production in the XML grammar:
13308 *
13309 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13310 *
13311 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13312 * the parser error code otherwise
13313 */
13314
13315int
13316xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013317 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013318 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13319 depth, string, lst, 0 );
13320}
Daniel Veillard81273902003-09-30 00:43:48 +000013321#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013322
13323/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013324 * xmlParseBalancedChunkMemoryInternal:
13325 * @oldctxt: the existing parsing context
13326 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13327 * @user_data: the user data field for the parser context
13328 * @lst: the return value for the set of parsed nodes
13329 *
13330 *
13331 * Parse a well-balanced chunk of an XML document
13332 * called by the parser
13333 * The allowed sequence for the Well Balanced Chunk is the one defined by
13334 * the content production in the XML grammar:
13335 *
13336 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13337 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013338 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13339 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013340 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013341 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013342 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013343 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013344static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013345xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13346 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13347 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013348 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013349 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013350 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013351 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013352 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013353 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013354 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013355#ifdef SAX2
13356 int i;
13357#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013358
Daniel Veillard0161e632008-08-28 15:36:32 +000013359 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13360 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013361 return(XML_ERR_ENTITY_LOOP);
13362 }
13363
13364
13365 if (lst != NULL)
13366 *lst = NULL;
13367 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013368 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013369
13370 size = xmlStrlen(string);
13371
13372 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013373 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013374 if (user_data != NULL)
13375 ctxt->userData = user_data;
13376 else
13377 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013378 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13379 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013380 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13381 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13382 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013383
Daniel Veillard74eaec12009-08-26 15:57:20 +020013384#ifdef SAX2
13385 /* propagate namespaces down the entity */
13386 for (i = 0;i < oldctxt->nsNr;i += 2) {
13387 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13388 }
13389#endif
13390
Daniel Veillard328f48c2002-11-15 15:24:34 +000013391 oldsax = ctxt->sax;
13392 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013393 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013394 ctxt->replaceEntities = oldctxt->replaceEntities;
13395 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013396
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013397 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013398 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013399 newDoc = xmlNewDoc(BAD_CAST "1.0");
13400 if (newDoc == NULL) {
13401 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013402 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013403 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013404 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013405 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013406 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013407 newDoc->dict = ctxt->dict;
13408 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013409 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013410 } else {
13411 ctxt->myDoc = oldctxt->myDoc;
13412 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013413 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013414 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013415 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13416 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013417 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013418 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013419 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013420 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013421 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013422 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013423 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013424 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013425 ctxt->myDoc->children = NULL;
13426 ctxt->myDoc->last = NULL;
13427 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013428 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013429 ctxt->instate = XML_PARSER_CONTENT;
13430 ctxt->depth = oldctxt->depth + 1;
13431
Daniel Veillard328f48c2002-11-15 15:24:34 +000013432 ctxt->validate = 0;
13433 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013434 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13435 /*
13436 * ID/IDREF registration will be done in xmlValidateElement below
13437 */
13438 ctxt->loadsubset |= XML_SKIP_IDS;
13439 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013440 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013441 ctxt->attsDefault = oldctxt->attsDefault;
13442 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013443
Daniel Veillard68e9e742002-11-16 15:35:11 +000013444 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013445 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013446 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013447 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013448 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013449 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013450 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013451 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013452 }
13453
13454 if (!ctxt->wellFormed) {
13455 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013456 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013457 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013458 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013459 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013460 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013461 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013462
William M. Brack7b9154b2003-09-27 19:23:50 +000013463 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013464 xmlNodePtr cur;
13465
13466 /*
13467 * Return the newly created nodeset after unlinking it from
13468 * they pseudo parent.
13469 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013470 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013471 *lst = cur;
13472 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013473#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013474 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13475 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13476 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013477 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13478 oldctxt->myDoc, cur);
13479 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013480#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013481 cur->parent = NULL;
13482 cur = cur->next;
13483 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013484 ctxt->myDoc->children->children = NULL;
13485 }
13486 if (ctxt->myDoc != NULL) {
13487 xmlFreeNode(ctxt->myDoc->children);
13488 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013489 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013490 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013491
13492 /*
13493 * Record in the parent context the number of entities replacement
13494 * done when parsing that reference.
13495 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013496 if (oldctxt != NULL)
13497 oldctxt->nbentities += ctxt->nbentities;
13498
Daniel Veillard0161e632008-08-28 15:36:32 +000013499 /*
13500 * Also record the last error if any
13501 */
13502 if (ctxt->lastError.code != XML_ERR_OK)
13503 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13504
Daniel Veillard328f48c2002-11-15 15:24:34 +000013505 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013506 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013507 ctxt->attsDefault = NULL;
13508 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013509 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013510 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013511 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013512 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013513
Daniel Veillard328f48c2002-11-15 15:24:34 +000013514 return(ret);
13515}
13516
Daniel Veillard29b17482004-08-16 00:39:03 +000013517/**
13518 * xmlParseInNodeContext:
13519 * @node: the context node
13520 * @data: the input string
13521 * @datalen: the input string length in bytes
13522 * @options: a combination of xmlParserOption
13523 * @lst: the return value for the set of parsed nodes
13524 *
13525 * Parse a well-balanced chunk of an XML document
13526 * within the context (DTD, namespaces, etc ...) of the given node.
13527 *
13528 * The allowed sequence for the data is a Well Balanced Chunk defined by
13529 * the content production in the XML grammar:
13530 *
13531 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13532 *
13533 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13534 * error code otherwise
13535 */
13536xmlParserErrors
13537xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13538 int options, xmlNodePtr *lst) {
13539#ifdef SAX2
13540 xmlParserCtxtPtr ctxt;
13541 xmlDocPtr doc = NULL;
13542 xmlNodePtr fake, cur;
13543 int nsnr = 0;
13544
13545 xmlParserErrors ret = XML_ERR_OK;
13546
13547 /*
13548 * check all input parameters, grab the document
13549 */
13550 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13551 return(XML_ERR_INTERNAL_ERROR);
13552 switch (node->type) {
13553 case XML_ELEMENT_NODE:
13554 case XML_ATTRIBUTE_NODE:
13555 case XML_TEXT_NODE:
13556 case XML_CDATA_SECTION_NODE:
13557 case XML_ENTITY_REF_NODE:
13558 case XML_PI_NODE:
13559 case XML_COMMENT_NODE:
13560 case XML_DOCUMENT_NODE:
13561 case XML_HTML_DOCUMENT_NODE:
13562 break;
13563 default:
13564 return(XML_ERR_INTERNAL_ERROR);
13565
13566 }
13567 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13568 (node->type != XML_DOCUMENT_NODE) &&
13569 (node->type != XML_HTML_DOCUMENT_NODE))
13570 node = node->parent;
13571 if (node == NULL)
13572 return(XML_ERR_INTERNAL_ERROR);
13573 if (node->type == XML_ELEMENT_NODE)
13574 doc = node->doc;
13575 else
13576 doc = (xmlDocPtr) node;
13577 if (doc == NULL)
13578 return(XML_ERR_INTERNAL_ERROR);
13579
13580 /*
13581 * allocate a context and set-up everything not related to the
13582 * node position in the tree
13583 */
13584 if (doc->type == XML_DOCUMENT_NODE)
13585 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13586#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013587 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013588 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013589 /*
13590 * When parsing in context, it makes no sense to add implied
13591 * elements like html/body/etc...
13592 */
13593 options |= HTML_PARSE_NOIMPLIED;
13594 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013595#endif
13596 else
13597 return(XML_ERR_INTERNAL_ERROR);
13598
13599 if (ctxt == NULL)
13600 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013601
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013602 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013603 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13604 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13605 * we must wait until the last moment to free the original one.
13606 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013607 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013608 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013609 xmlDictFree(ctxt->dict);
13610 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013611 } else
13612 options |= XML_PARSE_NODICT;
13613
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013614 if (doc->encoding != NULL) {
13615 xmlCharEncodingHandlerPtr hdlr;
13616
13617 if (ctxt->encoding != NULL)
13618 xmlFree((xmlChar *) ctxt->encoding);
13619 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13620
Nikolay Sivovd4a5d982013-04-30 17:45:36 +040013621 hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013622 if (hdlr != NULL) {
13623 xmlSwitchToEncoding(ctxt, hdlr);
13624 } else {
13625 return(XML_ERR_UNSUPPORTED_ENCODING);
13626 }
13627 }
13628
Daniel Veillard37334572008-07-31 08:20:02 +000013629 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013630 xmlDetectSAX2(ctxt);
13631 ctxt->myDoc = doc;
Daniel Veillard6faa1262014-03-21 17:05:51 +080013632 /* parsing in context, i.e. as within existing content */
13633 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard29b17482004-08-16 00:39:03 +000013634
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013635 fake = xmlNewComment(NULL);
13636 if (fake == NULL) {
13637 xmlFreeParserCtxt(ctxt);
13638 return(XML_ERR_NO_MEMORY);
13639 }
13640 xmlAddChild(node, fake);
13641
Daniel Veillard29b17482004-08-16 00:39:03 +000013642 if (node->type == XML_ELEMENT_NODE) {
13643 nodePush(ctxt, node);
13644 /*
13645 * initialize the SAX2 namespaces stack
13646 */
13647 cur = node;
13648 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13649 xmlNsPtr ns = cur->nsDef;
13650 const xmlChar *iprefix, *ihref;
13651
13652 while (ns != NULL) {
13653 if (ctxt->dict) {
13654 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13655 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13656 } else {
13657 iprefix = ns->prefix;
13658 ihref = ns->href;
13659 }
13660
13661 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13662 nsPush(ctxt, iprefix, ihref);
13663 nsnr++;
13664 }
13665 ns = ns->next;
13666 }
13667 cur = cur->parent;
13668 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013669 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013670
13671 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13672 /*
13673 * ID/IDREF registration will be done in xmlValidateElement below
13674 */
13675 ctxt->loadsubset |= XML_SKIP_IDS;
13676 }
13677
Daniel Veillard499cc922006-01-18 17:22:35 +000013678#ifdef LIBXML_HTML_ENABLED
13679 if (doc->type == XML_HTML_DOCUMENT_NODE)
13680 __htmlParseContent(ctxt);
13681 else
13682#endif
13683 xmlParseContent(ctxt);
13684
Daniel Veillard29b17482004-08-16 00:39:03 +000013685 nsPop(ctxt, nsnr);
13686 if ((RAW == '<') && (NXT(1) == '/')) {
13687 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13688 } else if (RAW != 0) {
13689 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13690 }
13691 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13692 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13693 ctxt->wellFormed = 0;
13694 }
13695
13696 if (!ctxt->wellFormed) {
13697 if (ctxt->errNo == 0)
13698 ret = XML_ERR_INTERNAL_ERROR;
13699 else
13700 ret = (xmlParserErrors)ctxt->errNo;
13701 } else {
13702 ret = XML_ERR_OK;
13703 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013704
Daniel Veillard29b17482004-08-16 00:39:03 +000013705 /*
13706 * Return the newly created nodeset after unlinking it from
13707 * the pseudo sibling.
13708 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013709
Daniel Veillard29b17482004-08-16 00:39:03 +000013710 cur = fake->next;
13711 fake->next = NULL;
13712 node->last = fake;
13713
13714 if (cur != NULL) {
13715 cur->prev = NULL;
13716 }
13717
13718 *lst = cur;
13719
13720 while (cur != NULL) {
13721 cur->parent = NULL;
13722 cur = cur->next;
13723 }
13724
13725 xmlUnlinkNode(fake);
13726 xmlFreeNode(fake);
13727
13728
13729 if (ret != XML_ERR_OK) {
13730 xmlFreeNodeList(*lst);
13731 *lst = NULL;
13732 }
William M. Brackc3f81342004-10-03 01:22:44 +000013733
William M. Brackb7b54de2004-10-06 16:38:01 +000013734 if (doc->dict != NULL)
13735 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013736 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013737
Daniel Veillard29b17482004-08-16 00:39:03 +000013738 return(ret);
13739#else /* !SAX2 */
13740 return(XML_ERR_INTERNAL_ERROR);
13741#endif
13742}
13743
Daniel Veillard81273902003-09-30 00:43:48 +000013744#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013745/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013746 * xmlParseBalancedChunkMemoryRecover:
13747 * @doc: the document the chunk pertains to
13748 * @sax: the SAX handler bloc (possibly NULL)
13749 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13750 * @depth: Used for loop detection, use 0
13751 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13752 * @lst: the return value for the set of parsed nodes
13753 * @recover: return nodes even if the data is broken (use 0)
13754 *
13755 *
13756 * Parse a well-balanced chunk of an XML document
13757 * called by the parser
13758 * The allowed sequence for the Well Balanced Chunk is the one defined by
13759 * the content production in the XML grammar:
13760 *
13761 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13762 *
13763 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13764 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013765 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013766 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013767 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13768 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013769 */
13770int
13771xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013772 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013773 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013774 xmlParserCtxtPtr ctxt;
13775 xmlDocPtr newDoc;
13776 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013777 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013778 int size;
13779 int ret = 0;
13780
Daniel Veillard0161e632008-08-28 15:36:32 +000013781 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013782 return(XML_ERR_ENTITY_LOOP);
13783 }
13784
13785
Daniel Veillardcda96922001-08-21 10:56:31 +000013786 if (lst != NULL)
13787 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013788 if (string == NULL)
13789 return(-1);
13790
13791 size = xmlStrlen(string);
13792
13793 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13794 if (ctxt == NULL) return(-1);
13795 ctxt->userData = ctxt;
13796 if (sax != NULL) {
13797 oldsax = ctxt->sax;
13798 ctxt->sax = sax;
13799 if (user_data != NULL)
13800 ctxt->userData = user_data;
13801 }
13802 newDoc = xmlNewDoc(BAD_CAST "1.0");
13803 if (newDoc == NULL) {
13804 xmlFreeParserCtxt(ctxt);
13805 return(-1);
13806 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013807 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013808 if ((doc != NULL) && (doc->dict != NULL)) {
13809 xmlDictFree(ctxt->dict);
13810 ctxt->dict = doc->dict;
13811 xmlDictReference(ctxt->dict);
13812 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13813 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13814 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13815 ctxt->dictNames = 1;
13816 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013817 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013818 }
Owen Taylor3473f882001-02-23 17:55:21 +000013819 if (doc != NULL) {
13820 newDoc->intSubset = doc->intSubset;
13821 newDoc->extSubset = doc->extSubset;
13822 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013823 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13824 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013825 if (sax != NULL)
13826 ctxt->sax = oldsax;
13827 xmlFreeParserCtxt(ctxt);
13828 newDoc->intSubset = NULL;
13829 newDoc->extSubset = NULL;
13830 xmlFreeDoc(newDoc);
13831 return(-1);
13832 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013833 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13834 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013835 if (doc == NULL) {
13836 ctxt->myDoc = newDoc;
13837 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013838 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013839 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013840 /* Ensure that doc has XML spec namespace */
13841 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13842 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013843 }
13844 ctxt->instate = XML_PARSER_CONTENT;
13845 ctxt->depth = depth;
13846
13847 /*
13848 * Doing validity checking on chunk doesn't make sense
13849 */
13850 ctxt->validate = 0;
13851 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013852 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013853
Daniel Veillardb39bc392002-10-26 19:29:51 +000013854 if ( doc != NULL ){
13855 content = doc->children;
13856 doc->children = NULL;
13857 xmlParseContent(ctxt);
13858 doc->children = content;
13859 }
13860 else {
13861 xmlParseContent(ctxt);
13862 }
Owen Taylor3473f882001-02-23 17:55:21 +000013863 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013864 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013865 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013866 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013867 }
13868 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013869 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013870 }
13871
13872 if (!ctxt->wellFormed) {
13873 if (ctxt->errNo == 0)
13874 ret = 1;
13875 else
13876 ret = ctxt->errNo;
13877 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013878 ret = 0;
13879 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013880
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013881 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13882 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013883
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013884 /*
13885 * Return the newly created nodeset after unlinking it from
13886 * they pseudo parent.
13887 */
13888 cur = newDoc->children->children;
13889 *lst = cur;
13890 while (cur != NULL) {
13891 xmlSetTreeDoc(cur, doc);
13892 cur->parent = NULL;
13893 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013894 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013895 newDoc->children->children = NULL;
13896 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013897
13898 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013899 ctxt->sax = oldsax;
13900 xmlFreeParserCtxt(ctxt);
13901 newDoc->intSubset = NULL;
13902 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013903 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013904 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013905
Owen Taylor3473f882001-02-23 17:55:21 +000013906 return(ret);
13907}
13908
13909/**
13910 * xmlSAXParseEntity:
13911 * @sax: the SAX handler block
13912 * @filename: the filename
13913 *
13914 * parse an XML external entity out of context and build a tree.
13915 * It use the given SAX function block to handle the parsing callback.
13916 * If sax is NULL, fallback to the default DOM tree building routines.
13917 *
13918 * [78] extParsedEnt ::= TextDecl? content
13919 *
13920 * This correspond to a "Well Balanced" chunk
13921 *
13922 * Returns the resulting document tree
13923 */
13924
13925xmlDocPtr
13926xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13927 xmlDocPtr ret;
13928 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013929
13930 ctxt = xmlCreateFileParserCtxt(filename);
13931 if (ctxt == NULL) {
13932 return(NULL);
13933 }
13934 if (sax != NULL) {
13935 if (ctxt->sax != NULL)
13936 xmlFree(ctxt->sax);
13937 ctxt->sax = sax;
13938 ctxt->userData = NULL;
13939 }
13940
Owen Taylor3473f882001-02-23 17:55:21 +000013941 xmlParseExtParsedEnt(ctxt);
13942
13943 if (ctxt->wellFormed)
13944 ret = ctxt->myDoc;
13945 else {
13946 ret = NULL;
13947 xmlFreeDoc(ctxt->myDoc);
13948 ctxt->myDoc = NULL;
13949 }
13950 if (sax != NULL)
13951 ctxt->sax = NULL;
13952 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013953
Owen Taylor3473f882001-02-23 17:55:21 +000013954 return(ret);
13955}
13956
13957/**
13958 * xmlParseEntity:
13959 * @filename: the filename
13960 *
13961 * parse an XML external entity out of context and build a tree.
13962 *
13963 * [78] extParsedEnt ::= TextDecl? content
13964 *
13965 * This correspond to a "Well Balanced" chunk
13966 *
13967 * Returns the resulting document tree
13968 */
13969
13970xmlDocPtr
13971xmlParseEntity(const char *filename) {
13972 return(xmlSAXParseEntity(NULL, filename));
13973}
Daniel Veillard81273902003-09-30 00:43:48 +000013974#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013975
13976/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013977 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013978 * @URL: the entity URL
13979 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013980 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013981 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013982 *
13983 * Create a parser context for an external entity
13984 * Automatic support for ZLIB/Compress compressed document is provided
13985 * by default if found at compile-time.
13986 *
13987 * Returns the new parser context or NULL
13988 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013989static xmlParserCtxtPtr
13990xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13991 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013992 xmlParserCtxtPtr ctxt;
13993 xmlParserInputPtr inputStream;
13994 char *directory = NULL;
13995 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013996
Owen Taylor3473f882001-02-23 17:55:21 +000013997 ctxt = xmlNewParserCtxt();
13998 if (ctxt == NULL) {
13999 return(NULL);
14000 }
14001
Daniel Veillard48247b42009-07-10 16:12:46 +020014002 if (pctx != NULL) {
14003 ctxt->options = pctx->options;
14004 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000014005 }
14006
Owen Taylor3473f882001-02-23 17:55:21 +000014007 uri = xmlBuildURI(URL, base);
14008
14009 if (uri == NULL) {
14010 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
14011 if (inputStream == NULL) {
14012 xmlFreeParserCtxt(ctxt);
14013 return(NULL);
14014 }
14015
14016 inputPush(ctxt, inputStream);
14017
14018 if ((ctxt->directory == NULL) && (directory == NULL))
14019 directory = xmlParserGetDirectory((char *)URL);
14020 if ((ctxt->directory == NULL) && (directory != NULL))
14021 ctxt->directory = directory;
14022 } else {
14023 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
14024 if (inputStream == NULL) {
14025 xmlFree(uri);
14026 xmlFreeParserCtxt(ctxt);
14027 return(NULL);
14028 }
14029
14030 inputPush(ctxt, inputStream);
14031
14032 if ((ctxt->directory == NULL) && (directory == NULL))
14033 directory = xmlParserGetDirectory((char *)uri);
14034 if ((ctxt->directory == NULL) && (directory != NULL))
14035 ctxt->directory = directory;
14036 xmlFree(uri);
14037 }
Owen Taylor3473f882001-02-23 17:55:21 +000014038 return(ctxt);
14039}
14040
Rob Richards9c0aa472009-03-26 18:10:19 +000014041/**
14042 * xmlCreateEntityParserCtxt:
14043 * @URL: the entity URL
14044 * @ID: the entity PUBLIC ID
14045 * @base: a possible base for the target URI
14046 *
14047 * Create a parser context for an external entity
14048 * Automatic support for ZLIB/Compress compressed document is provided
14049 * by default if found at compile-time.
14050 *
14051 * Returns the new parser context or NULL
14052 */
14053xmlParserCtxtPtr
14054xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
14055 const xmlChar *base) {
14056 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
14057
14058}
14059
Owen Taylor3473f882001-02-23 17:55:21 +000014060/************************************************************************
14061 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000014062 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000014063 * *
14064 ************************************************************************/
14065
14066/**
Daniel Veillard61b93382003-11-03 14:28:31 +000014067 * xmlCreateURLParserCtxt:
14068 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014069 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000014070 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014071 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000014072 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000014073 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000014074 *
14075 * Returns the new parser context or NULL
14076 */
14077xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000014078xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000014079{
14080 xmlParserCtxtPtr ctxt;
14081 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000014082 char *directory = NULL;
14083
Owen Taylor3473f882001-02-23 17:55:21 +000014084 ctxt = xmlNewParserCtxt();
14085 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000014086 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000014087 return(NULL);
14088 }
14089
Daniel Veillarddf292f72005-01-16 19:00:15 +000014090 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000014091 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000014092 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000014093
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000014094 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014095 if (inputStream == NULL) {
14096 xmlFreeParserCtxt(ctxt);
14097 return(NULL);
14098 }
14099
Owen Taylor3473f882001-02-23 17:55:21 +000014100 inputPush(ctxt, inputStream);
14101 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000014102 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014103 if ((ctxt->directory == NULL) && (directory != NULL))
14104 ctxt->directory = directory;
14105
14106 return(ctxt);
14107}
14108
Daniel Veillard61b93382003-11-03 14:28:31 +000014109/**
14110 * xmlCreateFileParserCtxt:
14111 * @filename: the filename
14112 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014113 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000014114 * Automatic support for ZLIB/Compress compressed document is provided
14115 * by default if found at compile-time.
14116 *
14117 * Returns the new parser context or NULL
14118 */
14119xmlParserCtxtPtr
14120xmlCreateFileParserCtxt(const char *filename)
14121{
14122 return(xmlCreateURLParserCtxt(filename, 0));
14123}
14124
Daniel Veillard81273902003-09-30 00:43:48 +000014125#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014126/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014127 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014128 * @sax: the SAX handler block
14129 * @filename: the filename
14130 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14131 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014132 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014133 *
14134 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14135 * compressed document is provided by default if found at compile-time.
14136 * It use the given SAX function block to handle the parsing callback.
14137 * If sax is NULL, fallback to the default DOM tree building routines.
14138 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014139 * User data (void *) is stored within the parser context in the
14140 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014141 *
Owen Taylor3473f882001-02-23 17:55:21 +000014142 * Returns the resulting document tree
14143 */
14144
14145xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014146xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14147 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014148 xmlDocPtr ret;
14149 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014150
Daniel Veillard635ef722001-10-29 11:48:19 +000014151 xmlInitParser();
14152
Owen Taylor3473f882001-02-23 17:55:21 +000014153 ctxt = xmlCreateFileParserCtxt(filename);
14154 if (ctxt == NULL) {
14155 return(NULL);
14156 }
14157 if (sax != NULL) {
14158 if (ctxt->sax != NULL)
14159 xmlFree(ctxt->sax);
14160 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014161 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014162 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014163 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014164 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014165 }
Owen Taylor3473f882001-02-23 17:55:21 +000014166
Daniel Veillard37d2d162008-03-14 10:54:00 +000014167 if (ctxt->directory == NULL)
14168 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014169
Daniel Veillarddad3f682002-11-17 16:47:27 +000014170 ctxt->recovery = recovery;
14171
Owen Taylor3473f882001-02-23 17:55:21 +000014172 xmlParseDocument(ctxt);
14173
William M. Brackc07329e2003-09-08 01:57:30 +000014174 if ((ctxt->wellFormed) || recovery) {
14175 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014176 if (ret != NULL) {
14177 if (ctxt->input->buf->compressed > 0)
14178 ret->compression = 9;
14179 else
14180 ret->compression = ctxt->input->buf->compressed;
14181 }
William M. Brackc07329e2003-09-08 01:57:30 +000014182 }
Owen Taylor3473f882001-02-23 17:55:21 +000014183 else {
14184 ret = NULL;
14185 xmlFreeDoc(ctxt->myDoc);
14186 ctxt->myDoc = NULL;
14187 }
14188 if (sax != NULL)
14189 ctxt->sax = NULL;
14190 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014191
Owen Taylor3473f882001-02-23 17:55:21 +000014192 return(ret);
14193}
14194
14195/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014196 * xmlSAXParseFile:
14197 * @sax: the SAX handler block
14198 * @filename: the filename
14199 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14200 * documents
14201 *
14202 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14203 * compressed document is provided by default if found at compile-time.
14204 * It use the given SAX function block to handle the parsing callback.
14205 * If sax is NULL, fallback to the default DOM tree building routines.
14206 *
14207 * Returns the resulting document tree
14208 */
14209
14210xmlDocPtr
14211xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14212 int recovery) {
14213 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14214}
14215
14216/**
Owen Taylor3473f882001-02-23 17:55:21 +000014217 * xmlRecoverDoc:
14218 * @cur: a pointer to an array of xmlChar
14219 *
14220 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014221 * In the case the document is not Well Formed, a attempt to build a
14222 * tree is tried anyway
14223 *
14224 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014225 */
14226
14227xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014228xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014229 return(xmlSAXParseDoc(NULL, cur, 1));
14230}
14231
14232/**
14233 * xmlParseFile:
14234 * @filename: the filename
14235 *
14236 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14237 * compressed document is provided by default if found at compile-time.
14238 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014239 * Returns the resulting document tree if the file was wellformed,
14240 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014241 */
14242
14243xmlDocPtr
14244xmlParseFile(const char *filename) {
14245 return(xmlSAXParseFile(NULL, filename, 0));
14246}
14247
14248/**
14249 * xmlRecoverFile:
14250 * @filename: the filename
14251 *
14252 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14253 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014254 * In the case the document is not Well Formed, it attempts to build
14255 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014256 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014257 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014258 */
14259
14260xmlDocPtr
14261xmlRecoverFile(const char *filename) {
14262 return(xmlSAXParseFile(NULL, filename, 1));
14263}
14264
14265
14266/**
14267 * xmlSetupParserForBuffer:
14268 * @ctxt: an XML parser context
14269 * @buffer: a xmlChar * buffer
14270 * @filename: a file name
14271 *
14272 * Setup the parser context to parse a new buffer; Clears any prior
14273 * contents from the parser context. The buffer parameter must not be
14274 * NULL, but the filename parameter can be
14275 */
14276void
14277xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14278 const char* filename)
14279{
14280 xmlParserInputPtr input;
14281
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014282 if ((ctxt == NULL) || (buffer == NULL))
14283 return;
14284
Owen Taylor3473f882001-02-23 17:55:21 +000014285 input = xmlNewInputStream(ctxt);
14286 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014287 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014288 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014289 return;
14290 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014291
Owen Taylor3473f882001-02-23 17:55:21 +000014292 xmlClearParserCtxt(ctxt);
14293 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014294 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014295 input->base = buffer;
14296 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014297 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014298 inputPush(ctxt, input);
14299}
14300
14301/**
14302 * xmlSAXUserParseFile:
14303 * @sax: a SAX handler
14304 * @user_data: The user data returned on SAX callbacks
14305 * @filename: a file name
14306 *
14307 * parse an XML file and call the given SAX handler routines.
14308 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014309 *
Owen Taylor3473f882001-02-23 17:55:21 +000014310 * Returns 0 in case of success or a error number otherwise
14311 */
14312int
14313xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14314 const char *filename) {
14315 int ret = 0;
14316 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014317
Owen Taylor3473f882001-02-23 17:55:21 +000014318 ctxt = xmlCreateFileParserCtxt(filename);
14319 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014320 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014321 xmlFree(ctxt->sax);
14322 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014323 xmlDetectSAX2(ctxt);
14324
Owen Taylor3473f882001-02-23 17:55:21 +000014325 if (user_data != NULL)
14326 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014327
Owen Taylor3473f882001-02-23 17:55:21 +000014328 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014329
Owen Taylor3473f882001-02-23 17:55:21 +000014330 if (ctxt->wellFormed)
14331 ret = 0;
14332 else {
14333 if (ctxt->errNo != 0)
14334 ret = ctxt->errNo;
14335 else
14336 ret = -1;
14337 }
14338 if (sax != NULL)
14339 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014340 if (ctxt->myDoc != NULL) {
14341 xmlFreeDoc(ctxt->myDoc);
14342 ctxt->myDoc = NULL;
14343 }
Owen Taylor3473f882001-02-23 17:55:21 +000014344 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014345
Owen Taylor3473f882001-02-23 17:55:21 +000014346 return ret;
14347}
Daniel Veillard81273902003-09-30 00:43:48 +000014348#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014349
14350/************************************************************************
14351 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014352 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014353 * *
14354 ************************************************************************/
14355
14356/**
14357 * xmlCreateMemoryParserCtxt:
14358 * @buffer: a pointer to a char array
14359 * @size: the size of the array
14360 *
14361 * Create a parser context for an XML in-memory document.
14362 *
14363 * Returns the new parser context or NULL
14364 */
14365xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014366xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014367 xmlParserCtxtPtr ctxt;
14368 xmlParserInputPtr input;
14369 xmlParserInputBufferPtr buf;
14370
14371 if (buffer == NULL)
14372 return(NULL);
14373 if (size <= 0)
14374 return(NULL);
14375
14376 ctxt = xmlNewParserCtxt();
14377 if (ctxt == NULL)
14378 return(NULL);
14379
Daniel Veillard53350552003-09-18 13:35:51 +000014380 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014381 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014382 if (buf == NULL) {
14383 xmlFreeParserCtxt(ctxt);
14384 return(NULL);
14385 }
Owen Taylor3473f882001-02-23 17:55:21 +000014386
14387 input = xmlNewInputStream(ctxt);
14388 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014389 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014390 xmlFreeParserCtxt(ctxt);
14391 return(NULL);
14392 }
14393
14394 input->filename = NULL;
14395 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014396 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014397
14398 inputPush(ctxt, input);
14399 return(ctxt);
14400}
14401
Daniel Veillard81273902003-09-30 00:43:48 +000014402#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014403/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014404 * xmlSAXParseMemoryWithData:
14405 * @sax: the SAX handler block
14406 * @buffer: an pointer to a char array
14407 * @size: the size of the array
14408 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14409 * documents
14410 * @data: the userdata
14411 *
14412 * parse an XML in-memory block and use the given SAX function block
14413 * to handle the parsing callback. If sax is NULL, fallback to the default
14414 * DOM tree building routines.
14415 *
14416 * User data (void *) is stored within the parser context in the
14417 * context's _private member, so it is available nearly everywhere in libxml
14418 *
14419 * Returns the resulting document tree
14420 */
14421
14422xmlDocPtr
14423xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14424 int size, int recovery, void *data) {
14425 xmlDocPtr ret;
14426 xmlParserCtxtPtr ctxt;
14427
Daniel Veillardab2a7632009-07-09 08:45:03 +020014428 xmlInitParser();
14429
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014430 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14431 if (ctxt == NULL) return(NULL);
14432 if (sax != NULL) {
14433 if (ctxt->sax != NULL)
14434 xmlFree(ctxt->sax);
14435 ctxt->sax = sax;
14436 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014437 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014438 if (data!=NULL) {
14439 ctxt->_private=data;
14440 }
14441
Daniel Veillardadba5f12003-04-04 16:09:01 +000014442 ctxt->recovery = recovery;
14443
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014444 xmlParseDocument(ctxt);
14445
14446 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14447 else {
14448 ret = NULL;
14449 xmlFreeDoc(ctxt->myDoc);
14450 ctxt->myDoc = NULL;
14451 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014452 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014453 ctxt->sax = NULL;
14454 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014455
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014456 return(ret);
14457}
14458
14459/**
Owen Taylor3473f882001-02-23 17:55:21 +000014460 * xmlSAXParseMemory:
14461 * @sax: the SAX handler block
14462 * @buffer: an pointer to a char array
14463 * @size: the size of the array
14464 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14465 * documents
14466 *
14467 * parse an XML in-memory block and use the given SAX function block
14468 * to handle the parsing callback. If sax is NULL, fallback to the default
14469 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014470 *
Owen Taylor3473f882001-02-23 17:55:21 +000014471 * Returns the resulting document tree
14472 */
14473xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014474xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14475 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014476 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014477}
14478
14479/**
14480 * xmlParseMemory:
14481 * @buffer: an pointer to a char array
14482 * @size: the size of the array
14483 *
14484 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014485 *
Owen Taylor3473f882001-02-23 17:55:21 +000014486 * Returns the resulting document tree
14487 */
14488
Daniel Veillard50822cb2001-07-26 20:05:51 +000014489xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014490 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14491}
14492
14493/**
14494 * xmlRecoverMemory:
14495 * @buffer: an pointer to a char array
14496 * @size: the size of the array
14497 *
14498 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014499 * In the case the document is not Well Formed, an attempt to
14500 * build a tree is tried anyway
14501 *
14502 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014503 */
14504
Daniel Veillard50822cb2001-07-26 20:05:51 +000014505xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014506 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14507}
14508
14509/**
14510 * xmlSAXUserParseMemory:
14511 * @sax: a SAX handler
14512 * @user_data: The user data returned on SAX callbacks
14513 * @buffer: an in-memory XML document input
14514 * @size: the length of the XML document in bytes
14515 *
14516 * A better SAX parsing routine.
14517 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014518 *
Owen Taylor3473f882001-02-23 17:55:21 +000014519 * Returns 0 in case of success or a error number otherwise
14520 */
14521int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014522 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014523 int ret = 0;
14524 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014525
14526 xmlInitParser();
14527
Owen Taylor3473f882001-02-23 17:55:21 +000014528 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14529 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014530 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14531 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014532 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014533 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014534
Daniel Veillard30211a02001-04-26 09:33:18 +000014535 if (user_data != NULL)
14536 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014537
Owen Taylor3473f882001-02-23 17:55:21 +000014538 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014539
Owen Taylor3473f882001-02-23 17:55:21 +000014540 if (ctxt->wellFormed)
14541 ret = 0;
14542 else {
14543 if (ctxt->errNo != 0)
14544 ret = ctxt->errNo;
14545 else
14546 ret = -1;
14547 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014548 if (sax != NULL)
14549 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014550 if (ctxt->myDoc != NULL) {
14551 xmlFreeDoc(ctxt->myDoc);
14552 ctxt->myDoc = NULL;
14553 }
Owen Taylor3473f882001-02-23 17:55:21 +000014554 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014555
Owen Taylor3473f882001-02-23 17:55:21 +000014556 return ret;
14557}
Daniel Veillard81273902003-09-30 00:43:48 +000014558#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014559
14560/**
14561 * xmlCreateDocParserCtxt:
14562 * @cur: a pointer to an array of xmlChar
14563 *
14564 * Creates a parser context for an XML in-memory document.
14565 *
14566 * Returns the new parser context or NULL
14567 */
14568xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014569xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014570 int len;
14571
14572 if (cur == NULL)
14573 return(NULL);
14574 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014575 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014576}
14577
Daniel Veillard81273902003-09-30 00:43:48 +000014578#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014579/**
14580 * xmlSAXParseDoc:
14581 * @sax: the SAX handler block
14582 * @cur: a pointer to an array of xmlChar
14583 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14584 * documents
14585 *
14586 * parse an XML in-memory document and build a tree.
14587 * It use the given SAX function block to handle the parsing callback.
14588 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014589 *
Owen Taylor3473f882001-02-23 17:55:21 +000014590 * Returns the resulting document tree
14591 */
14592
14593xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014594xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014595 xmlDocPtr ret;
14596 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014597 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014598
Daniel Veillard38936062004-11-04 17:45:11 +000014599 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014600
14601
14602 ctxt = xmlCreateDocParserCtxt(cur);
14603 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014604 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014605 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014606 ctxt->sax = sax;
14607 ctxt->userData = NULL;
14608 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014609 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014610
14611 xmlParseDocument(ctxt);
14612 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14613 else {
14614 ret = NULL;
14615 xmlFreeDoc(ctxt->myDoc);
14616 ctxt->myDoc = NULL;
14617 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014618 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014619 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014620 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014621
Owen Taylor3473f882001-02-23 17:55:21 +000014622 return(ret);
14623}
14624
14625/**
14626 * xmlParseDoc:
14627 * @cur: a pointer to an array of xmlChar
14628 *
14629 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014630 *
Owen Taylor3473f882001-02-23 17:55:21 +000014631 * Returns the resulting document tree
14632 */
14633
14634xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014635xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014636 return(xmlSAXParseDoc(NULL, cur, 0));
14637}
Daniel Veillard81273902003-09-30 00:43:48 +000014638#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014639
Daniel Veillard81273902003-09-30 00:43:48 +000014640#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014641/************************************************************************
14642 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014643 * Specific function to keep track of entities references *
14644 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014645 * *
14646 ************************************************************************/
14647
14648static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14649
14650/**
14651 * xmlAddEntityReference:
14652 * @ent : A valid entity
14653 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014654 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014655 *
14656 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14657 */
14658static void
14659xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14660 xmlNodePtr lastNode)
14661{
14662 if (xmlEntityRefFunc != NULL) {
14663 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14664 }
14665}
14666
14667
14668/**
14669 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014670 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014671 *
14672 * Set the function to call call back when a xml reference has been made
14673 */
14674void
14675xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14676{
14677 xmlEntityRefFunc = func;
14678}
Daniel Veillard81273902003-09-30 00:43:48 +000014679#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014680
14681/************************************************************************
14682 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014683 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014684 * *
14685 ************************************************************************/
14686
14687#ifdef LIBXML_XPATH_ENABLED
14688#include <libxml/xpath.h>
14689#endif
14690
Daniel Veillardffa3c742005-07-21 13:24:09 +000014691extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014692static int xmlParserInitialized = 0;
14693
14694/**
14695 * xmlInitParser:
14696 *
14697 * Initialization function for the XML parser.
14698 * This is not reentrant. Call once before processing in case of
14699 * use in multithreaded programs.
14700 */
14701
14702void
14703xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014704 if (xmlParserInitialized != 0)
14705 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014706
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014707#ifdef LIBXML_THREAD_ENABLED
14708 __xmlGlobalInitMutexLock();
14709 if (xmlParserInitialized == 0) {
14710#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014711 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014712 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014713 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14714 (xmlGenericError == NULL))
14715 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014716 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014717 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014718 xmlInitCharEncodingHandlers();
14719 xmlDefaultSAXHandlerInit();
14720 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014721#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014722 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014723#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014724#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014725 htmlInitAutoClose();
14726 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014727#endif
14728#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014729 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014730#endif
Daniel Veillard054c7162014-01-26 15:02:25 +010014731#ifdef LIBXML_CATALOG_ENABLED
14732 xmlInitializeCatalog();
14733#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014734 xmlParserInitialized = 1;
14735#ifdef LIBXML_THREAD_ENABLED
14736 }
14737 __xmlGlobalInitMutexUnlock();
14738#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014739}
14740
14741/**
14742 * xmlCleanupParser:
14743 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014744 * This function name is somewhat misleading. It does not clean up
14745 * parser state, it cleans up memory allocated by the library itself.
14746 * It is a cleanup function for the XML library. It tries to reclaim all
14747 * related global memory allocated for the library processing.
14748 * It doesn't deallocate any document related memory. One should
14749 * call xmlCleanupParser() only when the process has finished using
14750 * the library and all XML/HTML documents built with it.
14751 * See also xmlInitParser() which has the opposite function of preparing
14752 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014753 *
14754 * WARNING: if your application is multithreaded or has plugin support
14755 * calling this may crash the application if another thread or
14756 * a plugin is still using libxml2. It's sometimes very hard to
14757 * guess if libxml2 is in use in the application, some libraries
14758 * or plugins may use it without notice. In case of doubt abstain
14759 * from calling this function or do it just before calling exit()
14760 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014761 */
14762
14763void
14764xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014765 if (!xmlParserInitialized)
14766 return;
14767
Owen Taylor3473f882001-02-23 17:55:21 +000014768 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014769#ifdef LIBXML_CATALOG_ENABLED
14770 xmlCatalogCleanup();
14771#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014772 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014773 xmlCleanupInputCallbacks();
14774#ifdef LIBXML_OUTPUT_ENABLED
14775 xmlCleanupOutputCallbacks();
14776#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014777#ifdef LIBXML_SCHEMAS_ENABLED
14778 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014779 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014780#endif
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014781 xmlResetLastError();
Alexander Pastukhov704d8c52013-04-23 13:02:11 +080014782 xmlCleanupGlobals();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014783 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014784 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014785 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014786}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014787
14788/************************************************************************
14789 * *
14790 * New set (2.6.0) of simpler and more flexible APIs *
14791 * *
14792 ************************************************************************/
14793
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL string is ignored, and a NULL "dict" means the
 * string is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: it must be followed by a semicolon and can safely be
 * used as the body of an if/else.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14805
14806/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014807 * xmlCtxtReset:
14808 * @ctxt: an XML parser context
14809 *
14810 * Reset a parser context
14811 */
14812void
14813xmlCtxtReset(xmlParserCtxtPtr ctxt)
14814{
14815 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014816 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014817
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014818 if (ctxt == NULL)
14819 return;
14820
14821 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014822
14823 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14824 xmlFreeInputStream(input);
14825 }
14826 ctxt->inputNr = 0;
14827 ctxt->input = NULL;
14828
14829 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014830 if (ctxt->spaceTab != NULL) {
14831 ctxt->spaceTab[0] = -1;
14832 ctxt->space = &ctxt->spaceTab[0];
14833 } else {
14834 ctxt->space = NULL;
14835 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014836
14837
14838 ctxt->nodeNr = 0;
14839 ctxt->node = NULL;
14840
14841 ctxt->nameNr = 0;
14842 ctxt->name = NULL;
14843
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014844 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014845 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014846 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014847 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014848 DICT_FREE(ctxt->directory);
14849 ctxt->directory = NULL;
14850 DICT_FREE(ctxt->extSubURI);
14851 ctxt->extSubURI = NULL;
14852 DICT_FREE(ctxt->extSubSystem);
14853 ctxt->extSubSystem = NULL;
14854 if (ctxt->myDoc != NULL)
14855 xmlFreeDoc(ctxt->myDoc);
14856 ctxt->myDoc = NULL;
14857
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014858 ctxt->standalone = -1;
14859 ctxt->hasExternalSubset = 0;
14860 ctxt->hasPErefs = 0;
14861 ctxt->html = 0;
14862 ctxt->external = 0;
14863 ctxt->instate = XML_PARSER_START;
14864 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014865
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014866 ctxt->wellFormed = 1;
14867 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014868 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014869 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014870#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014871 ctxt->vctxt.userData = ctxt;
14872 ctxt->vctxt.error = xmlParserValidityError;
14873 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014874#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014875 ctxt->record_info = 0;
14876 ctxt->nbChars = 0;
14877 ctxt->checkIndex = 0;
14878 ctxt->inSubset = 0;
14879 ctxt->errNo = XML_ERR_OK;
14880 ctxt->depth = 0;
14881 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14882 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014883 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014884 ctxt->sizeentities = 0;
Daniel Veillard23f05e02013-02-19 10:21:49 +080014885 ctxt->sizeentcopy = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014886 xmlInitNodeInfoSeq(&ctxt->node_seq);
14887
14888 if (ctxt->attsDefault != NULL) {
14889 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14890 ctxt->attsDefault = NULL;
14891 }
14892 if (ctxt->attsSpecial != NULL) {
14893 xmlHashFree(ctxt->attsSpecial, NULL);
14894 ctxt->attsSpecial = NULL;
14895 }
14896
Daniel Veillard4432df22003-09-28 18:58:27 +000014897#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014898 if (ctxt->catalogs != NULL)
14899 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014900#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014901 if (ctxt->lastError.code != XML_ERR_OK)
14902 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014903}
14904
14905/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014906 * xmlCtxtResetPush:
14907 * @ctxt: an XML parser context
14908 * @chunk: a pointer to an array of chars
14909 * @size: number of chars in the array
14910 * @filename: an optional file name or URI
14911 * @encoding: the document encoding, or NULL
14912 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014913 * Reset a push parser context
14914 *
14915 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014916 */
14917int
14918xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14919 int size, const char *filename, const char *encoding)
14920{
14921 xmlParserInputPtr inputStream;
14922 xmlParserInputBufferPtr buf;
14923 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14924
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014925 if (ctxt == NULL)
14926 return(1);
14927
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014928 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14929 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14930
14931 buf = xmlAllocParserInputBuffer(enc);
14932 if (buf == NULL)
14933 return(1);
14934
14935 if (ctxt == NULL) {
14936 xmlFreeParserInputBuffer(buf);
14937 return(1);
14938 }
14939
14940 xmlCtxtReset(ctxt);
14941
14942 if (ctxt->pushTab == NULL) {
14943 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14944 sizeof(xmlChar *));
14945 if (ctxt->pushTab == NULL) {
14946 xmlErrMemory(ctxt, NULL);
14947 xmlFreeParserInputBuffer(buf);
14948 return(1);
14949 }
14950 }
14951
14952 if (filename == NULL) {
14953 ctxt->directory = NULL;
14954 } else {
14955 ctxt->directory = xmlParserGetDirectory(filename);
14956 }
14957
14958 inputStream = xmlNewInputStream(ctxt);
14959 if (inputStream == NULL) {
14960 xmlFreeParserInputBuffer(buf);
14961 return(1);
14962 }
14963
14964 if (filename == NULL)
14965 inputStream->filename = NULL;
14966 else
14967 inputStream->filename = (char *)
14968 xmlCanonicPath((const xmlChar *) filename);
14969 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014970 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014971
14972 inputPush(ctxt, inputStream);
14973
14974 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14975 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014976 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14977 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014978
14979 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14980
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014981 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014982#ifdef DEBUG_PUSH
14983 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14984#endif
14985 }
14986
14987 if (encoding != NULL) {
14988 xmlCharEncodingHandlerPtr hdlr;
14989
Daniel Veillard37334572008-07-31 08:20:02 +000014990 if (ctxt->encoding != NULL)
14991 xmlFree((xmlChar *) ctxt->encoding);
14992 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14993
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014994 hdlr = xmlFindCharEncodingHandler(encoding);
14995 if (hdlr != NULL) {
14996 xmlSwitchToEncoding(ctxt, hdlr);
14997 } else {
14998 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14999 "Unsupported encoding %s\n", BAD_CAST encoding);
15000 }
15001 } else if (enc != XML_CHAR_ENCODING_NONE) {
15002 xmlSwitchEncoding(ctxt, enc);
15003 }
15004
15005 return(0);
15006}
15007
Daniel Veillard37334572008-07-31 08:20:02 +000015008
Daniel Veillard9ba8e382003-10-28 21:31:45 +000015009/**
Daniel Veillard37334572008-07-31 08:20:02 +000015010 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015011 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015012 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000015013 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015014 *
15015 * Applies the options to the parser context
15016 *
15017 * Returns 0 in case of success, the set of unknown or unimplemented options
15018 * in case of error.
15019 */
Daniel Veillard37334572008-07-31 08:20:02 +000015020static int
15021xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015022{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000015023 if (ctxt == NULL)
15024 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000015025 if (encoding != NULL) {
15026 if (ctxt->encoding != NULL)
15027 xmlFree((xmlChar *) ctxt->encoding);
15028 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
15029 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015030 if (options & XML_PARSE_RECOVER) {
15031 ctxt->recovery = 1;
15032 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015033 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015034 } else
15035 ctxt->recovery = 0;
15036 if (options & XML_PARSE_DTDLOAD) {
15037 ctxt->loadsubset = XML_DETECT_IDS;
15038 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015039 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015040 } else
15041 ctxt->loadsubset = 0;
15042 if (options & XML_PARSE_DTDATTR) {
15043 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
15044 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015045 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015046 }
15047 if (options & XML_PARSE_NOENT) {
15048 ctxt->replaceEntities = 1;
15049 /* ctxt->loadsubset |= XML_DETECT_IDS; */
15050 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015051 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015052 } else
15053 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015054 if (options & XML_PARSE_PEDANTIC) {
15055 ctxt->pedantic = 1;
15056 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015057 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015058 } else
15059 ctxt->pedantic = 0;
15060 if (options & XML_PARSE_NOBLANKS) {
15061 ctxt->keepBlanks = 0;
15062 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
15063 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015064 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015065 } else
15066 ctxt->keepBlanks = 1;
15067 if (options & XML_PARSE_DTDVALID) {
15068 ctxt->validate = 1;
15069 if (options & XML_PARSE_NOWARNING)
15070 ctxt->vctxt.warning = NULL;
15071 if (options & XML_PARSE_NOERROR)
15072 ctxt->vctxt.error = NULL;
15073 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015074 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015075 } else
15076 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000015077 if (options & XML_PARSE_NOWARNING) {
15078 ctxt->sax->warning = NULL;
15079 options -= XML_PARSE_NOWARNING;
15080 }
15081 if (options & XML_PARSE_NOERROR) {
15082 ctxt->sax->error = NULL;
15083 ctxt->sax->fatalError = NULL;
15084 options -= XML_PARSE_NOERROR;
15085 }
Daniel Veillard81273902003-09-30 00:43:48 +000015086#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015087 if (options & XML_PARSE_SAX1) {
15088 ctxt->sax->startElement = xmlSAX2StartElement;
15089 ctxt->sax->endElement = xmlSAX2EndElement;
15090 ctxt->sax->startElementNs = NULL;
15091 ctxt->sax->endElementNs = NULL;
15092 ctxt->sax->initialized = 1;
15093 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015094 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015095 }
Daniel Veillard81273902003-09-30 00:43:48 +000015096#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015097 if (options & XML_PARSE_NODICT) {
15098 ctxt->dictNames = 0;
15099 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015100 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015101 } else {
15102 ctxt->dictNames = 1;
15103 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015104 if (options & XML_PARSE_NOCDATA) {
15105 ctxt->sax->cdataBlock = NULL;
15106 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000015107 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000015108 }
15109 if (options & XML_PARSE_NSCLEAN) {
15110 ctxt->options |= XML_PARSE_NSCLEAN;
15111 options -= XML_PARSE_NSCLEAN;
15112 }
Daniel Veillard61b93382003-11-03 14:28:31 +000015113 if (options & XML_PARSE_NONET) {
15114 ctxt->options |= XML_PARSE_NONET;
15115 options -= XML_PARSE_NONET;
15116 }
Daniel Veillard8874b942005-08-25 13:19:21 +000015117 if (options & XML_PARSE_COMPACT) {
15118 ctxt->options |= XML_PARSE_COMPACT;
15119 options -= XML_PARSE_COMPACT;
15120 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000015121 if (options & XML_PARSE_OLD10) {
15122 ctxt->options |= XML_PARSE_OLD10;
15123 options -= XML_PARSE_OLD10;
15124 }
Daniel Veillard8915c152008-08-26 13:05:34 +000015125 if (options & XML_PARSE_NOBASEFIX) {
15126 ctxt->options |= XML_PARSE_NOBASEFIX;
15127 options -= XML_PARSE_NOBASEFIX;
15128 }
15129 if (options & XML_PARSE_HUGE) {
15130 ctxt->options |= XML_PARSE_HUGE;
15131 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015132 if (ctxt->dict != NULL)
15133 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015134 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015135 if (options & XML_PARSE_OLDSAX) {
15136 ctxt->options |= XML_PARSE_OLDSAX;
15137 options -= XML_PARSE_OLDSAX;
15138 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015139 if (options & XML_PARSE_IGNORE_ENC) {
15140 ctxt->options |= XML_PARSE_IGNORE_ENC;
15141 options -= XML_PARSE_IGNORE_ENC;
15142 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015143 if (options & XML_PARSE_BIG_LINES) {
15144 ctxt->options |= XML_PARSE_BIG_LINES;
15145 options -= XML_PARSE_BIG_LINES;
15146 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015147 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015148 return (options);
15149}
15150
15151/**
Daniel Veillard37334572008-07-31 08:20:02 +000015152 * xmlCtxtUseOptions:
15153 * @ctxt: an XML parser context
15154 * @options: a combination of xmlParserOption
15155 *
15156 * Applies the options to the parser context
15157 *
15158 * Returns 0 in case of success, the set of unknown or unimplemented options
15159 * in case of error.
15160 */
15161int
15162xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15163{
15164 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15165}
15166
15167/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015168 * xmlDoRead:
15169 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015170 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015171 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015172 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015173 * @reuse: keep the context for reuse
15174 *
15175 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015176 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015177 * Returns the resulting document tree or NULL
15178 */
15179static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015180xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15181 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015182{
15183 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015184
15185 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015186 if (encoding != NULL) {
15187 xmlCharEncodingHandlerPtr hdlr;
15188
15189 hdlr = xmlFindCharEncodingHandler(encoding);
15190 if (hdlr != NULL)
15191 xmlSwitchToEncoding(ctxt, hdlr);
15192 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015193 if ((URL != NULL) && (ctxt->input != NULL) &&
15194 (ctxt->input->filename == NULL))
15195 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015196 xmlParseDocument(ctxt);
15197 if ((ctxt->wellFormed) || ctxt->recovery)
15198 ret = ctxt->myDoc;
15199 else {
15200 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015201 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015202 xmlFreeDoc(ctxt->myDoc);
15203 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015204 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015205 ctxt->myDoc = NULL;
15206 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015207 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015208 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015209
15210 return (ret);
15211}
15212
15213/**
15214 * xmlReadDoc:
15215 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015216 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015217 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015218 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015219 *
15220 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015221 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015222 * Returns the resulting document tree
15223 */
15224xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015225xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015226{
15227 xmlParserCtxtPtr ctxt;
15228
15229 if (cur == NULL)
15230 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015231 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015232
15233 ctxt = xmlCreateDocParserCtxt(cur);
15234 if (ctxt == NULL)
15235 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015236 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015237}
15238
15239/**
15240 * xmlReadFile:
15241 * @filename: a file or URL
15242 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015243 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015244 *
15245 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015246 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015247 * Returns the resulting document tree
15248 */
15249xmlDocPtr
15250xmlReadFile(const char *filename, const char *encoding, int options)
15251{
15252 xmlParserCtxtPtr ctxt;
15253
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015254 xmlInitParser();
Daniel Veillard61b93382003-11-03 14:28:31 +000015255 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015256 if (ctxt == NULL)
15257 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015258 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015259}
15260
15261/**
15262 * xmlReadMemory:
15263 * @buffer: a pointer to a char array
15264 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015265 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015266 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015267 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015268 *
15269 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015270 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015271 * Returns the resulting document tree
15272 */
15273xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015274xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015275{
15276 xmlParserCtxtPtr ctxt;
15277
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015278 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015279 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15280 if (ctxt == NULL)
15281 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015282 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015283}
15284
15285/**
15286 * xmlReadFd:
15287 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015288 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015289 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015290 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015291 *
15292 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015293 * NOTE that the file descriptor will not be closed when the
15294 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015295 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015296 * Returns the resulting document tree
15297 */
15298xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015299xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015300{
15301 xmlParserCtxtPtr ctxt;
15302 xmlParserInputBufferPtr input;
15303 xmlParserInputPtr stream;
15304
15305 if (fd < 0)
15306 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015307 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015308
15309 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15310 if (input == NULL)
15311 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015312 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015313 ctxt = xmlNewParserCtxt();
15314 if (ctxt == NULL) {
15315 xmlFreeParserInputBuffer(input);
15316 return (NULL);
15317 }
15318 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15319 if (stream == NULL) {
15320 xmlFreeParserInputBuffer(input);
15321 xmlFreeParserCtxt(ctxt);
15322 return (NULL);
15323 }
15324 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015325 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015326}
15327
15328/**
15329 * xmlReadIO:
15330 * @ioread: an I/O read function
15331 * @ioclose: an I/O close function
15332 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015333 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015334 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015335 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015336 *
15337 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015338 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015339 * Returns the resulting document tree
15340 */
15341xmlDocPtr
15342xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015343 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015344{
15345 xmlParserCtxtPtr ctxt;
15346 xmlParserInputBufferPtr input;
15347 xmlParserInputPtr stream;
15348
15349 if (ioread == NULL)
15350 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015351 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015352
15353 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15354 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015355 if (input == NULL) {
15356 if (ioclose != NULL)
15357 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015358 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015359 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015360 ctxt = xmlNewParserCtxt();
15361 if (ctxt == NULL) {
15362 xmlFreeParserInputBuffer(input);
15363 return (NULL);
15364 }
15365 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15366 if (stream == NULL) {
15367 xmlFreeParserInputBuffer(input);
15368 xmlFreeParserCtxt(ctxt);
15369 return (NULL);
15370 }
15371 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015372 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015373}
15374
15375/**
15376 * xmlCtxtReadDoc:
15377 * @ctxt: an XML parser context
15378 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015379 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015380 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015381 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015382 *
15383 * parse an XML in-memory document and build a tree.
15384 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015385 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015386 * Returns the resulting document tree
15387 */
15388xmlDocPtr
15389xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015390 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015391{
15392 xmlParserInputPtr stream;
15393
15394 if (cur == NULL)
15395 return (NULL);
15396 if (ctxt == NULL)
15397 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015398 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015399
15400 xmlCtxtReset(ctxt);
15401
15402 stream = xmlNewStringInputStream(ctxt, cur);
15403 if (stream == NULL) {
15404 return (NULL);
15405 }
15406 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015407 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015408}
15409
15410/**
15411 * xmlCtxtReadFile:
15412 * @ctxt: an XML parser context
15413 * @filename: a file or URL
15414 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015415 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015416 *
15417 * parse an XML file from the filesystem or the network.
15418 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015419 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015420 * Returns the resulting document tree
15421 */
15422xmlDocPtr
15423xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15424 const char *encoding, int options)
15425{
15426 xmlParserInputPtr stream;
15427
15428 if (filename == NULL)
15429 return (NULL);
15430 if (ctxt == NULL)
15431 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015432 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015433
15434 xmlCtxtReset(ctxt);
15435
Daniel Veillard29614c72004-11-26 10:47:26 +000015436 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015437 if (stream == NULL) {
15438 return (NULL);
15439 }
15440 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015441 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015442}
15443
15444/**
15445 * xmlCtxtReadMemory:
15446 * @ctxt: an XML parser context
15447 * @buffer: a pointer to a char array
15448 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015449 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015450 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015451 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015452 *
15453 * parse an XML in-memory document and build a tree.
15454 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015455 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015456 * Returns the resulting document tree
15457 */
15458xmlDocPtr
15459xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015460 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015461{
15462 xmlParserInputBufferPtr input;
15463 xmlParserInputPtr stream;
15464
15465 if (ctxt == NULL)
15466 return (NULL);
15467 if (buffer == NULL)
15468 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015469 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015470
15471 xmlCtxtReset(ctxt);
15472
15473 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15474 if (input == NULL) {
15475 return(NULL);
15476 }
15477
15478 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15479 if (stream == NULL) {
15480 xmlFreeParserInputBuffer(input);
15481 return(NULL);
15482 }
15483
15484 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015485 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015486}
15487
15488/**
15489 * xmlCtxtReadFd:
15490 * @ctxt: an XML parser context
15491 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015492 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015493 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015494 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015495 *
15496 * parse an XML from a file descriptor and build a tree.
15497 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015498 * NOTE that the file descriptor will not be closed when the
15499 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015500 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015501 * Returns the resulting document tree
15502 */
15503xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015504xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15505 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015506{
15507 xmlParserInputBufferPtr input;
15508 xmlParserInputPtr stream;
15509
15510 if (fd < 0)
15511 return (NULL);
15512 if (ctxt == NULL)
15513 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015514 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015515
15516 xmlCtxtReset(ctxt);
15517
15518
15519 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15520 if (input == NULL)
15521 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015522 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015523 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15524 if (stream == NULL) {
15525 xmlFreeParserInputBuffer(input);
15526 return (NULL);
15527 }
15528 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015529 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015530}
15531
15532/**
15533 * xmlCtxtReadIO:
15534 * @ctxt: an XML parser context
15535 * @ioread: an I/O read function
15536 * @ioclose: an I/O close function
15537 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015538 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015539 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015540 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015541 *
15542 * parse an XML document from I/O functions and source and build a tree.
15543 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015544 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015545 * Returns the resulting document tree
15546 */
15547xmlDocPtr
15548xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15549 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015550 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015551 const char *encoding, int options)
15552{
15553 xmlParserInputBufferPtr input;
15554 xmlParserInputPtr stream;
15555
15556 if (ioread == NULL)
15557 return (NULL);
15558 if (ctxt == NULL)
15559 return (NULL);
Daniel Veillard4e1476c2013-12-09 15:23:40 +080015560 xmlInitParser();
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015561
15562 xmlCtxtReset(ctxt);
15563
15564 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15565 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015566 if (input == NULL) {
15567 if (ioclose != NULL)
15568 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015569 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015570 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015571 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15572 if (stream == NULL) {
15573 xmlFreeParserInputBuffer(input);
15574 return (NULL);
15575 }
15576 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015577 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015578}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015579
15580#define bottom_parser
15581#include "elfgcchack.h"