blob: c7802cf36a7b0f15d1ae517de9294d0c27ad9657 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
Owen Taylor3473f882001-02-23 17:55:21 +000021 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard0161e632008-08-28 15:36:32 +000097/************************************************************************
98 * *
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
100 * *
101 ************************************************************************/
102
103#define XML_PARSER_BIG_ENTITY 1000
104#define XML_PARSER_LOT_ENTITY 5000
105
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
112#define XML_PARSER_NON_LINEAR 10
113
114/*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard0161e632008-08-28 15:36:32 +0000125 xmlEntityPtr ent)
126{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
133 if (size != 0) {
134 /*
135 * Do the check based on the replacement size of the entity
136 */
137 if (size < XML_PARSER_BIG_ENTITY)
138 return(0);
139
140 /*
141 * A limit on the amount of text data reasonably used
142 */
143 if (ctxt->input != NULL) {
144 consumed = ctxt->input->consumed +
145 (ctxt->input->cur - ctxt->input->base);
146 }
147 consumed += ctxt->sizeentities;
148
149 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
150 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
151 return (0);
152 } else if (ent != NULL) {
153 /*
154 * use the number of parsed entities in the replacement
155 */
156 size = ent->checked;
157
158 /*
159 * The amount of data parsed counting entities size only once
160 */
161 if (ctxt->input != NULL) {
162 consumed = ctxt->input->consumed +
163 (ctxt->input->cur - ctxt->input->base);
164 }
165 consumed += ctxt->sizeentities;
166
167 /*
168 * Check the density of entities for the amount of data
169 * knowing an entity reference will take at least 3 bytes
170 */
171 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
172 return (0);
173 } else {
174 /*
175 * strange we got no data for checking just return
176 */
177 return (0);
178 }
179
180 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
181 return (1);
182}
183
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000184/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000185 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000186 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000187 * arbitrary depth limit for the XML documents that we allow to
188 * process. This is not a limitation of the parser but a safety
189 * boundary feature. It can be disabled with the XML_PARSE_HUGE
190 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000191 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000192unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000193
Daniel Veillard0fb18932003-09-07 09:14:37 +0000194
Daniel Veillard0161e632008-08-28 15:36:32 +0000195
196#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000197#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000198#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000199#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
200
Daniel Veillard1f972e92012-08-15 10:16:37 +0800201/**
202 * XML_PARSER_CHUNK_SIZE
203 *
204 * When calling GROW that's the minimal amount of data
205 * the parser expected to have received. It is not a hard
206 * limit but an optimization when reading strings like Names
207 * It is not strictly needed as long as inputs available characters
208 * are followed by 0, which should be provided by the I/O level
209 */
210#define XML_PARSER_CHUNK_SIZE 100
211
Owen Taylor3473f882001-02-23 17:55:21 +0000212/*
Owen Taylor3473f882001-02-23 17:55:21 +0000213 * List of XML prefixed PI allowed by W3C specs
214 */
215
Daniel Veillardb44025c2001-10-11 22:55:55 +0000216static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000217 "xml-stylesheet",
Daniel Veillard4c4653e2011-06-05 11:29:29 +0800218 "xml-model",
Owen Taylor3473f882001-02-23 17:55:21 +0000219 NULL
220};
221
Daniel Veillarda07050d2003-10-19 14:46:32 +0000222
Owen Taylor3473f882001-02-23 17:55:21 +0000223/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200224static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
225 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000226
Daniel Veillard7d515752003-09-26 19:12:37 +0000227static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000228xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
229 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000230 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000231 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000232
Daniel Veillard37334572008-07-31 08:20:02 +0000233static int
234xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
235 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000236#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000237static void
238xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
239 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000240#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000241
Daniel Veillard7d515752003-09-26 19:12:37 +0000242static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000243xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
244 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000245
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000246static int
247xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
248
Daniel Veillarde57ec792003-09-10 10:50:59 +0000249/************************************************************************
250 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800251 * Some factorized error routines *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000252 * *
253 ************************************************************************/
254
255/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 * xmlErrAttributeDup:
257 * @ctxt: an XML parser context
258 * @prefix: the attribute prefix
259 * @localname: the attribute localname
260 *
261 * Handle a redefinition of attribute error
262 */
263static void
264xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
265 const xmlChar * localname)
266{
Daniel Veillard157fee02003-10-31 10:36:03 +0000267 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
268 (ctxt->instate == XML_PARSER_EOF))
269 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000270 if (ctxt != NULL)
271 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200272
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000273 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000274 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200275 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000276 (const char *) localname, NULL, NULL, 0, 0,
277 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000278 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000279 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200280 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 (const char *) prefix, (const char *) localname,
282 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
283 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000284 if (ctxt != NULL) {
285 ctxt->wellFormed = 0;
286 if (ctxt->recovery == 0)
287 ctxt->disableSAX = 1;
288 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289}
290
291/**
292 * xmlFatalErr:
293 * @ctxt: an XML parser context
294 * @error: the error number
295 * @extra: extra information string
296 *
297 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
298 */
299static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000301{
302 const char *errmsg;
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800303 char errstr[129] = "";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000304
Daniel Veillard157fee02003-10-31 10:36:03 +0000305 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
306 (ctxt->instate == XML_PARSER_EOF))
307 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 switch (error) {
309 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800310 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800313 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800316 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "internal error";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800322 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800325 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800328 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800331 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800334 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800337 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800340 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800343 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800346 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800349 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800352 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800355 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800358 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800361 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800364 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800367 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000369 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800370 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000372 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800373 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000374 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800376 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800379 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000381 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800382 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000383 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000384 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800385 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000387 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 errmsg = "Fragment not allowed";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800391 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000392 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000393 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800394 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000396 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800397 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000399 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800400 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000401 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000402 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800403 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000405 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800406 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000407 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000408 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800409 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000410 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000411 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000412 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
424 case XML_ERR_CONDSEC_INVALID_KEYWORD:
425 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800426 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800432 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800438 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800441 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800444 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800447 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800450 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800453 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000454 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000455 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800456 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800459 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000460 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000461 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800462 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000464 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800465 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000466 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000467 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800468 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000470 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800471 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000473 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800474 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000475 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000476 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800477 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000478 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800479 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800480 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800481 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000482#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000483 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800488 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 }
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 if (info == NULL)
491 snprintf(errstr, 128, "%s\n", errmsg);
492 else
493 snprintf(errstr, 128, "%s: %%s\n", errmsg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000494 if (ctxt != NULL)
495 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000496 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800497 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000498 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000499 if (ctxt != NULL) {
500 ctxt->wellFormed = 0;
501 if (ctxt->recovery == 0)
502 ctxt->disableSAX = 1;
503 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000504}
505
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000506/**
507 * xmlFatalErrMsg:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 *
512 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
513 */
514static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000515xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
516 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000517{
Daniel Veillard157fee02003-10-31 10:36:03 +0000518 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
519 (ctxt->instate == XML_PARSER_EOF))
520 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL)
522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200524 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000525 if (ctxt != NULL) {
526 ctxt->wellFormed = 0;
527 if (ctxt->recovery == 0)
528 ctxt->disableSAX = 1;
529 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000530}
531
532/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000533 * xmlWarningMsg:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @str1: extra data
538 * @str2: extra data
539 *
540 * Handle a warning.
541 */
542static void
543xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
544 const char *msg, const xmlChar *str1, const xmlChar *str2)
545{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000546 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000547
Daniel Veillard157fee02003-10-31 10:36:03 +0000548 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
549 (ctxt->instate == XML_PARSER_EOF))
550 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000551 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
552 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000553 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200554 if (ctxt != NULL) {
555 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000556 (ctxt->sax) ? ctxt->sax->warning : NULL,
557 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000558 ctxt, NULL, XML_FROM_PARSER, error,
559 XML_ERR_WARNING, NULL, 0,
560 (const char *) str1, (const char *) str2, NULL, 0, 0,
561 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200562 } else {
563 __xmlRaiseError(schannel, NULL, NULL,
564 ctxt, NULL, XML_FROM_PARSER, error,
565 XML_ERR_WARNING, NULL, 0,
566 (const char *) str1, (const char *) str2, NULL, 0, 0,
567 msg, (const char *) str1, (const char *) str2);
568 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000569}
570
571/**
572 * xmlValidityError:
573 * @ctxt: an XML parser context
574 * @error: the error number
575 * @msg: the error message
576 * @str1: extra data
577 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000578 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000579 */
580static void
581xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000582 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000583{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000584 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000585
586 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
587 (ctxt->instate == XML_PARSER_EOF))
588 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000589 if (ctxt != NULL) {
590 ctxt->errNo = error;
591 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
592 schannel = ctxt->sax->serror;
593 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200594 if (ctxt != NULL) {
595 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000596 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000597 ctxt, NULL, XML_FROM_DTD, error,
598 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000599 (const char *) str2, NULL, 0, 0,
600 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000601 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200602 } else {
603 __xmlRaiseError(schannel, NULL, NULL,
604 ctxt, NULL, XML_FROM_DTD, error,
605 XML_ERR_ERROR, NULL, 0, (const char *) str1,
606 (const char *) str2, NULL, 0, 0,
607 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000608 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000609}
610
611/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000612 * xmlFatalErrMsgInt:
613 * @ctxt: an XML parser context
614 * @error: the error number
615 * @msg: the error message
616 * @val: an integer value
617 *
618 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
619 */
620static void
621xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000622 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000623{
Daniel Veillard157fee02003-10-31 10:36:03 +0000624 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
625 (ctxt->instate == XML_PARSER_EOF))
626 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000627 if (ctxt != NULL)
628 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000629 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000630 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
631 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000632 if (ctxt != NULL) {
633 ctxt->wellFormed = 0;
634 if (ctxt->recovery == 0)
635 ctxt->disableSAX = 1;
636 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000637}
638
639/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000640 * xmlFatalErrMsgStrIntStr:
641 * @ctxt: an XML parser context
642 * @error: the error number
643 * @msg: the error message
644 * @str1: an string info
645 * @val: an integer value
646 * @str2: an string info
647 *
648 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
649 */
650static void
651xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800652 const char *msg, const xmlChar *str1, int val,
Daniel Veillardf403d292003-10-05 13:51:35 +0000653 const xmlChar *str2)
654{
Daniel Veillard157fee02003-10-31 10:36:03 +0000655 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
656 (ctxt->instate == XML_PARSER_EOF))
657 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000658 if (ctxt != NULL)
659 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000660 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000661 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
662 NULL, 0, (const char *) str1, (const char *) str2,
663 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000664 if (ctxt != NULL) {
665 ctxt->wellFormed = 0;
666 if (ctxt->recovery == 0)
667 ctxt->disableSAX = 1;
668 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000669}
670
671/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000672 * xmlFatalErrMsgStr:
673 * @ctxt: an XML parser context
674 * @error: the error number
675 * @msg: the error message
676 * @val: a string value
677 *
678 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
679 */
680static void
681xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000682 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000683{
Daniel Veillard157fee02003-10-31 10:36:03 +0000684 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
685 (ctxt->instate == XML_PARSER_EOF))
686 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000687 if (ctxt != NULL)
688 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000689 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000690 XML_FROM_PARSER, error, XML_ERR_FATAL,
691 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
692 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000693 if (ctxt != NULL) {
694 ctxt->wellFormed = 0;
695 if (ctxt->recovery == 0)
696 ctxt->disableSAX = 1;
697 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000698}
699
700/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000701 * xmlErrMsgStr:
702 * @ctxt: an XML parser context
703 * @error: the error number
704 * @msg: the error message
705 * @val: a string value
706 *
707 * Handle a non fatal parser error
708 */
709static void
710xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
711 const char *msg, const xmlChar * val)
712{
Daniel Veillard157fee02003-10-31 10:36:03 +0000713 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714 (ctxt->instate == XML_PARSER_EOF))
715 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000716 if (ctxt != NULL)
717 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000718 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000719 XML_FROM_PARSER, error, XML_ERR_ERROR,
720 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
721 val);
722}
723
724/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000725 * xmlNsErr:
726 * @ctxt: an XML parser context
727 * @error: the error number
728 * @msg: the message
729 * @info1: extra information string
730 * @info2: extra information string
731 *
732 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
733 */
734static void
735xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
736 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000737 const xmlChar * info1, const xmlChar * info2,
738 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000739{
Daniel Veillard157fee02003-10-31 10:36:03 +0000740 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
741 (ctxt->instate == XML_PARSER_EOF))
742 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000743 if (ctxt != NULL)
744 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000745 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000746 XML_ERR_ERROR, NULL, 0, (const char *) info1,
747 (const char *) info2, (const char *) info3, 0, 0, msg,
748 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000749 if (ctxt != NULL)
750 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000751}
752
Daniel Veillard37334572008-07-31 08:20:02 +0000753/**
754 * xmlNsWarn
755 * @ctxt: an XML parser context
756 * @error: the error number
757 * @msg: the message
758 * @info1: extra information string
759 * @info2: extra information string
760 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800761 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000762 */
763static void
764xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
765 const char *msg,
766 const xmlChar * info1, const xmlChar * info2,
767 const xmlChar * info3)
768{
769 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
770 (ctxt->instate == XML_PARSER_EOF))
771 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000772 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
773 XML_ERR_WARNING, NULL, 0, (const char *) info1,
774 (const char *) info2, (const char *) info3, 0, 0, msg,
775 info1, info2, info3);
776}
777
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000778/************************************************************************
779 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +0800780 * Library wide options *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781 * *
782 ************************************************************************/
783
784/**
785 * xmlHasFeature:
786 * @feature: the feature to be examined
787 *
788 * Examines if the library has been compiled with a given feature.
789 *
790 * Returns a non-zero value if the feature exist, otherwise zero.
791 * Returns zero (0) if the feature does not exist or an unknown
792 * unknown feature is requested, non-zero otherwise.
793 */
794int
795xmlHasFeature(xmlFeature feature)
796{
797 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef LIBXML_THREAD_ENABLED
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_TREE_ENABLED
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000810 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000811#ifdef LIBXML_OUTPUT_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000816 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000817#ifdef LIBXML_PUSH_ENABLED
818 return(1);
819#else
820 return(0);
821#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000822 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000823#ifdef LIBXML_READER_ENABLED
824 return(1);
825#else
826 return(0);
827#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000828 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000829#ifdef LIBXML_PATTERN_ENABLED
830 return(1);
831#else
832 return(0);
833#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000834 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000835#ifdef LIBXML_WRITER_ENABLED
836 return(1);
837#else
838 return(0);
839#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000840 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000841#ifdef LIBXML_SAX1_ENABLED
842 return(1);
843#else
844 return(0);
845#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000846 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000847#ifdef LIBXML_FTP_ENABLED
848 return(1);
849#else
850 return(0);
851#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000852 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000853#ifdef LIBXML_HTTP_ENABLED
854 return(1);
855#else
856 return(0);
857#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000858 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000859#ifdef LIBXML_VALID_ENABLED
860 return(1);
861#else
862 return(0);
863#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000864 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000865#ifdef LIBXML_HTML_ENABLED
866 return(1);
867#else
868 return(0);
869#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000870 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000871#ifdef LIBXML_LEGACY_ENABLED
872 return(1);
873#else
874 return(0);
875#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000876 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000877#ifdef LIBXML_C14N_ENABLED
878 return(1);
879#else
880 return(0);
881#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000882 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000883#ifdef LIBXML_CATALOG_ENABLED
884 return(1);
885#else
886 return(0);
887#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000888 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000889#ifdef LIBXML_XPATH_ENABLED
890 return(1);
891#else
892 return(0);
893#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000894 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000895#ifdef LIBXML_XPTR_ENABLED
896 return(1);
897#else
898 return(0);
899#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000900 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000901#ifdef LIBXML_XINCLUDE_ENABLED
902 return(1);
903#else
904 return(0);
905#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000906 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000907#ifdef LIBXML_ICONV_ENABLED
908 return(1);
909#else
910 return(0);
911#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000912 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000913#ifdef LIBXML_ISO8859X_ENABLED
914 return(1);
915#else
916 return(0);
917#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000918 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000919#ifdef LIBXML_UNICODE_ENABLED
920 return(1);
921#else
922 return(0);
923#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000924 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000925#ifdef LIBXML_REGEXP_ENABLED
926 return(1);
927#else
928 return(0);
929#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000930 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000931#ifdef LIBXML_AUTOMATA_ENABLED
932 return(1);
933#else
934 return(0);
935#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000936 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000937#ifdef LIBXML_EXPR_ENABLED
938 return(1);
939#else
940 return(0);
941#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000942 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000943#ifdef LIBXML_SCHEMAS_ENABLED
944 return(1);
945#else
946 return(0);
947#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000948 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000949#ifdef LIBXML_SCHEMATRON_ENABLED
950 return(1);
951#else
952 return(0);
953#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000954 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000955#ifdef LIBXML_MODULES_ENABLED
956 return(1);
957#else
958 return(0);
959#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000960 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000961#ifdef LIBXML_DEBUG_ENABLED
962 return(1);
963#else
964 return(0);
965#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000966 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000967#ifdef DEBUG_MEMORY_LOCATION
968 return(1);
969#else
970 return(0);
971#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000972 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000973#ifdef LIBXML_DEBUG_RUNTIME
974 return(1);
975#else
976 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000977#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000978 case XML_WITH_ZLIB:
979#ifdef LIBXML_ZLIB_ENABLED
980 return(1);
981#else
982 return(0);
983#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +0200984 case XML_WITH_LZMA:
985#ifdef LIBXML_LZMA_ENABLED
986 return(1);
987#else
988 return(0);
989#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100990 case XML_WITH_ICU:
991#ifdef LIBXML_ICU_ENABLED
992 return(1);
993#else
994 return(0);
995#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000996 default:
997 break;
998 }
999 return(0);
1000}
1001
1002/************************************************************************
1003 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001004 * SAX2 defaulted attributes handling *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001005 * *
1006 ************************************************************************/
1007
1008/**
1009 * xmlDetectSAX2:
1010 * @ctxt: an XML parser context
1011 *
1012 * Do the SAX2 detection and specific intialization
1013 */
1014static void
1015xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1016 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001017#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001018 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1019 ((ctxt->sax->startElementNs != NULL) ||
1020 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001021#else
1022 ctxt->sax2 = 1;
1023#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001024
1025 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1026 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1027 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08001028 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1029 (ctxt->str_xml_ns == NULL)) {
William M. Brack9f797ab2004-07-28 07:40:12 +00001030 xmlErrMemory(ctxt, NULL);
1031 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001032}
1033
Daniel Veillarde57ec792003-09-10 10:50:59 +00001034typedef struct _xmlDefAttrs xmlDefAttrs;
1035typedef xmlDefAttrs *xmlDefAttrsPtr;
1036struct _xmlDefAttrs {
1037 int nbAttrs; /* number of defaulted attributes on that element */
1038 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001039 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001040};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001041
1042/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001043 * xmlAttrNormalizeSpace:
1044 * @src: the source string
1045 * @dst: the target string
1046 *
1047 * Normalize the space in non CDATA attribute values:
1048 * If the attribute type is not CDATA, then the XML processor MUST further
1049 * process the normalized attribute value by discarding any leading and
1050 * trailing space (#x20) characters, and by replacing sequences of space
1051 * (#x20) characters by a single space (#x20) character.
1052 * Note that the size of dst need to be at least src, and if one doesn't need
1053 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1054 * passing src as dst is just fine.
1055 *
1056 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1057 * is needed.
1058 */
1059static xmlChar *
1060xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1061{
1062 if ((src == NULL) || (dst == NULL))
1063 return(NULL);
1064
1065 while (*src == 0x20) src++;
1066 while (*src != 0) {
1067 if (*src == 0x20) {
1068 while (*src == 0x20) src++;
1069 if (*src != 0)
1070 *dst++ = 0x20;
1071 } else {
1072 *dst++ = *src++;
1073 }
1074 }
1075 *dst = 0;
1076 if (dst == src)
1077 return(NULL);
1078 return(dst);
1079}
1080
1081/**
1082 * xmlAttrNormalizeSpace2:
1083 * @src: the source string
1084 *
1085 * Normalize the space in non CDATA attribute values, a slightly more complex
1086 * front end to avoid allocation problems when running on attribute values
1087 * coming from the input.
1088 *
1089 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1090 * is needed.
1091 */
1092static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001093xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001094{
1095 int i;
1096 int remove_head = 0;
1097 int need_realloc = 0;
1098 const xmlChar *cur;
1099
1100 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1101 return(NULL);
1102 i = *len;
1103 if (i <= 0)
1104 return(NULL);
1105
1106 cur = src;
1107 while (*cur == 0x20) {
1108 cur++;
1109 remove_head++;
1110 }
1111 while (*cur != 0) {
1112 if (*cur == 0x20) {
1113 cur++;
1114 if ((*cur == 0x20) || (*cur == 0)) {
1115 need_realloc = 1;
1116 break;
1117 }
1118 } else
1119 cur++;
1120 }
1121 if (need_realloc) {
1122 xmlChar *ret;
1123
1124 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1125 if (ret == NULL) {
1126 xmlErrMemory(ctxt, NULL);
1127 return(NULL);
1128 }
1129 xmlAttrNormalizeSpace(ret, ret);
1130 *len = (int) strlen((const char *)ret);
1131 return(ret);
1132 } else if (remove_head) {
1133 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001134 memmove(src, src + remove_head, 1 + *len);
1135 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001136 }
1137 return(NULL);
1138}
1139
1140/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001141 * xmlAddDefAttrs:
1142 * @ctxt: an XML parser context
1143 * @fullname: the element fullname
1144 * @fullattr: the attribute fullname
1145 * @value: the attribute value
1146 *
1147 * Add a defaulted attribute for an element
1148 */
1149static void
1150xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1151 const xmlChar *fullname,
1152 const xmlChar *fullattr,
1153 const xmlChar *value) {
1154 xmlDefAttrsPtr defaults;
1155 int len;
1156 const xmlChar *name;
1157 const xmlChar *prefix;
1158
Daniel Veillard6a31b832008-03-26 14:06:44 +00001159 /*
1160 * Allows to detect attribute redefinitions
1161 */
1162 if (ctxt->attsSpecial != NULL) {
1163 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1164 return;
1165 }
1166
Daniel Veillarde57ec792003-09-10 10:50:59 +00001167 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001168 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001169 if (ctxt->attsDefault == NULL)
1170 goto mem_error;
1171 }
1172
1173 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001174 * split the element name into prefix:localname , the string found
1175 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001176 */
1177 name = xmlSplitQName3(fullname, &len);
1178 if (name == NULL) {
1179 name = xmlDictLookup(ctxt->dict, fullname, -1);
1180 prefix = NULL;
1181 } else {
1182 name = xmlDictLookup(ctxt->dict, name, -1);
1183 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1184 }
1185
1186 /*
1187 * make sure there is some storage
1188 */
1189 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1190 if (defaults == NULL) {
1191 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001192 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001193 if (defaults == NULL)
1194 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001195 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001196 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001197 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1198 defaults, NULL) < 0) {
1199 xmlFree(defaults);
1200 goto mem_error;
1201 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001202 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001203 xmlDefAttrsPtr temp;
1204
1205 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001206 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001207 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001208 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001209 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001210 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001211 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1212 defaults, NULL) < 0) {
1213 xmlFree(defaults);
1214 goto mem_error;
1215 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001216 }
1217
1218 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001219 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001220 * are within the DTD and hen not associated to namespace names.
1221 */
1222 name = xmlSplitQName3(fullattr, &len);
1223 if (name == NULL) {
1224 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1225 prefix = NULL;
1226 } else {
1227 name = xmlDictLookup(ctxt->dict, name, -1);
1228 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1229 }
1230
Daniel Veillardae0765b2008-07-31 19:54:59 +00001231 defaults->values[5 * defaults->nbAttrs] = name;
1232 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001233 /* intern the string and precompute the end */
1234 len = xmlStrlen(value);
1235 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001236 defaults->values[5 * defaults->nbAttrs + 2] = value;
1237 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1238 if (ctxt->external)
1239 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1240 else
1241 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001242 defaults->nbAttrs++;
1243
1244 return;
1245
1246mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001247 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001248 return;
1249}
1250
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001251/**
1252 * xmlAddSpecialAttr:
1253 * @ctxt: an XML parser context
1254 * @fullname: the element fullname
1255 * @fullattr: the attribute fullname
1256 * @type: the attribute type
1257 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001258 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001259 */
1260static void
1261xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1262 const xmlChar *fullname,
1263 const xmlChar *fullattr,
1264 int type)
1265{
1266 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001267 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001268 if (ctxt->attsSpecial == NULL)
1269 goto mem_error;
1270 }
1271
Daniel Veillardac4118d2008-01-11 05:27:32 +00001272 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1273 return;
1274
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001275 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1276 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001277 return;
1278
1279mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001280 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001281 return;
1282}
1283
Daniel Veillard4432df22003-09-28 18:58:27 +00001284/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001285 * xmlCleanSpecialAttrCallback:
1286 *
1287 * Removes CDATA attributes from the special attribute table
1288 */
1289static void
1290xmlCleanSpecialAttrCallback(void *payload, void *data,
1291 const xmlChar *fullname, const xmlChar *fullattr,
1292 const xmlChar *unused ATTRIBUTE_UNUSED) {
1293 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1294
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001295 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001296 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1297 }
1298}
1299
1300/**
1301 * xmlCleanSpecialAttr:
1302 * @ctxt: an XML parser context
1303 *
1304 * Trim the list of attributes defined to remove all those of type
1305 * CDATA as they are not special. This call should be done when finishing
1306 * to parse the DTD and before starting to parse the document root.
1307 */
1308static void
1309xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1310{
1311 if (ctxt->attsSpecial == NULL)
1312 return;
1313
1314 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1315
1316 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1317 xmlHashFree(ctxt->attsSpecial, NULL);
1318 ctxt->attsSpecial = NULL;
1319 }
1320 return;
1321}
1322
1323/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001324 * xmlCheckLanguageID:
1325 * @lang: pointer to the string value
1326 *
1327 * Checks that the value conforms to the LanguageID production:
1328 *
1329 * NOTE: this is somewhat deprecated, those productions were removed from
1330 * the XML Second edition.
1331 *
1332 * [33] LanguageID ::= Langcode ('-' Subcode)*
1333 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1334 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1335 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1336 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1337 * [38] Subcode ::= ([a-z] | [A-Z])+
1338 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001339 * The current REC reference the sucessors of RFC 1766, currently 5646
1340 *
1341 * http://www.rfc-editor.org/rfc/rfc5646.txt
1342 * langtag = language
1343 * ["-" script]
1344 * ["-" region]
1345 * *("-" variant)
1346 * *("-" extension)
1347 * ["-" privateuse]
1348 * language = 2*3ALPHA ; shortest ISO 639 code
1349 * ["-" extlang] ; sometimes followed by
1350 * ; extended language subtags
1351 * / 4ALPHA ; or reserved for future use
1352 * / 5*8ALPHA ; or registered language subtag
1353 *
1354 * extlang = 3ALPHA ; selected ISO 639 codes
1355 * *2("-" 3ALPHA) ; permanently reserved
1356 *
1357 * script = 4ALPHA ; ISO 15924 code
1358 *
1359 * region = 2ALPHA ; ISO 3166-1 code
1360 * / 3DIGIT ; UN M.49 code
1361 *
1362 * variant = 5*8alphanum ; registered variants
1363 * / (DIGIT 3alphanum)
1364 *
1365 * extension = singleton 1*("-" (2*8alphanum))
1366 *
1367 * ; Single alphanumerics
1368 * ; "x" reserved for private use
1369 * singleton = DIGIT ; 0 - 9
1370 * / %x41-57 ; A - W
1371 * / %x59-5A ; Y - Z
1372 * / %x61-77 ; a - w
1373 * / %x79-7A ; y - z
1374 *
1375 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1376 * The parser below doesn't try to cope with extension or privateuse
1377 * that could be added but that's not interoperable anyway
1378 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001379 * Returns 1 if correct 0 otherwise
1380 **/
1381int
1382xmlCheckLanguageID(const xmlChar * lang)
1383{
Daniel Veillard60587d62010-11-04 15:16:27 +01001384 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001385
1386 if (cur == NULL)
1387 return (0);
1388 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001389 ((cur[0] == 'I') && (cur[1] == '-')) ||
1390 ((cur[0] == 'x') && (cur[1] == '-')) ||
1391 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001392 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001393 * Still allow IANA code and user code which were coming
1394 * from the previous version of the XML-1.0 specification
1395 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001396 */
1397 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001398 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001399 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1400 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001401 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001402 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001403 nxt = cur;
1404 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1405 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1406 nxt++;
1407 if (nxt - cur >= 4) {
1408 /*
1409 * Reserved
1410 */
1411 if ((nxt - cur > 8) || (nxt[0] != 0))
1412 return(0);
1413 return(1);
1414 }
1415 if (nxt - cur < 2)
1416 return(0);
1417 /* we got an ISO 639 code */
1418 if (nxt[0] == 0)
1419 return(1);
1420 if (nxt[0] != '-')
1421 return(0);
1422
1423 nxt++;
1424 cur = nxt;
1425 /* now we can have extlang or script or region or variant */
1426 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1427 goto region_m49;
1428
1429 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1430 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1431 nxt++;
1432 if (nxt - cur == 4)
1433 goto script;
1434 if (nxt - cur == 2)
1435 goto region;
1436 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1437 goto variant;
1438 if (nxt - cur != 3)
1439 return(0);
1440 /* we parsed an extlang */
1441 if (nxt[0] == 0)
1442 return(1);
1443 if (nxt[0] != '-')
1444 return(0);
1445
1446 nxt++;
1447 cur = nxt;
1448 /* now we can have script or region or variant */
1449 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1450 goto region_m49;
1451
1452 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1453 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1454 nxt++;
1455 if (nxt - cur == 2)
1456 goto region;
1457 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1458 goto variant;
1459 if (nxt - cur != 4)
1460 return(0);
1461 /* we parsed a script */
1462script:
1463 if (nxt[0] == 0)
1464 return(1);
1465 if (nxt[0] != '-')
1466 return(0);
1467
1468 nxt++;
1469 cur = nxt;
1470 /* now we can have region or variant */
1471 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1472 goto region_m49;
1473
1474 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1475 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1476 nxt++;
1477
1478 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1479 goto variant;
1480 if (nxt - cur != 2)
1481 return(0);
1482 /* we parsed a region */
1483region:
1484 if (nxt[0] == 0)
1485 return(1);
1486 if (nxt[0] != '-')
1487 return(0);
1488
1489 nxt++;
1490 cur = nxt;
1491 /* now we can just have a variant */
1492 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1493 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1494 nxt++;
1495
1496 if ((nxt - cur < 5) || (nxt - cur > 8))
1497 return(0);
1498
1499 /* we parsed a variant */
1500variant:
1501 if (nxt[0] == 0)
1502 return(1);
1503 if (nxt[0] != '-')
1504 return(0);
1505 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001506 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001507
1508region_m49:
1509 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1510 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1511 nxt += 3;
1512 goto region;
1513 }
1514 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001515}
1516
Owen Taylor3473f882001-02-23 17:55:21 +00001517/************************************************************************
1518 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001519 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001520 * *
1521 ************************************************************************/
1522
Daniel Veillard8ed10722009-08-20 19:17:36 +02001523static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1524 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001525
Daniel Veillard0fb18932003-09-07 09:14:37 +00001526#ifdef SAX2
1527/**
1528 * nsPush:
1529 * @ctxt: an XML parser context
1530 * @prefix: the namespace prefix or NULL
1531 * @URL: the namespace name
1532 *
1533 * Pushes a new parser namespace on top of the ns stack
1534 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001535 * Returns -1 in case of error, -2 if the namespace should be discarded
1536 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001537 */
1538static int
1539nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1540{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001541 if (ctxt->options & XML_PARSE_NSCLEAN) {
1542 int i;
Daniel Veillard711b15d2012-10-25 19:23:26 +08001543 for (i = ctxt->nsNr - 2;i >= 0;i -= 2) {
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001544 if (ctxt->nsTab[i] == prefix) {
1545 /* in scope */
1546 if (ctxt->nsTab[i + 1] == URL)
1547 return(-2);
1548 /* out of scope keep it */
1549 break;
1550 }
1551 }
1552 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001553 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1554 ctxt->nsMax = 10;
1555 ctxt->nsNr = 0;
1556 ctxt->nsTab = (const xmlChar **)
1557 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1558 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001559 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001560 ctxt->nsMax = 0;
1561 return (-1);
1562 }
1563 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001564 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001565 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001566 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1567 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1568 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001569 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001570 ctxt->nsMax /= 2;
1571 return (-1);
1572 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001573 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001574 }
1575 ctxt->nsTab[ctxt->nsNr++] = prefix;
1576 ctxt->nsTab[ctxt->nsNr++] = URL;
1577 return (ctxt->nsNr);
1578}
1579/**
1580 * nsPop:
1581 * @ctxt: an XML parser context
1582 * @nr: the number to pop
1583 *
1584 * Pops the top @nr parser prefix/namespace from the ns stack
1585 *
1586 * Returns the number of namespaces removed
1587 */
1588static int
1589nsPop(xmlParserCtxtPtr ctxt, int nr)
1590{
1591 int i;
1592
1593 if (ctxt->nsTab == NULL) return(0);
1594 if (ctxt->nsNr < nr) {
1595 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1596 nr = ctxt->nsNr;
1597 }
1598 if (ctxt->nsNr <= 0)
1599 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001600
Daniel Veillard0fb18932003-09-07 09:14:37 +00001601 for (i = 0;i < nr;i++) {
1602 ctxt->nsNr--;
1603 ctxt->nsTab[ctxt->nsNr] = NULL;
1604 }
1605 return(nr);
1606}
1607#endif
1608
1609static int
1610xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1611 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001612 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001613 int maxatts;
1614
1615 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001616 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001617 atts = (const xmlChar **)
1618 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001619 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001620 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001621 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1622 if (attallocs == NULL) goto mem_error;
1623 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001624 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001625 } else if (nr + 5 > ctxt->maxatts) {
1626 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001627 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1628 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001629 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001630 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001631 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1632 (maxatts / 5) * sizeof(int));
1633 if (attallocs == NULL) goto mem_error;
1634 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001635 ctxt->maxatts = maxatts;
1636 }
1637 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001638mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001639 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001640 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001641}
1642
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001643/**
1644 * inputPush:
1645 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001646 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001647 *
1648 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001649 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001650 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001651 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001652int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001653inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1654{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001655 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001656 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001657 if (ctxt->inputNr >= ctxt->inputMax) {
1658 ctxt->inputMax *= 2;
1659 ctxt->inputTab =
1660 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1661 ctxt->inputMax *
1662 sizeof(ctxt->inputTab[0]));
1663 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001664 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001665 xmlFreeInputStream(value);
1666 ctxt->inputMax /= 2;
1667 value = NULL;
1668 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669 }
1670 }
1671 ctxt->inputTab[ctxt->inputNr] = value;
1672 ctxt->input = value;
1673 return (ctxt->inputNr++);
1674}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001675/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001676 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001677 * @ctxt: an XML parser context
1678 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001679 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001680 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001681 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001682 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001683xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001684inputPop(xmlParserCtxtPtr ctxt)
1685{
1686 xmlParserInputPtr ret;
1687
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001688 if (ctxt == NULL)
1689 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001690 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001691 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692 ctxt->inputNr--;
1693 if (ctxt->inputNr > 0)
1694 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1695 else
1696 ctxt->input = NULL;
1697 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001698 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001699 return (ret);
1700}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001701/**
1702 * nodePush:
1703 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001704 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001705 *
1706 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001707 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001708 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001709 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001710int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001711nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1712{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001713 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001714 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001715 xmlNodePtr *tmp;
1716
1717 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1718 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001719 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001720 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001721 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001722 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001723 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001724 ctxt->nodeTab = tmp;
1725 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001726 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001727 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1728 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001729 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001730 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001731 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001732 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001733 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001734 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735 ctxt->nodeTab[ctxt->nodeNr] = value;
1736 ctxt->node = value;
1737 return (ctxt->nodeNr++);
1738}
Daniel Veillard8915c152008-08-26 13:05:34 +00001739
Daniel Veillard1c732d22002-11-30 11:22:59 +00001740/**
1741 * nodePop:
1742 * @ctxt: an XML parser context
1743 *
1744 * Pops the top element node from the node stack
1745 *
1746 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001747 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001748xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001749nodePop(xmlParserCtxtPtr ctxt)
1750{
1751 xmlNodePtr ret;
1752
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001753 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001754 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001755 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756 ctxt->nodeNr--;
1757 if (ctxt->nodeNr > 0)
1758 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1759 else
1760 ctxt->node = NULL;
1761 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001762 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001763 return (ret);
1764}
Daniel Veillarda2351322004-06-27 12:08:10 +00001765
1766#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001767/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001768 * nameNsPush:
1769 * @ctxt: an XML parser context
1770 * @value: the element name
1771 * @prefix: the element prefix
1772 * @URI: the element namespace name
1773 *
1774 * Pushes a new element name/prefix/URL on top of the name stack
1775 *
1776 * Returns -1 in case of error, the index in the stack otherwise
1777 */
1778static int
1779nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1780 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1781{
1782 if (ctxt->nameNr >= ctxt->nameMax) {
1783 const xmlChar * *tmp;
1784 void **tmp2;
1785 ctxt->nameMax *= 2;
1786 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1787 ctxt->nameMax *
1788 sizeof(ctxt->nameTab[0]));
1789 if (tmp == NULL) {
1790 ctxt->nameMax /= 2;
1791 goto mem_error;
1792 }
1793 ctxt->nameTab = tmp;
1794 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1795 ctxt->nameMax * 3 *
1796 sizeof(ctxt->pushTab[0]));
1797 if (tmp2 == NULL) {
1798 ctxt->nameMax /= 2;
1799 goto mem_error;
1800 }
1801 ctxt->pushTab = tmp2;
1802 }
1803 ctxt->nameTab[ctxt->nameNr] = value;
1804 ctxt->name = value;
1805 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1806 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001807 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001808 return (ctxt->nameNr++);
1809mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001810 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001811 return (-1);
1812}
1813/**
1814 * nameNsPop:
1815 * @ctxt: an XML parser context
1816 *
1817 * Pops the top element/prefix/URI name from the name stack
1818 *
1819 * Returns the name just removed
1820 */
1821static const xmlChar *
1822nameNsPop(xmlParserCtxtPtr ctxt)
1823{
1824 const xmlChar *ret;
1825
1826 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001827 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001828 ctxt->nameNr--;
1829 if (ctxt->nameNr > 0)
1830 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1831 else
1832 ctxt->name = NULL;
1833 ret = ctxt->nameTab[ctxt->nameNr];
1834 ctxt->nameTab[ctxt->nameNr] = NULL;
1835 return (ret);
1836}
Daniel Veillarda2351322004-06-27 12:08:10 +00001837#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001838
1839/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001840 * namePush:
1841 * @ctxt: an XML parser context
1842 * @value: the element name
1843 *
1844 * Pushes a new element name on top of the name stack
1845 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001846 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001847 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001848int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001849namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001850{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001851 if (ctxt == NULL) return (-1);
1852
Daniel Veillard1c732d22002-11-30 11:22:59 +00001853 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001854 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001855 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001856 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001857 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001858 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001859 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001860 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001861 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001862 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001863 }
1864 ctxt->nameTab[ctxt->nameNr] = value;
1865 ctxt->name = value;
1866 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001867mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001868 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001869 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001870}
1871/**
1872 * namePop:
1873 * @ctxt: an XML parser context
1874 *
1875 * Pops the top element name from the name stack
1876 *
1877 * Returns the name just removed
1878 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001879const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001880namePop(xmlParserCtxtPtr ctxt)
1881{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001882 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001883
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001884 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1885 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001886 ctxt->nameNr--;
1887 if (ctxt->nameNr > 0)
1888 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1889 else
1890 ctxt->name = NULL;
1891 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001892 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001893 return (ret);
1894}
Owen Taylor3473f882001-02-23 17:55:21 +00001895
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001896static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001897 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001898 int *tmp;
1899
Owen Taylor3473f882001-02-23 17:55:21 +00001900 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001901 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1902 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1903 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001904 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001905 ctxt->spaceMax /=2;
1906 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001907 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001908 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001909 }
1910 ctxt->spaceTab[ctxt->spaceNr] = val;
1911 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1912 return(ctxt->spaceNr++);
1913}
1914
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001915static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001916 int ret;
1917 if (ctxt->spaceNr <= 0) return(0);
1918 ctxt->spaceNr--;
1919 if (ctxt->spaceNr > 0)
1920 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1921 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001922 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001923 ret = ctxt->spaceTab[ctxt->spaceNr];
1924 ctxt->spaceTab[ctxt->spaceNr] = -1;
1925 return(ret);
1926}
1927
1928/*
1929 * Macros for accessing the content. Those should be used only by the parser,
1930 * and not exported.
1931 *
1932 * Dirty macros, i.e. one often need to make assumption on the context to
1933 * use them
1934 *
1935 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1936 * To be used with extreme caution since operations consuming
1937 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
1942 * RAW same as CUR but in the input buffer, bypass any token
1943 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
1946 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001947 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
1951 *
1952 * NEXT Skip to the next character, this does the proper decoding
1953 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001954 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001955 * CUR_CHAR(l) returns the current unicode character (int), set l
1956 * to the number of xmlChars used for the encoding [0-5].
1957 * CUR_SCHAR same but operate on a string instead of the context
1958 * COPY_BUF copy the current unicode char to the target buffer, increment
1959 * the index
1960 * GROW, SHRINK handling of input buffers
1961 */
1962
/*
 * Direct accessors on the current input of the parser context. They all
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (ctxt->input->cur[0])		/* current xmlChar */
#define CUR (ctxt->input->cur[0])		/* same value as RAW */
#define NXT(val) (*(ctxt->input->cur + (val)))	/* xmlChar val positions ahead */
#define CUR_PTR (ctxt->input->cur)		/* the cursor itself, assignable */
1967
/*
 * CMPn(s, c1, ..., cn) is true when the first n bytes found at s are
 * c1 ... cn, compared as unsigned char. The bytes are tested in order and
 * the test stops at the first mismatch. Note that s is evaluated several
 * times, so it must be free of side effects.
 *
 * Every parameter is parenthesized in the expansion so that operator
 * expressions passed as arguments keep their meaning.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1985
/*
 * SKIP(val): advance the cursor, the column and the character counter
 * by val, without any newline accounting. Afterwards, hand over to
 * xmlParserHandlePEReference() if the cursor sits on a '%', and pop the
 * input if the cursor sits on a 0 byte and no more data can be read.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1993
/*
 * SKIPL(val): advance the cursor by val xmlChars one at a time, keeping
 * the line and column of the input up to date when a newline is crossed.
 * Afterwards, hand over to xmlParserHandlePEReference() if the cursor
 * sits on a '%', and pop the input if the cursor sits on a 0 byte and no
 * more data can be read.
 *
 * val is parenthesized in the loop test so that an operator expression
 * passed as argument is compared as a whole.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
        if (*(ctxt->input->cur) == '\n') {				\
            ctxt->input->line++; ctxt->input->col = 1;			\
        } else ctxt->input->col++;					\
        ctxt->nbChars++;						\
        ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2008
Daniel Veillarda880b122003-04-21 21:36:41 +00002009#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002010 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2011 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002012 xmlSHRINK (ctxt);
2013
2014static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2015 xmlParserInputShrink(ctxt->input);
2016 if ((*ctxt->input->cur == 0) &&
2017 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2018 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002019 }
Owen Taylor3473f882001-02-23 17:55:21 +00002020
Daniel Veillarda880b122003-04-21 21:36:41 +00002021#define GROW if ((ctxt->progressive == 0) && \
2022 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002023 xmlGROW (ctxt);
2024
2025static void xmlGROW (xmlParserCtxtPtr ctxt) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002026 if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
2027 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
Daniel Veillard153cf152012-10-26 13:50:47 +08002028 ((ctxt->input->buf) && (ctxt->input->buf->readcallback != xmlNop)) &&
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002029 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2030 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard0df83ca2012-07-30 15:41:10 +08002031 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002032 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002033 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002034 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002035 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2036 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002037}
Owen Taylor3473f882001-02-23 17:55:21 +00002038
/* skip the blanks at the cursor, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance the cursor, the column and the character counter by one,
 * without any newline accounting; more data is requested when the cursor
 * reaches a 0 byte.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * updating line and column, then hand over to
 * xmlParserHandlePEReference() if the cursor sits on a '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* current character of the context / of the string s, its length in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append the character v to the buffer b at index i
 * and advance i: a plain store when l is 1, xmlCopyCharMultiByte()
 * otherwise. The parameters are parenthesized so that operator
 * expressions passed as arguments are tested and stored as a whole.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002065
2066/**
2067 * xmlSkipBlankChars:
2068 * @ctxt: the XML parser context
2069 *
2070 * skip all blanks character found at that point in the input streams.
2071 * It pops up finished entities in the process if allowable at that point.
2072 *
2073 * Returns the number of space chars skipped
2074 */
2075
2076int
2077xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002078 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002079
2080 /*
2081 * It's Okay to use CUR/NEXT here since all the blanks are on
2082 * the ASCII range.
2083 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002084 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2085 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002086 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002087 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002088 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002089 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002090 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002091 if (*cur == '\n') {
2092 ctxt->input->line++; ctxt->input->col = 1;
2093 }
2094 cur++;
2095 res++;
2096 if (*cur == 0) {
2097 ctxt->input->cur = cur;
2098 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2099 cur = ctxt->input->cur;
2100 }
2101 }
2102 ctxt->input->cur = cur;
2103 } else {
2104 int cur;
2105 do {
2106 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002107 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002108 NEXT;
2109 cur = CUR;
2110 res++;
2111 }
2112 while ((cur == 0) && (ctxt->inputNr > 1) &&
2113 (ctxt->instate != XML_PARSER_COMMENT)) {
2114 xmlPopInput(ctxt);
2115 cur = CUR;
2116 }
2117 /*
2118 * Need to handle support of entities branching here
2119 */
2120 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2121 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2122 }
Owen Taylor3473f882001-02-23 17:55:21 +00002123 return(res);
2124}
2125
2126/************************************************************************
2127 * *
2128 * Commodity functions to handle entities *
2129 * *
2130 ************************************************************************/
2131
2132/**
2133 * xmlPopInput:
2134 * @ctxt: an XML parser context
2135 *
2136 * xmlPopInput: the current input pointed by ctxt->input came to an end
2137 * pop it and return the next char.
2138 *
2139 * Returns the current xmlChar in the parser context
2140 */
2141xmlChar
2142xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002143 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002144 if (xmlParserDebugEntities)
2145 xmlGenericError(xmlGenericErrorContext,
2146 "Popping input %d\n", ctxt->inputNr);
2147 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002148 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002149 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2150 return(xmlPopInput(ctxt));
2151 return(CUR);
2152}
2153
2154/**
2155 * xmlPushInput:
2156 * @ctxt: an XML parser context
2157 * @input: an XML parser input fragment (entity, XML fragment ...).
2158 *
2159 * xmlPushInput: switch to a new input stream which is stacked on top
2160 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002161 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002162 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002163int
Owen Taylor3473f882001-02-23 17:55:21 +00002164xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002165 int ret;
2166 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002167
2168 if (xmlParserDebugEntities) {
2169 if ((ctxt->input != NULL) && (ctxt->input->filename))
2170 xmlGenericError(xmlGenericErrorContext,
2171 "%s(%d): ", ctxt->input->filename,
2172 ctxt->input->line);
2173 xmlGenericError(xmlGenericErrorContext,
2174 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2175 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002176 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002177 if (ctxt->instate == XML_PARSER_EOF)
2178 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002179 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002180 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002181}
2182
2183/**
2184 * xmlParseCharRef:
2185 * @ctxt: an XML parser context
2186 *
2187 * parse Reference declarations
2188 *
2189 * [66] CharRef ::= '&#' [0-9]+ ';' |
2190 * '&#x' [0-9a-fA-F]+ ';'
2191 *
2192 * [ WFC: Legal Character ]
2193 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002194 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002195 *
2196 * Returns the value parsed (as an int), 0 in case of error
2197 */
2198int
2199xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002200 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002201 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002202 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002203
Owen Taylor3473f882001-02-23 17:55:21 +00002204 /*
2205 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2206 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002207 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002208 (NXT(2) == 'x')) {
2209 SKIP(3);
2210 GROW;
2211 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002212 if (count++ > 20) {
2213 count = 0;
2214 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002215 if (ctxt->instate == XML_PARSER_EOF)
2216 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002217 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002218 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002219 val = val * 16 + (CUR - '0');
2220 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2221 val = val * 16 + (CUR - 'a') + 10;
2222 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2223 val = val * 16 + (CUR - 'A') + 10;
2224 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002225 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002226 val = 0;
2227 break;
2228 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002229 if (val > 0x10FFFF)
2230 outofrange = val;
2231
Owen Taylor3473f882001-02-23 17:55:21 +00002232 NEXT;
2233 count++;
2234 }
2235 if (RAW == ';') {
2236 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002237 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002238 ctxt->nbChars ++;
2239 ctxt->input->cur++;
2240 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002241 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002242 SKIP(2);
2243 GROW;
2244 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002245 if (count++ > 20) {
2246 count = 0;
2247 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002248 if (ctxt->instate == XML_PARSER_EOF)
2249 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002250 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002251 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002252 val = val * 10 + (CUR - '0');
2253 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002254 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002255 val = 0;
2256 break;
2257 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002258 if (val > 0x10FFFF)
2259 outofrange = val;
2260
Owen Taylor3473f882001-02-23 17:55:21 +00002261 NEXT;
2262 count++;
2263 }
2264 if (RAW == ';') {
2265 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002266 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002267 ctxt->nbChars ++;
2268 ctxt->input->cur++;
2269 }
2270 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002271 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002272 }
2273
2274 /*
2275 * [ WFC: Legal Character ]
2276 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002277 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002278 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002279 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002280 return(val);
2281 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002282 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2283 "xmlParseCharRef: invalid xmlChar value %d\n",
2284 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002285 }
2286 return(0);
2287}
2288
2289/**
2290 * xmlParseStringCharRef:
2291 * @ctxt: an XML parser context
2292 * @str: a pointer to an index in the string
2293 *
2294 * parse Reference declarations, variant parsing from a string rather
2295 * than an an input flow.
2296 *
2297 * [66] CharRef ::= '&#' [0-9]+ ';' |
2298 * '&#x' [0-9a-fA-F]+ ';'
2299 *
2300 * [ WFC: Legal Character ]
2301 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002302 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002303 *
2304 * Returns the value parsed (as an int), 0 in case of error, str will be
2305 * updated to the current value of the index
2306 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002307static int
Owen Taylor3473f882001-02-23 17:55:21 +00002308xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2309 const xmlChar *ptr;
2310 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002311 unsigned int val = 0;
2312 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002313
2314 if ((str == NULL) || (*str == NULL)) return(0);
2315 ptr = *str;
2316 cur = *ptr;
2317 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2318 ptr += 3;
2319 cur = *ptr;
2320 while (cur != ';') { /* Non input consuming loop */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002321 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002322 val = val * 16 + (cur - '0');
2323 else if ((cur >= 'a') && (cur <= 'f'))
2324 val = val * 16 + (cur - 'a') + 10;
2325 else if ((cur >= 'A') && (cur <= 'F'))
2326 val = val * 16 + (cur - 'A') + 10;
2327 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002328 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002329 val = 0;
2330 break;
2331 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002332 if (val > 0x10FFFF)
2333 outofrange = val;
2334
Owen Taylor3473f882001-02-23 17:55:21 +00002335 ptr++;
2336 cur = *ptr;
2337 }
2338 if (cur == ';')
2339 ptr++;
2340 } else if ((cur == '&') && (ptr[1] == '#')){
2341 ptr += 2;
2342 cur = *ptr;
2343 while (cur != ';') { /* Non input consuming loops */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002344 if ((cur >= '0') && (cur <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002345 val = val * 10 + (cur - '0');
2346 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002347 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002348 val = 0;
2349 break;
2350 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002351 if (val > 0x10FFFF)
2352 outofrange = val;
2353
Owen Taylor3473f882001-02-23 17:55:21 +00002354 ptr++;
2355 cur = *ptr;
2356 }
2357 if (cur == ';')
2358 ptr++;
2359 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002360 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002361 return(0);
2362 }
2363 *str = ptr;
2364
2365 /*
2366 * [ WFC: Legal Character ]
2367 * Characters referred to using character references must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002368 * production for Char.
Owen Taylor3473f882001-02-23 17:55:21 +00002369 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002370 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002371 return(val);
2372 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002373 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2374 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2375 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002376 }
2377 return(0);
2378}
2379
2380/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002381 * xmlNewBlanksWrapperInputStream:
2382 * @ctxt: an XML parser context
2383 * @entity: an Entity pointer
2384 *
2385 * Create a new input stream for wrapping
2386 * blanks around a PEReference
2387 *
2388 * Returns the new input stream or NULL
2389 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002390
Daniel Veillardf5582f12002-06-11 10:08:16 +00002391static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002392
Daniel Veillardf4862f02002-09-10 11:13:43 +00002393static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002394xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2395 xmlParserInputPtr input;
2396 xmlChar *buffer;
2397 size_t length;
2398 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002399 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2400 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002401 return(NULL);
2402 }
2403 if (xmlParserDebugEntities)
2404 xmlGenericError(xmlGenericErrorContext,
2405 "new blanks wrapper for entity: %s\n", entity->name);
2406 input = xmlNewInputStream(ctxt);
2407 if (input == NULL) {
2408 return(NULL);
2409 }
2410 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002411 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002412 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002413 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002414 xmlFree(input);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002415 return(NULL);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002416 }
2417 buffer [0] = ' ';
2418 buffer [1] = '%';
2419 buffer [length-3] = ';';
2420 buffer [length-2] = ' ';
2421 buffer [length-1] = 0;
2422 memcpy(buffer + 2, entity->name, length - 5);
2423 input->free = deallocblankswrapper;
2424 input->base = buffer;
2425 input->cur = buffer;
2426 input->length = length;
2427 input->end = &buffer[length];
2428 return(input);
2429}
2430
2431/**
Owen Taylor3473f882001-02-23 17:55:21 +00002432 * xmlParserHandlePEReference:
2433 * @ctxt: the parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002434 *
Owen Taylor3473f882001-02-23 17:55:21 +00002435 * [69] PEReference ::= '%' Name ';'
2436 *
2437 * [ WFC: No Recursion ]
2438 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002439 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00002440 *
2441 * [ WFC: Entity Declared ]
2442 * In a document without any DTD, a document with only an internal DTD
2443 * subset which contains no parameter entity references, or a document
2444 * with "standalone='yes'", ... ... The declaration of a parameter
2445 * entity must precede any reference to it...
2446 *
2447 * [ VC: Entity Declared ]
2448 * In a document with an external subset or external parameter entities
2449 * with "standalone='no'", ... ... The declaration of a parameter entity
2450 * must precede any reference to it...
2451 *
2452 * [ WFC: In DTD ]
2453 * Parameter-entity references may only appear in the DTD.
2454 * NOTE: misleading but this is handled.
2455 *
2456 * A PEReference may have been detected in the current input stream
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002457 * the handling is done accordingly to
Owen Taylor3473f882001-02-23 17:55:21 +00002458 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002459 * i.e.
Owen Taylor3473f882001-02-23 17:55:21 +00002460 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002461 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002462 */
2463void
2464xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002465 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002466 xmlEntityPtr entity = NULL;
2467 xmlParserInputPtr input;
2468
Owen Taylor3473f882001-02-23 17:55:21 +00002469 if (RAW != '%') return;
2470 switch(ctxt->instate) {
2471 case XML_PARSER_CDATA_SECTION:
2472 return;
2473 case XML_PARSER_COMMENT:
2474 return;
2475 case XML_PARSER_START_TAG:
2476 return;
2477 case XML_PARSER_END_TAG:
2478 return;
2479 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002480 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002481 return;
2482 case XML_PARSER_PROLOG:
2483 case XML_PARSER_START:
2484 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002485 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002486 return;
2487 case XML_PARSER_ENTITY_DECL:
2488 case XML_PARSER_CONTENT:
2489 case XML_PARSER_ATTRIBUTE_VALUE:
2490 case XML_PARSER_PI:
2491 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002492 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002493 /* we just ignore it there */
2494 return;
2495 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002496 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002497 return;
2498 case XML_PARSER_ENTITY_VALUE:
2499 /*
2500 * NOTE: in the case of entity values, we don't do the
2501 * substitution here since we need the literal
2502 * entity value to be able to save the internal
2503 * subset of the document.
2504 * This will be handled by xmlStringDecodeEntities
2505 */
2506 return;
2507 case XML_PARSER_DTD:
2508 /*
2509 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2510 * In the internal DTD subset, parameter-entity references
2511 * can occur only where markup declarations can occur, not
2512 * within markup declarations.
2513 * In that case this is handled in xmlParseMarkupDecl
2514 */
2515 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2516 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002517 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002518 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002519 break;
2520 case XML_PARSER_IGNORE:
2521 return;
2522 }
2523
2524 NEXT;
2525 name = xmlParseName(ctxt);
2526 if (xmlParserDebugEntities)
2527 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002528 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002529 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002530 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002531 } else {
2532 if (RAW == ';') {
2533 NEXT;
2534 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2535 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2536 if (entity == NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002537
Owen Taylor3473f882001-02-23 17:55:21 +00002538 /*
2539 * [ WFC: Entity Declared ]
2540 * In a document without any DTD, a document with only an
2541 * internal DTD subset which contains no parameter entity
2542 * references, or a document with "standalone='yes'", ...
2543 * ... The declaration of a parameter entity must precede
2544 * any reference to it...
2545 */
2546 if ((ctxt->standalone == 1) ||
2547 ((ctxt->hasExternalSubset == 0) &&
2548 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002549 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002550 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002551 } else {
2552 /*
2553 * [ VC: Entity Declared ]
2554 * In a document with an external subset or external
2555 * parameter entities with "standalone='no'", ...
2556 * ... The declaration of a parameter entity must precede
2557 * any reference to it...
2558 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002559 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2560 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2561 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002562 name, NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002563 } else
Daniel Veillard24eb9782003-10-04 21:08:09 +00002564 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2565 "PEReference: %%%s; not found\n",
2566 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002567 ctxt->valid = 0;
2568 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002569 } else if (ctxt->input->free != deallocblankswrapper) {
2570 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002571 if (xmlPushInput(ctxt, input) < 0)
2572 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002573 } else {
2574 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2575 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002576 xmlChar start[4];
2577 xmlCharEncoding enc;
2578
Owen Taylor3473f882001-02-23 17:55:21 +00002579 /*
2580 * handle the extra spaces added before and after
2581 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002582 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002583 */
2584 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002585 if (xmlPushInput(ctxt, input) < 0)
2586 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002587
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002588 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +00002589 * Get the 4 first bytes and decode the charset
2590 * if enc != XML_CHAR_ENCODING_NONE
2591 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002592 * Note that, since we may have some non-UTF8
2593 * encoding (like UTF16, bug 135229), the 'length'
2594 * is not known, but we can calculate based upon
2595 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002596 */
2597 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002598 if (ctxt->instate == XML_PARSER_EOF)
2599 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002600 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002601 start[0] = RAW;
2602 start[1] = NXT(1);
2603 start[2] = NXT(2);
2604 start[3] = NXT(3);
2605 enc = xmlDetectCharEncoding(start, 4);
2606 if (enc != XML_CHAR_ENCODING_NONE) {
2607 xmlSwitchEncoding(ctxt, enc);
2608 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002609 }
2610
Owen Taylor3473f882001-02-23 17:55:21 +00002611 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002612 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2613 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002614 xmlParseTextDecl(ctxt);
2615 }
Owen Taylor3473f882001-02-23 17:55:21 +00002616 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002617 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2618 "PEReference: %s is not a parameter entity\n",
2619 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002620 }
2621 }
2622 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002623 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002624 }
Owen Taylor3473f882001-02-23 17:55:21 +00002625 }
2626}
2627
/*
 * Macro used to grow the current buffer to twice its size plus n bytes.
 * buffer must be a plain identifier naming an xmlChar pointer;
 * buffer##_size is expected to be a size_t holding its allocated size.
 * n may be any integer expression: it is parenthesized in the expansion.
 * mem_error: is a label expected in the calling function to handle
 * memory allocation failures and size overflow; on failure buffer and
 * buffer##_size are left untouched.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2642
2643/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002644 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002645 * @ctxt: the parser context
2646 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002647 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002648 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2649 * @end: an end marker xmlChar, 0 if none
2650 * @end2: an end marker xmlChar, 0 if none
2651 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002652 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002653 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002654 *
2655 * [67] Reference ::= EntityRef | CharRef
2656 *
2657 * [69] PEReference ::= '%' Name ';'
2658 *
2659 * Returns A newly allocated string with the substitution done. The caller
2660 * must deallocate it !
2661 */
2662xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002663xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2664 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002665 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002666 size_t buffer_size = 0;
2667 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002668
2669 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002670 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002671 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002672 xmlEntityPtr ent;
2673 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002674
Daniel Veillarda82b1822004-11-08 16:24:57 +00002675 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002676 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002677 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002678
Daniel Veillard0161e632008-08-28 15:36:32 +00002679 if (((ctxt->depth > 40) &&
2680 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2681 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002682 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002683 return(NULL);
2684 }
2685
2686 /*
2687 * allocate a translation buffer.
2688 */
2689 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002690 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002691 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002692
2693 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002694 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002695 * we are operating on already parsed values.
2696 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002697 if (str < last)
2698 c = CUR_SCHAR(str, l);
2699 else
2700 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002701 while ((c != 0) && (c != end) && /* non input consuming loop */
2702 (c != end2) && (c != end3)) {
2703
2704 if (c == 0) break;
2705 if ((c == '&') && (str[1] == '#')) {
2706 int val = xmlParseStringCharRef(ctxt, &str);
2707 if (val != 0) {
2708 COPY_BUF(0,buffer,nbchars,val);
2709 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002710 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002711 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002712 }
Owen Taylor3473f882001-02-23 17:55:21 +00002713 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2714 if (xmlParserDebugEntities)
2715 xmlGenericError(xmlGenericErrorContext,
2716 "String decoding Entity Reference: %.30s\n",
2717 str);
2718 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002719 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2720 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002721 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002722 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002723 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002724 if ((ent != NULL) &&
2725 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2726 if (ent->content != NULL) {
2727 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002728 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002729 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002730 }
Owen Taylor3473f882001-02-23 17:55:21 +00002731 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002732 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2733 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002734 }
2735 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002736 ctxt->depth++;
2737 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2738 0, 0, 0);
2739 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002740
Owen Taylor3473f882001-02-23 17:55:21 +00002741 if (rep != NULL) {
2742 current = rep;
2743 while (*current != 0) { /* non input consuming loop */
2744 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002745 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002746 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2747 goto int_error;
2748 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002749 }
2750 }
2751 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002752 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002753 }
2754 } else if (ent != NULL) {
2755 int i = xmlStrlen(ent->name);
2756 const xmlChar *cur = ent->name;
2757
2758 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002759 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002760 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002761 }
2762 for (;i > 0;i--)
2763 buffer[nbchars++] = *cur++;
2764 buffer[nbchars++] = ';';
2765 }
2766 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2767 if (xmlParserDebugEntities)
2768 xmlGenericError(xmlGenericErrorContext,
2769 "String decoding PE Reference: %.30s\n", str);
2770 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002771 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2772 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002773 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002774 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002775 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002776 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002777 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002778 }
Owen Taylor3473f882001-02-23 17:55:21 +00002779 ctxt->depth++;
2780 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2781 0, 0, 0);
2782 ctxt->depth--;
2783 if (rep != NULL) {
2784 current = rep;
2785 while (*current != 0) { /* non input consuming loop */
2786 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002787 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002788 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2789 goto int_error;
2790 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002791 }
2792 }
2793 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002794 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002795 }
2796 }
2797 } else {
2798 COPY_BUF(l,buffer,nbchars,c);
2799 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002800 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2801 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002802 }
2803 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002804 if (str < last)
2805 c = CUR_SCHAR(str, l);
2806 else
2807 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002808 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002809 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002810 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002811
2812mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002813 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002814int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002815 if (rep != NULL)
2816 xmlFree(rep);
2817 if (buffer != NULL)
2818 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002819 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002820}
2821
Daniel Veillarde57ec792003-09-10 10:50:59 +00002822/**
2823 * xmlStringDecodeEntities:
2824 * @ctxt: the parser context
2825 * @str: the input string
2826 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2827 * @end: an end marker xmlChar, 0 if none
2828 * @end2: an end marker xmlChar, 0 if none
2829 * @end3: an end marker xmlChar, 0 if none
Daniel Veillardf8e3db02012-09-11 13:26:36 +08002830 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002831 * Takes a entity string content and process to do the adequate substitutions.
2832 *
2833 * [67] Reference ::= EntityRef | CharRef
2834 *
2835 * [69] PEReference ::= '%' Name ';'
2836 *
2837 * Returns A newly allocated string with the substitution done. The caller
2838 * must deallocate it !
2839 */
2840xmlChar *
2841xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2842 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002843 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002844 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2845 end, end2, end3));
2846}
Owen Taylor3473f882001-02-23 17:55:21 +00002847
2848/************************************************************************
2849 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002850 * Commodity functions, cleanup needed ? *
2851 * *
2852 ************************************************************************/
2853
2854/**
2855 * areBlanks:
2856 * @ctxt: an XML parser context
2857 * @str: a xmlChar *
2858 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002859 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002860 *
2861 * Is this a sequence of blank chars that one can ignore ?
2862 *
2863 * Returns 1 if ignorable 0 otherwise.
2864 */
2865
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002866static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2867 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002868 int i, ret;
2869 xmlNodePtr lastChild;
2870
Daniel Veillard05c13a22001-09-09 08:38:09 +00002871 /*
2872 * Don't spend time trying to differentiate them, the same callback is
2873 * used !
2874 */
2875 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002876 return(0);
2877
Owen Taylor3473f882001-02-23 17:55:21 +00002878 /*
2879 * Check for xml:space value.
2880 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002881 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2882 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002883 return(0);
2884
2885 /*
2886 * Check that the string is made of blanks
2887 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002888 if (blank_chars == 0) {
2889 for (i = 0;i < len;i++)
2890 if (!(IS_BLANK_CH(str[i]))) return(0);
2891 }
Owen Taylor3473f882001-02-23 17:55:21 +00002892
2893 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002894 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002895 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002896 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002897 if (ctxt->myDoc != NULL) {
2898 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2899 if (ret == 0) return(1);
2900 if (ret == 1) return(0);
2901 }
2902
2903 /*
2904 * Otherwise, heuristic :-\
2905 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002906 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002907 if ((ctxt->node->children == NULL) &&
2908 (RAW == '<') && (NXT(1) == '/')) return(0);
2909
2910 lastChild = xmlGetLastChild(ctxt->node);
2911 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002912 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2913 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002914 } else if (xmlNodeIsText(lastChild))
2915 return(0);
2916 else if ((ctxt->node->children != NULL) &&
2917 (xmlNodeIsText(ctxt->node->children)))
2918 return(0);
2919 return(1);
2920}
2921
Owen Taylor3473f882001-02-23 17:55:21 +00002922/************************************************************************
2923 * *
2924 * Extra stuff for namespace support *
2925 * Relates to http://www.w3.org/TR/WD-xml-names *
2926 * *
2927 ************************************************************************/
2928
2929/**
2930 * xmlSplitQName:
2931 * @ctxt: an XML parser context
2932 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002933 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002934 *
2935 * parse an UTF8 encoded XML qualified name string
2936 *
2937 * [NS 5] QName ::= (Prefix ':')? LocalPart
2938 *
2939 * [NS 6] Prefix ::= NCName
2940 *
2941 * [NS 7] LocalPart ::= NCName
2942 *
2943 * Returns the local part, and prefix is updated
2944 * to get the Prefix if any.
2945 */
2946
2947xmlChar *
2948xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2949 xmlChar buf[XML_MAX_NAMELEN + 5];
2950 xmlChar *buffer = NULL;
2951 int len = 0;
2952 int max = XML_MAX_NAMELEN;
2953 xmlChar *ret = NULL;
2954 const xmlChar *cur = name;
2955 int c;
2956
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002957 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002958 *prefix = NULL;
2959
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002960 if (cur == NULL) return(NULL);
2961
Owen Taylor3473f882001-02-23 17:55:21 +00002962#ifndef XML_XML_NAMESPACE
2963 /* xml: prefix is not really a namespace */
2964 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2965 (cur[2] == 'l') && (cur[3] == ':'))
2966 return(xmlStrdup(name));
2967#endif
2968
Daniel Veillard597bc482003-07-24 16:08:28 +00002969 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002970 if (cur[0] == ':')
2971 return(xmlStrdup(name));
2972
2973 c = *cur++;
2974 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2975 buf[len++] = c;
2976 c = *cur++;
2977 }
2978 if (len >= max) {
2979 /*
2980 * Okay someone managed to make a huge name, so he's ready to pay
2981 * for the processing speed.
2982 */
2983 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002984
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002985 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002986 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002987 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002988 return(NULL);
2989 }
2990 memcpy(buffer, buf, len);
2991 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2992 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002993 xmlChar *tmp;
2994
Owen Taylor3473f882001-02-23 17:55:21 +00002995 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002996 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002997 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002998 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002999 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003000 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003001 return(NULL);
3002 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003003 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003004 }
3005 buffer[len++] = c;
3006 c = *cur++;
3007 }
3008 buffer[len] = 0;
3009 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003010
Daniel Veillard597bc482003-07-24 16:08:28 +00003011 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003012 if (buffer != NULL)
3013 xmlFree(buffer);
3014 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003015 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003016 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003017
Owen Taylor3473f882001-02-23 17:55:21 +00003018 if (buffer == NULL)
3019 ret = xmlStrndup(buf, len);
3020 else {
3021 ret = buffer;
3022 buffer = NULL;
3023 max = XML_MAX_NAMELEN;
3024 }
3025
3026
3027 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003028 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003029 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003030 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003031 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003032 }
Owen Taylor3473f882001-02-23 17:55:21 +00003033 len = 0;
3034
Daniel Veillardbb284f42002-10-16 18:02:47 +00003035 /*
3036 * Check that the first character is proper to start
3037 * a new name
3038 */
3039 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3040 ((c >= 0x41) && (c <= 0x5A)) ||
3041 (c == '_') || (c == ':'))) {
3042 int l;
3043 int first = CUR_SCHAR(cur, l);
3044
3045 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003046 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003047 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003048 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003049 }
3050 }
3051 cur++;
3052
Owen Taylor3473f882001-02-23 17:55:21 +00003053 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3054 buf[len++] = c;
3055 c = *cur++;
3056 }
3057 if (len >= max) {
3058 /*
3059 * Okay someone managed to make a huge name, so he's ready to pay
3060 * for the processing speed.
3061 */
3062 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003063
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003064 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003065 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003066 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003067 return(NULL);
3068 }
3069 memcpy(buffer, buf, len);
3070 while (c != 0) { /* tested bigname2.xml */
3071 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003072 xmlChar *tmp;
3073
Owen Taylor3473f882001-02-23 17:55:21 +00003074 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003075 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003076 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003077 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003078 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003079 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003080 return(NULL);
3081 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003082 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003083 }
3084 buffer[len++] = c;
3085 c = *cur++;
3086 }
3087 buffer[len] = 0;
3088 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003089
Owen Taylor3473f882001-02-23 17:55:21 +00003090 if (buffer == NULL)
3091 ret = xmlStrndup(buf, len);
3092 else {
3093 ret = buffer;
3094 }
3095 }
3096
3097 return(ret);
3098}
3099
3100/************************************************************************
3101 * *
3102 * The parser itself *
3103 * Relates to http://www.w3.org/TR/REC-xml *
3104 * *
3105 ************************************************************************/
3106
Daniel Veillard34e3f642008-07-29 09:02:27 +00003107/************************************************************************
3108 * *
3109 * Routines to parse Name, NCName and NmToken *
3110 * *
3111 ************************************************************************/
#ifdef DEBUG
/*
 * Debug-only call counters, one per name parsing routine; each routine
 * bumps its own counter on entry.
 */
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseStringName = 0;
static unsigned long nbParseNmToken = 0;
#endif
3120
Daniel Veillard34e3f642008-07-29 09:02:27 +00003121/*
3122 * The two following functions are related to the change of accepted
3123 * characters for Name and NmToken in the Revision 5 of XML-1.0
3124 * They correspond to the modified production [4] and the new production [4a]
3125 * changes in that revision. Also note that the macros used for the
3126 * productions Letter, Digit, CombiningChar and Extender are not needed
3127 * anymore.
3128 * We still keep compatibility to pre-revision5 parsing semantic if the
3129 * new XML_PARSE_OLD10 option is given to the parser.
3130 */
3131static int
3132xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3133 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3134 /*
3135 * Use the new checks of production [4] [4a] amd [5] of the
3136 * Update 5 of XML-1.0
3137 */
3138 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3139 (((c >= 'a') && (c <= 'z')) ||
3140 ((c >= 'A') && (c <= 'Z')) ||
3141 (c == '_') || (c == ':') ||
3142 ((c >= 0xC0) && (c <= 0xD6)) ||
3143 ((c >= 0xD8) && (c <= 0xF6)) ||
3144 ((c >= 0xF8) && (c <= 0x2FF)) ||
3145 ((c >= 0x370) && (c <= 0x37D)) ||
3146 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3147 ((c >= 0x200C) && (c <= 0x200D)) ||
3148 ((c >= 0x2070) && (c <= 0x218F)) ||
3149 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3150 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3151 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3152 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3153 ((c >= 0x10000) && (c <= 0xEFFFF))))
3154 return(1);
3155 } else {
3156 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3157 return(1);
3158 }
3159 return(0);
3160}
3161
3162static int
3163xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3164 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3165 /*
3166 * Use the new checks of production [4] [4a] amd [5] of the
3167 * Update 5 of XML-1.0
3168 */
3169 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3170 (((c >= 'a') && (c <= 'z')) ||
3171 ((c >= 'A') && (c <= 'Z')) ||
3172 ((c >= '0') && (c <= '9')) || /* !start */
3173 (c == '_') || (c == ':') ||
3174 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3175 ((c >= 0xC0) && (c <= 0xD6)) ||
3176 ((c >= 0xD8) && (c <= 0xF6)) ||
3177 ((c >= 0xF8) && (c <= 0x2FF)) ||
3178 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3179 ((c >= 0x370) && (c <= 0x37D)) ||
3180 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3181 ((c >= 0x200C) && (c <= 0x200D)) ||
3182 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3183 ((c >= 0x2070) && (c <= 0x218F)) ||
3184 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3185 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3186 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3187 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3188 ((c >= 0x10000) && (c <= 0xEFFFF))))
3189 return(1);
3190 } else {
3191 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3192 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003193 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003194 (IS_COMBINING(c)) ||
3195 (IS_EXTENDER(c)))
3196 return(1);
3197 }
3198 return(0);
3199}
3200
Daniel Veillarde57ec792003-09-10 10:50:59 +00003201static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003202 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003203
Daniel Veillard34e3f642008-07-29 09:02:27 +00003204static const xmlChar *
3205xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3206 int len = 0, l;
3207 int c;
3208 int count = 0;
3209
Daniel Veillardc6561462009-03-25 10:22:31 +00003210#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003211 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003212#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003213
3214 /*
3215 * Handler for more complex cases
3216 */
3217 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003218 if (ctxt->instate == XML_PARSER_EOF)
3219 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003220 c = CUR_CHAR(l);
3221 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3222 /*
3223 * Use the new checks of production [4] [4a] amd [5] of the
3224 * Update 5 of XML-1.0
3225 */
3226 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3227 (!(((c >= 'a') && (c <= 'z')) ||
3228 ((c >= 'A') && (c <= 'Z')) ||
3229 (c == '_') || (c == ':') ||
3230 ((c >= 0xC0) && (c <= 0xD6)) ||
3231 ((c >= 0xD8) && (c <= 0xF6)) ||
3232 ((c >= 0xF8) && (c <= 0x2FF)) ||
3233 ((c >= 0x370) && (c <= 0x37D)) ||
3234 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3235 ((c >= 0x200C) && (c <= 0x200D)) ||
3236 ((c >= 0x2070) && (c <= 0x218F)) ||
3237 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3238 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3239 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3240 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3241 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3242 return(NULL);
3243 }
3244 len += l;
3245 NEXTL(l);
3246 c = CUR_CHAR(l);
3247 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3248 (((c >= 'a') && (c <= 'z')) ||
3249 ((c >= 'A') && (c <= 'Z')) ||
3250 ((c >= '0') && (c <= '9')) || /* !start */
3251 (c == '_') || (c == ':') ||
3252 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3253 ((c >= 0xC0) && (c <= 0xD6)) ||
3254 ((c >= 0xD8) && (c <= 0xF6)) ||
3255 ((c >= 0xF8) && (c <= 0x2FF)) ||
3256 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3257 ((c >= 0x370) && (c <= 0x37D)) ||
3258 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3259 ((c >= 0x200C) && (c <= 0x200D)) ||
3260 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3261 ((c >= 0x2070) && (c <= 0x218F)) ||
3262 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3263 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3264 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3265 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3266 ((c >= 0x10000) && (c <= 0xEFFFF))
3267 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003268 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003269 count = 0;
3270 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003271 if (ctxt->instate == XML_PARSER_EOF)
3272 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003273 }
3274 len += l;
3275 NEXTL(l);
3276 c = CUR_CHAR(l);
3277 }
3278 } else {
3279 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3280 (!IS_LETTER(c) && (c != '_') &&
3281 (c != ':'))) {
3282 return(NULL);
3283 }
3284 len += l;
3285 NEXTL(l);
3286 c = CUR_CHAR(l);
3287
3288 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3289 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3290 (c == '.') || (c == '-') ||
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003291 (c == '_') || (c == ':') ||
Daniel Veillard34e3f642008-07-29 09:02:27 +00003292 (IS_COMBINING(c)) ||
3293 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003294 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003295 count = 0;
3296 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003297 if (ctxt->instate == XML_PARSER_EOF)
3298 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003299 }
3300 len += l;
3301 NEXTL(l);
3302 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003303 if (c == 0) {
3304 count = 0;
3305 GROW;
3306 if (ctxt->instate == XML_PARSER_EOF)
3307 return(NULL);
3308 c = CUR_CHAR(l);
3309 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003310 }
3311 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003312 if ((len > XML_MAX_NAME_LENGTH) &&
3313 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3314 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3315 return(NULL);
3316 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003317 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3318 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3319 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3320}
3321
Owen Taylor3473f882001-02-23 17:55:21 +00003322/**
3323 * xmlParseName:
3324 * @ctxt: an XML parser context
3325 *
3326 * parse an XML name.
3327 *
3328 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3329 * CombiningChar | Extender
3330 *
3331 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3332 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003333 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003334 *
3335 * Returns the Name parsed or NULL
3336 */
3337
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003338const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003339xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003340 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003341 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003342 int count = 0;
3343
3344 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003345
Daniel Veillardc6561462009-03-25 10:22:31 +00003346#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003347 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003348#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003349
Daniel Veillard48b2f892001-02-25 16:11:03 +00003350 /*
3351 * Accelerator for simple ASCII names
3352 */
3353 in = ctxt->input->cur;
3354 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3355 ((*in >= 0x41) && (*in <= 0x5A)) ||
3356 (*in == '_') || (*in == ':')) {
3357 in++;
3358 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3359 ((*in >= 0x41) && (*in <= 0x5A)) ||
3360 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003361 (*in == '_') || (*in == '-') ||
3362 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003363 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003364 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003365 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003366 if ((count > XML_MAX_NAME_LENGTH) &&
3367 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3368 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3369 return(NULL);
3370 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003371 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003372 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003373 ctxt->nbChars += count;
3374 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003375 if (ret == NULL)
3376 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003377 return(ret);
3378 }
3379 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003380 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003381 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003382}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003383
Daniel Veillard34e3f642008-07-29 09:02:27 +00003384static const xmlChar *
3385xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3386 int len = 0, l;
3387 int c;
3388 int count = 0;
3389
Daniel Veillardc6561462009-03-25 10:22:31 +00003390#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003391 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003392#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003393
3394 /*
3395 * Handler for more complex cases
3396 */
3397 GROW;
3398 c = CUR_CHAR(l);
3399 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3400 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3401 return(NULL);
3402 }
3403
3404 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3405 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003406 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003407 if ((len > XML_MAX_NAME_LENGTH) &&
3408 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3409 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3410 return(NULL);
3411 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003412 count = 0;
3413 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003414 if (ctxt->instate == XML_PARSER_EOF)
3415 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003416 }
3417 len += l;
3418 NEXTL(l);
3419 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003420 if (c == 0) {
3421 count = 0;
3422 GROW;
3423 if (ctxt->instate == XML_PARSER_EOF)
3424 return(NULL);
3425 c = CUR_CHAR(l);
3426 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003427 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003428 if ((len > XML_MAX_NAME_LENGTH) &&
3429 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3430 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3431 return(NULL);
3432 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003433 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3434}
3435
3436/**
3437 * xmlParseNCName:
3438 * @ctxt: an XML parser context
Michael Woodfb27e2c2012-09-28 08:59:33 +02003439 * @len: length of the string parsed
Daniel Veillard34e3f642008-07-29 09:02:27 +00003440 *
3441 * parse an XML name.
3442 *
3443 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3444 * CombiningChar | Extender
3445 *
3446 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3447 *
3448 * Returns the Name parsed or NULL
3449 */
3450
3451static const xmlChar *
3452xmlParseNCName(xmlParserCtxtPtr ctxt) {
3453 const xmlChar *in;
3454 const xmlChar *ret;
3455 int count = 0;
3456
Daniel Veillardc6561462009-03-25 10:22:31 +00003457#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003458 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003459#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003460
3461 /*
3462 * Accelerator for simple ASCII names
3463 */
3464 in = ctxt->input->cur;
3465 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3466 ((*in >= 0x41) && (*in <= 0x5A)) ||
3467 (*in == '_')) {
3468 in++;
3469 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3470 ((*in >= 0x41) && (*in <= 0x5A)) ||
3471 ((*in >= 0x30) && (*in <= 0x39)) ||
3472 (*in == '_') || (*in == '-') ||
3473 (*in == '.'))
3474 in++;
3475 if ((*in > 0) && (*in < 0x80)) {
3476 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003477 if ((count > XML_MAX_NAME_LENGTH) &&
3478 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3479 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3480 return(NULL);
3481 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003482 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3483 ctxt->input->cur = in;
3484 ctxt->nbChars += count;
3485 ctxt->input->col += count;
3486 if (ret == NULL) {
3487 xmlErrMemory(ctxt, NULL);
3488 }
3489 return(ret);
3490 }
3491 }
3492 return(xmlParseNCNameComplex(ctxt));
3493}
3494
Daniel Veillard46de64e2002-05-29 08:21:33 +00003495/**
3496 * xmlParseNameAndCompare:
3497 * @ctxt: an XML parser context
3498 *
3499 * parse an XML name and compares for match
3500 * (specialized for endtag parsing)
3501 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003502 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3503 * and the name for mismatch
3504 */
3505
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003506static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003507xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003508 register const xmlChar *cmp = other;
3509 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003510 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003511
3512 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003513 if (ctxt->instate == XML_PARSER_EOF)
3514 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003515
Daniel Veillard46de64e2002-05-29 08:21:33 +00003516 in = ctxt->input->cur;
3517 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003518 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003519 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003520 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003521 }
William M. Brack76e95df2003-10-18 16:20:14 +00003522 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003523 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003524 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003525 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003526 }
3527 /* failure (or end of input buffer), check with full function */
3528 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003529 /* strings coming from the dictionnary direct compare possible */
3530 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003531 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003532 }
3533 return ret;
3534}
3535
Owen Taylor3473f882001-02-23 17:55:21 +00003536/**
3537 * xmlParseStringName:
3538 * @ctxt: an XML parser context
3539 * @str: a pointer to the string pointer (IN/OUT)
3540 *
3541 * parse an XML name.
3542 *
3543 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3544 * CombiningChar | Extender
3545 *
3546 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3547 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003548 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003549 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003550 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003551 * is updated to the current location in the string.
3552 */
3553
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003554static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003555xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3556 xmlChar buf[XML_MAX_NAMELEN + 5];
3557 const xmlChar *cur = *str;
3558 int len = 0, l;
3559 int c;
3560
Daniel Veillardc6561462009-03-25 10:22:31 +00003561#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003562 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003563#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003564
Owen Taylor3473f882001-02-23 17:55:21 +00003565 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003566 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003567 return(NULL);
3568 }
3569
Daniel Veillard34e3f642008-07-29 09:02:27 +00003570 COPY_BUF(l,buf,len,c);
3571 cur += l;
3572 c = CUR_SCHAR(cur, l);
3573 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003574 COPY_BUF(l,buf,len,c);
3575 cur += l;
3576 c = CUR_SCHAR(cur, l);
3577 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3578 /*
3579 * Okay someone managed to make a huge name, so he's ready to pay
3580 * for the processing speed.
3581 */
3582 xmlChar *buffer;
3583 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003584
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003585 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003586 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003587 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003588 return(NULL);
3589 }
3590 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003591 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003592 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003593 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003594
3595 if ((len > XML_MAX_NAME_LENGTH) &&
3596 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3597 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3598 xmlFree(buffer);
3599 return(NULL);
3600 }
Owen Taylor3473f882001-02-23 17:55:21 +00003601 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003602 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003603 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003604 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003605 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003606 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003607 return(NULL);
3608 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003609 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003610 }
3611 COPY_BUF(l,buffer,len,c);
3612 cur += l;
3613 c = CUR_SCHAR(cur, l);
3614 }
3615 buffer[len] = 0;
3616 *str = cur;
3617 return(buffer);
3618 }
3619 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003620 if ((len > XML_MAX_NAME_LENGTH) &&
3621 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3622 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3623 return(NULL);
3624 }
Owen Taylor3473f882001-02-23 17:55:21 +00003625 *str = cur;
3626 return(xmlStrndup(buf, len));
3627}
3628
3629/**
3630 * xmlParseNmtoken:
3631 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003632 *
Owen Taylor3473f882001-02-23 17:55:21 +00003633 * parse an XML Nmtoken.
3634 *
3635 * [7] Nmtoken ::= (NameChar)+
3636 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003637 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003638 *
3639 * Returns the Nmtoken parsed or NULL
3640 */
3641
3642xmlChar *
3643xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3644 xmlChar buf[XML_MAX_NAMELEN + 5];
3645 int len = 0, l;
3646 int c;
3647 int count = 0;
3648
Daniel Veillardc6561462009-03-25 10:22:31 +00003649#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003650 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003651#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003652
Owen Taylor3473f882001-02-23 17:55:21 +00003653 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003654 if (ctxt->instate == XML_PARSER_EOF)
3655 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003656 c = CUR_CHAR(l);
3657
Daniel Veillard34e3f642008-07-29 09:02:27 +00003658 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003659 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003660 count = 0;
3661 GROW;
3662 }
3663 COPY_BUF(l,buf,len,c);
3664 NEXTL(l);
3665 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003666 if (c == 0) {
3667 count = 0;
3668 GROW;
3669 if (ctxt->instate == XML_PARSER_EOF)
3670 return(NULL);
3671 c = CUR_CHAR(l);
3672 }
Owen Taylor3473f882001-02-23 17:55:21 +00003673 if (len >= XML_MAX_NAMELEN) {
3674 /*
3675 * Okay someone managed to make a huge token, so he's ready to pay
3676 * for the processing speed.
3677 */
3678 xmlChar *buffer;
3679 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003680
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003681 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003682 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003683 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003684 return(NULL);
3685 }
3686 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003687 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003688 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003689 count = 0;
3690 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003691 if (ctxt->instate == XML_PARSER_EOF) {
3692 xmlFree(buffer);
3693 return(NULL);
3694 }
Owen Taylor3473f882001-02-23 17:55:21 +00003695 }
3696 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003697 xmlChar *tmp;
3698
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003699 if ((max > XML_MAX_NAME_LENGTH) &&
3700 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3701 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3702 xmlFree(buffer);
3703 return(NULL);
3704 }
Owen Taylor3473f882001-02-23 17:55:21 +00003705 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003706 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003707 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003708 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003709 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003710 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003711 return(NULL);
3712 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003713 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003714 }
3715 COPY_BUF(l,buffer,len,c);
3716 NEXTL(l);
3717 c = CUR_CHAR(l);
3718 }
3719 buffer[len] = 0;
3720 return(buffer);
3721 }
3722 }
3723 if (len == 0)
3724 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003725 if ((len > XML_MAX_NAME_LENGTH) &&
3726 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3727 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3728 return(NULL);
3729 }
Owen Taylor3473f882001-02-23 17:55:21 +00003730 return(xmlStrndup(buf, len));
3731}
3732
3733/**
3734 * xmlParseEntityValue:
3735 * @ctxt: an XML parser context
3736 * @orig: if non-NULL store a copy of the original entity value
3737 *
3738 * parse a value for ENTITY declarations
3739 *
3740 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3741 * "'" ([^%&'] | PEReference | Reference)* "'"
3742 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003743 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003744 */
3745
3746xmlChar *
3747xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3748 xmlChar *buf = NULL;
3749 int len = 0;
3750 int size = XML_PARSER_BUFFER_SIZE;
3751 int c, l;
3752 xmlChar stop;
3753 xmlChar *ret = NULL;
3754 const xmlChar *cur = NULL;
3755 xmlParserInputPtr input;
3756
3757 if (RAW == '"') stop = '"';
3758 else if (RAW == '\'') stop = '\'';
3759 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003760 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003761 return(NULL);
3762 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003763 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003764 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003765 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003766 return(NULL);
3767 }
3768
3769 /*
3770 * The content of the entity definition is copied in a buffer.
3771 */
3772
3773 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3774 input = ctxt->input;
3775 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003776 if (ctxt->instate == XML_PARSER_EOF) {
3777 xmlFree(buf);
3778 return(NULL);
3779 }
Owen Taylor3473f882001-02-23 17:55:21 +00003780 NEXT;
3781 c = CUR_CHAR(l);
3782 /*
3783 * NOTE: 4.4.5 Included in Literal
3784 * When a parameter entity reference appears in a literal entity
3785 * value, ... a single or double quote character in the replacement
3786 * text is always treated as a normal data character and will not
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003787 * terminate the literal.
Owen Taylor3473f882001-02-23 17:55:21 +00003788 * In practice it means we stop the loop only when back at parsing
3789 * the initial entity and the quote is found
3790 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003791 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3792 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003793 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003794 xmlChar *tmp;
3795
Owen Taylor3473f882001-02-23 17:55:21 +00003796 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003797 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3798 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003799 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003800 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003801 return(NULL);
3802 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003803 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003804 }
3805 COPY_BUF(l,buf,len,c);
3806 NEXTL(l);
3807 /*
3808 * Pop-up of finished entities.
3809 */
3810 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3811 xmlPopInput(ctxt);
3812
3813 GROW;
3814 c = CUR_CHAR(l);
3815 if (c == 0) {
3816 GROW;
3817 c = CUR_CHAR(l);
3818 }
3819 }
3820 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003821 if (ctxt->instate == XML_PARSER_EOF) {
3822 xmlFree(buf);
3823 return(NULL);
3824 }
Owen Taylor3473f882001-02-23 17:55:21 +00003825
3826 /*
3827 * Raise problem w.r.t. '&' and '%' being used in non-entities
3828 * reference constructs. Note Charref will be handled in
3829 * xmlStringDecodeEntities()
3830 */
3831 cur = buf;
3832 while (*cur != 0) { /* non input consuming */
3833 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3834 xmlChar *name;
3835 xmlChar tmp = *cur;
3836
3837 cur++;
3838 name = xmlParseStringName(ctxt, &cur);
3839 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003840 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003841 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003842 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003843 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003844 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3845 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003846 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003847 }
3848 if (name != NULL)
3849 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003850 if (*cur == 0)
3851 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003852 }
3853 cur++;
3854 }
3855
3856 /*
3857 * Then PEReference entities are substituted.
3858 */
3859 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003860 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003861 xmlFree(buf);
3862 } else {
3863 NEXT;
3864 /*
3865 * NOTE: 4.4.7 Bypassed
3866 * When a general entity reference appears in the EntityValue in
3867 * an entity declaration, it is bypassed and left as is.
3868 * so XML_SUBSTITUTE_REF is not set here.
3869 */
3870 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3871 0, 0, 0);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003872 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003873 *orig = buf;
3874 else
3875 xmlFree(buf);
3876 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003877
Owen Taylor3473f882001-02-23 17:55:21 +00003878 return(ret);
3879}
3880
3881/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003882 * xmlParseAttValueComplex:
3883 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003884 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003885 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003886 *
3887 * parse a value for an attribute, this is the fallback function
3888 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003889 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003890 *
3891 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3892 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003893static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003894xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003895 xmlChar limit = 0;
3896 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003897 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003898 size_t len = 0;
3899 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003900 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003901 xmlChar *current = NULL;
3902 xmlEntityPtr ent;
3903
Owen Taylor3473f882001-02-23 17:55:21 +00003904 if (NXT(0) == '"') {
3905 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3906 limit = '"';
3907 NEXT;
3908 } else if (NXT(0) == '\'') {
3909 limit = '\'';
3910 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3911 NEXT;
3912 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003913 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003914 return(NULL);
3915 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003916
Owen Taylor3473f882001-02-23 17:55:21 +00003917 /*
3918 * allocate a translation buffer.
3919 */
3920 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003921 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003922 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003923
3924 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003925 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003926 */
3927 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003928 while (((NXT(0) != limit) && /* checked */
3929 (IS_CHAR(c)) && (c != '<')) &&
3930 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003931 /*
3932 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3933 * special option is given
3934 */
3935 if ((len > XML_MAX_TEXT_LENGTH) &&
3936 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3937 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02003938 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08003939 goto mem_error;
3940 }
Owen Taylor3473f882001-02-23 17:55:21 +00003941 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003942 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003943 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003944 if (NXT(1) == '#') {
3945 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003946
Owen Taylor3473f882001-02-23 17:55:21 +00003947 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003948 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003949 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003950 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003951 }
3952 buf[len++] = '&';
3953 } else {
3954 /*
3955 * The reparsing will be done in xmlStringGetNodeList()
3956 * called by the attribute() function in SAX.c
3957 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003958 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003959 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003960 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003961 buf[len++] = '&';
3962 buf[len++] = '#';
3963 buf[len++] = '3';
3964 buf[len++] = '8';
3965 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003966 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003967 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003968 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003969 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003970 }
Owen Taylor3473f882001-02-23 17:55:21 +00003971 len += xmlCopyChar(0, &buf[len], val);
3972 }
3973 } else {
3974 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003975 ctxt->nbentities++;
3976 if (ent != NULL)
3977 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003978 if ((ent != NULL) &&
3979 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003980 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003981 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003982 }
3983 if ((ctxt->replaceEntities == 0) &&
3984 (ent->content[0] == '&')) {
3985 buf[len++] = '&';
3986 buf[len++] = '#';
3987 buf[len++] = '3';
3988 buf[len++] = '8';
3989 buf[len++] = ';';
3990 } else {
3991 buf[len++] = ent->content[0];
3992 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08003993 } else if ((ent != NULL) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003994 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003995 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3996 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003997 XML_SUBSTITUTE_REF,
3998 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003999 if (rep != NULL) {
4000 current = rep;
4001 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004002 if ((*current == 0xD) || (*current == 0xA) ||
4003 (*current == 0x9)) {
4004 buf[len++] = 0x20;
4005 current++;
4006 } else
4007 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004008 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004009 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004010 }
4011 }
4012 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004013 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004014 }
4015 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004016 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004017 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004018 }
Owen Taylor3473f882001-02-23 17:55:21 +00004019 if (ent->content != NULL)
4020 buf[len++] = ent->content[0];
4021 }
4022 } else if (ent != NULL) {
4023 int i = xmlStrlen(ent->name);
4024 const xmlChar *cur = ent->name;
4025
4026 /*
4027 * This may look absurd but is needed to detect
4028 * entities problems
4029 */
4030 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4031 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004032 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004033 XML_SUBSTITUTE_REF, 0, 0, 0);
4034 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00004035 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004036 rep = NULL;
4037 }
Owen Taylor3473f882001-02-23 17:55:21 +00004038 }
4039
4040 /*
4041 * Just output the reference
4042 */
4043 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004044 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004045 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004046 }
4047 for (;i > 0;i--)
4048 buf[len++] = *cur++;
4049 buf[len++] = ';';
4050 }
4051 }
4052 } else {
4053 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004054 if ((len != 0) || (!normalize)) {
4055 if ((!normalize) || (!in_space)) {
4056 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004057 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004058 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004059 }
4060 }
4061 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004062 }
4063 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004064 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004065 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004066 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004067 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004068 }
4069 }
4070 NEXTL(l);
4071 }
4072 GROW;
4073 c = CUR_CHAR(l);
4074 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004075 if (ctxt->instate == XML_PARSER_EOF)
4076 goto error;
4077
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004078 if ((in_space) && (normalize)) {
Daniel Veillard6a36fbe2012-10-29 10:39:55 +08004079 while ((len > 0) && (buf[len - 1] == 0x20)) len--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004080 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004081 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004082 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004083 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004084 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004085 if ((c != 0) && (!IS_CHAR(c))) {
4086 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4087 "invalid character in attribute value\n");
4088 } else {
4089 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4090 "AttValue: ' expected\n");
4091 }
Owen Taylor3473f882001-02-23 17:55:21 +00004092 } else
4093 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004094
4095 /*
4096 * There we potentially risk an overflow, don't allow attribute value of
Michael Woodfb27e2c2012-09-28 08:59:33 +02004097 * length more than INT_MAX it is a very reasonnable assumption !
Daniel Veillard459eeb92012-07-17 16:19:17 +08004098 */
4099 if (len >= INT_MAX) {
4100 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02004101 "AttValue length too long\n");
Daniel Veillard459eeb92012-07-17 16:19:17 +08004102 goto mem_error;
4103 }
4104
4105 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004106 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004107
4108mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004109 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004110error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004111 if (buf != NULL)
4112 xmlFree(buf);
4113 if (rep != NULL)
4114 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004115 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004116}
4117
4118/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004119 * xmlParseAttValue:
4120 * @ctxt: an XML parser context
4121 *
4122 * parse a value for an attribute
4123 * Note: the parser won't do substitution of entities here, this
4124 * will be handled later in xmlStringGetNodeList
4125 *
4126 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4127 * "'" ([^<&'] | Reference)* "'"
4128 *
4129 * 3.3.3 Attribute-Value Normalization:
4130 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004131 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00004132 * - a character reference is processed by appending the referenced
4133 * character to the attribute value
4134 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004135 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00004136 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4137 * appending #x20 to the normalized value, except that only a single
4138 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004139 * parsed entity or the literal entity value of an internal parsed entity
4140 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00004141 * If the declared value is not CDATA, then the XML processor must further
4142 * process the normalized attribute value by discarding any leading and
4143 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004144 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00004145 * All attributes for which no declaration has been read should be treated
4146 * by a non-validating parser as if declared CDATA.
4147 *
4148 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4149 */
4150
4151
4152xmlChar *
4153xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004154 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004155 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004156}
4157
4158/**
Owen Taylor3473f882001-02-23 17:55:21 +00004159 * xmlParseSystemLiteral:
4160 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004161 *
Owen Taylor3473f882001-02-23 17:55:21 +00004162 * parse an XML Literal
4163 *
4164 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4165 *
4166 * Returns the SystemLiteral parsed or NULL
4167 */
4168
4169xmlChar *
4170xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4171 xmlChar *buf = NULL;
4172 int len = 0;
4173 int size = XML_PARSER_BUFFER_SIZE;
4174 int cur, l;
4175 xmlChar stop;
4176 int state = ctxt->instate;
4177 int count = 0;
4178
4179 SHRINK;
4180 if (RAW == '"') {
4181 NEXT;
4182 stop = '"';
4183 } else if (RAW == '\'') {
4184 NEXT;
4185 stop = '\'';
4186 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004187 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004188 return(NULL);
4189 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004190
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004191 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004192 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004193 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004194 return(NULL);
4195 }
4196 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4197 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004198 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004199 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004200 xmlChar *tmp;
4201
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004202 if ((size > XML_MAX_NAME_LENGTH) &&
4203 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4204 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4205 xmlFree(buf);
4206 ctxt->instate = (xmlParserInputState) state;
4207 return(NULL);
4208 }
Owen Taylor3473f882001-02-23 17:55:21 +00004209 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004210 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4211 if (tmp == NULL) {
4212 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004213 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004214 ctxt->instate = (xmlParserInputState) state;
4215 return(NULL);
4216 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004217 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004218 }
4219 count++;
4220 if (count > 50) {
4221 GROW;
4222 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004223 if (ctxt->instate == XML_PARSER_EOF) {
4224 xmlFree(buf);
4225 return(NULL);
4226 }
Owen Taylor3473f882001-02-23 17:55:21 +00004227 }
4228 COPY_BUF(l,buf,len,cur);
4229 NEXTL(l);
4230 cur = CUR_CHAR(l);
4231 if (cur == 0) {
4232 GROW;
4233 SHRINK;
4234 cur = CUR_CHAR(l);
4235 }
4236 }
4237 buf[len] = 0;
4238 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004239 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004240 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004241 } else {
4242 NEXT;
4243 }
4244 return(buf);
4245}
4246
4247/**
4248 * xmlParsePubidLiteral:
4249 * @ctxt: an XML parser context
4250 *
4251 * parse an XML public literal
4252 *
4253 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4254 *
4255 * Returns the PubidLiteral parsed or NULL.
4256 */
4257
4258xmlChar *
4259xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4260 xmlChar *buf = NULL;
4261 int len = 0;
4262 int size = XML_PARSER_BUFFER_SIZE;
4263 xmlChar cur;
4264 xmlChar stop;
4265 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004266 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004267
4268 SHRINK;
4269 if (RAW == '"') {
4270 NEXT;
4271 stop = '"';
4272 } else if (RAW == '\'') {
4273 NEXT;
4274 stop = '\'';
4275 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004276 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004277 return(NULL);
4278 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004279 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004280 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004281 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004282 return(NULL);
4283 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004284 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004285 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004286 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004287 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004288 xmlChar *tmp;
4289
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004290 if ((size > XML_MAX_NAME_LENGTH) &&
4291 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4292 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4293 xmlFree(buf);
4294 return(NULL);
4295 }
Owen Taylor3473f882001-02-23 17:55:21 +00004296 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004297 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4298 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004299 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004300 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004301 return(NULL);
4302 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004303 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004304 }
4305 buf[len++] = cur;
4306 count++;
4307 if (count > 50) {
4308 GROW;
4309 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004310 if (ctxt->instate == XML_PARSER_EOF) {
4311 xmlFree(buf);
4312 return(NULL);
4313 }
Owen Taylor3473f882001-02-23 17:55:21 +00004314 }
4315 NEXT;
4316 cur = CUR;
4317 if (cur == 0) {
4318 GROW;
4319 SHRINK;
4320 cur = CUR;
4321 }
4322 }
4323 buf[len] = 0;
4324 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004325 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004326 } else {
4327 NEXT;
4328 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004329 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004330 return(buf);
4331}
4332
Daniel Veillard8ed10722009-08-20 19:17:36 +02004333static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004334
/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing: the entry for a byte is the byte itself when the fast loop may
 * step over it, and 0 when the loop has to stop. Zero entries are the
 * control characters other than 0x9 (so CR and LF are handled separately),
 * '&', '<', ']' and every non-ASCII byte.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: only TAB (0x9) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' (0x26) stops the loop */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' (0x3C) stops the loop */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' (0x5D) stops the loop */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4372
Owen Taylor3473f882001-02-23 17:55:21 +00004373/**
4374 * xmlParseCharData:
4375 * @ctxt: an XML parser context
4376 * @cdata: int indicating whether we are within a CDATA section
4377 *
4378 * parse a CharData section.
4379 * if we are within a CDATA section ']]>' marks an end of section.
4380 *
4381 * The right angle bracket (>) may be represented using the string "&gt;",
4382 * and must, for compatibility, be escaped using "&gt;" or a character
4383 * reference when it appears in the string "]]>" in content, when that
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004384 * string is not marking the end of a CDATA section.
Owen Taylor3473f882001-02-23 17:55:21 +00004385 *
4386 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4387 */
4388
4389void
4390xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004391 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004392 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004393 int line = ctxt->input->line;
4394 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004395 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004396
4397 SHRINK;
4398 GROW;
4399 /*
4400 * Accelerated common case where input don't need to be
4401 * modified before passing it to the handler.
4402 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004403 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004404 in = ctxt->input->cur;
4405 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004406get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004407 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004408 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004409 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004410 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004411 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004412 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004413 goto get_more_space;
4414 }
4415 if (*in == '<') {
4416 nbchar = in - ctxt->input->cur;
4417 if (nbchar > 0) {
4418 const xmlChar *tmp = ctxt->input->cur;
4419 ctxt->input->cur = in;
4420
Daniel Veillard34099b42004-11-04 17:34:35 +00004421 if ((ctxt->sax != NULL) &&
4422 (ctxt->sax->ignorableWhitespace !=
4423 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004424 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004425 if (ctxt->sax->ignorableWhitespace != NULL)
4426 ctxt->sax->ignorableWhitespace(ctxt->userData,
4427 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004428 } else {
4429 if (ctxt->sax->characters != NULL)
4430 ctxt->sax->characters(ctxt->userData,
4431 tmp, nbchar);
4432 if (*ctxt->space == -1)
4433 *ctxt->space = -2;
4434 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004435 } else if ((ctxt->sax != NULL) &&
4436 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004437 ctxt->sax->characters(ctxt->userData,
4438 tmp, nbchar);
4439 }
4440 }
4441 return;
4442 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004443
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004444get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004445 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004446 while (test_char_data[*in]) {
4447 in++;
4448 ccol++;
4449 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004450 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004451 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004452 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004453 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004454 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004455 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004456 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004457 }
4458 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004459 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004460 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004461 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004462 return;
4463 }
4464 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004465 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004466 goto get_more;
4467 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004468 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004469 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004470 if ((ctxt->sax != NULL) &&
4471 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004472 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004473 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004474 const xmlChar *tmp = ctxt->input->cur;
4475 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004476
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004477 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004478 if (ctxt->sax->ignorableWhitespace != NULL)
4479 ctxt->sax->ignorableWhitespace(ctxt->userData,
4480 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004481 } else {
4482 if (ctxt->sax->characters != NULL)
4483 ctxt->sax->characters(ctxt->userData,
4484 tmp, nbchar);
4485 if (*ctxt->space == -1)
4486 *ctxt->space = -2;
4487 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004488 line = ctxt->input->line;
4489 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004490 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004491 if (ctxt->sax->characters != NULL)
4492 ctxt->sax->characters(ctxt->userData,
4493 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004494 line = ctxt->input->line;
4495 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004496 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004497 /* something really bad happened in the SAX callback */
4498 if (ctxt->instate != XML_PARSER_CONTENT)
4499 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004500 }
4501 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004502 if (*in == 0xD) {
4503 in++;
4504 if (*in == 0xA) {
4505 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004506 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004507 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004508 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004509 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004510 in--;
4511 }
4512 if (*in == '<') {
4513 return;
4514 }
4515 if (*in == '&') {
4516 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004517 }
4518 SHRINK;
4519 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004520 if (ctxt->instate == XML_PARSER_EOF)
4521 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004522 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004523 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004524 nbchar = 0;
4525 }
Daniel Veillard50582112001-03-26 22:52:16 +00004526 ctxt->input->line = line;
4527 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004528 xmlParseCharDataComplex(ctxt, cdata);
4529}
4530
Daniel Veillard01c13b52002-12-10 15:19:08 +00004531/**
4532 * xmlParseCharDataComplex:
4533 * @ctxt: an XML parser context
4534 * @cdata: int indicating whether we are within a CDATA section
4535 *
4536 * parse a CharData section.this is the fallback function
4537 * of xmlParseCharData() when the parsing requires handling
4538 * of non-ASCII characters.
4539 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004540static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004541xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004542 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4543 int nbchar = 0;
4544 int cur, l;
4545 int count = 0;
4546
4547 SHRINK;
4548 GROW;
4549 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004550 while ((cur != '<') && /* checked */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004551 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004552 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004553 if ((cur == ']') && (NXT(1) == ']') &&
4554 (NXT(2) == '>')) {
4555 if (cdata) break;
4556 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004557 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004558 }
4559 }
4560 COPY_BUF(l,buf,nbchar,cur);
4561 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004562 buf[nbchar] = 0;
4563
Owen Taylor3473f882001-02-23 17:55:21 +00004564 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004565 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004566 */
4567 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004568 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004569 if (ctxt->sax->ignorableWhitespace != NULL)
4570 ctxt->sax->ignorableWhitespace(ctxt->userData,
4571 buf, nbchar);
4572 } else {
4573 if (ctxt->sax->characters != NULL)
4574 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004575 if ((ctxt->sax->characters !=
4576 ctxt->sax->ignorableWhitespace) &&
4577 (*ctxt->space == -1))
4578 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004579 }
4580 }
4581 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004582 /* something really bad happened in the SAX callback */
4583 if (ctxt->instate != XML_PARSER_CONTENT)
4584 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004585 }
4586 count++;
4587 if (count > 50) {
4588 GROW;
4589 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004590 if (ctxt->instate == XML_PARSER_EOF)
4591 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004592 }
4593 NEXTL(l);
4594 cur = CUR_CHAR(l);
4595 }
4596 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004597 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004598 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004599 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004600 */
4601 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004602 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004603 if (ctxt->sax->ignorableWhitespace != NULL)
4604 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4605 } else {
4606 if (ctxt->sax->characters != NULL)
4607 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004608 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4609 (*ctxt->space == -1))
4610 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004611 }
4612 }
4613 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004614 if ((cur != 0) && (!IS_CHAR(cur))) {
4615 /* Generate the error and skip the offending character */
4616 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4617 "PCDATA invalid Char value %d\n",
4618 cur);
4619 NEXTL(l);
4620 }
Owen Taylor3473f882001-02-23 17:55:21 +00004621}
4622
4623/**
4624 * xmlParseExternalID:
4625 * @ctxt: an XML parser context
4626 * @publicID: a xmlChar** receiving PubidLiteral
4627 * @strict: indicate whether we should restrict parsing to only
4628 * production [75], see NOTE below
4629 *
4630 * Parse an External ID or a Public ID
4631 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004632 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004633 * 'PUBLIC' S PubidLiteral S SystemLiteral
4634 *
4635 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4636 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4637 *
4638 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4639 *
4640 * Returns the function returns SystemLiteral and in the second
4641 * case publicID receives PubidLiteral, is strict is off
4642 * it is possible to return NULL and have publicID set.
4643 */
4644
4645xmlChar *
4646xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4647 xmlChar *URI = NULL;
4648
4649 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004650
4651 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004652 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004653 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004654 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004655 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4656 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004657 }
4658 SKIP_BLANKS;
4659 URI = xmlParseSystemLiteral(ctxt);
4660 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004661 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004662 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004663 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004664 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004665 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004666 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004667 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004668 }
4669 SKIP_BLANKS;
4670 *publicID = xmlParsePubidLiteral(ctxt);
4671 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004672 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004673 }
4674 if (strict) {
4675 /*
4676 * We don't handle [83] so "S SystemLiteral" is required.
4677 */
William M. Brack76e95df2003-10-18 16:20:14 +00004678 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004679 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004680 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004681 }
4682 } else {
4683 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004684 * We handle [83] so we return immediately, if
Owen Taylor3473f882001-02-23 17:55:21 +00004685 * "S SystemLiteral" is not detected. From a purely parsing
4686 * point of view that's a nice mess.
4687 */
4688 const xmlChar *ptr;
4689 GROW;
4690
4691 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004692 if (!IS_BLANK_CH(*ptr)) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08004693
William M. Brack76e95df2003-10-18 16:20:14 +00004694 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004695 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4696 }
4697 SKIP_BLANKS;
4698 URI = xmlParseSystemLiteral(ctxt);
4699 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004700 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004701 }
4702 }
4703 return(URI);
4704}
4705
4706/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004707 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004708 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004709 * @buf: the already parsed part of the buffer
4710 * @len: number of bytes filles in the buffer
4711 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004712 *
4713 * Skip an XML (SGML) comment <!-- .... -->
4714 * The spec says that "For compatibility, the string "--" (double-hyphen)
4715 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004716 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004717 *
4718 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4719 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004720static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004721xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4722 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004723 int q, ql;
4724 int r, rl;
4725 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004726 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004727 int inputid;
4728
4729 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004730
Owen Taylor3473f882001-02-23 17:55:21 +00004731 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004732 len = 0;
4733 size = XML_PARSER_BUFFER_SIZE;
4734 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4735 if (buf == NULL) {
4736 xmlErrMemory(ctxt, NULL);
4737 return;
4738 }
Owen Taylor3473f882001-02-23 17:55:21 +00004739 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004740 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004741 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004742 if (q == 0)
4743 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004744 if (!IS_CHAR(q)) {
4745 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4746 "xmlParseComment: invalid xmlChar value %d\n",
4747 q);
4748 xmlFree (buf);
4749 return;
4750 }
Owen Taylor3473f882001-02-23 17:55:21 +00004751 NEXTL(ql);
4752 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004753 if (r == 0)
4754 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004755 if (!IS_CHAR(r)) {
4756 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4757 "xmlParseComment: invalid xmlChar value %d\n",
4758 q);
4759 xmlFree (buf);
4760 return;
4761 }
Owen Taylor3473f882001-02-23 17:55:21 +00004762 NEXTL(rl);
4763 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004764 if (cur == 0)
4765 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004766 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004767 ((cur != '>') ||
4768 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004769 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004770 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004771 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004772 if ((len > XML_MAX_TEXT_LENGTH) &&
4773 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4774 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4775 "Comment too big found", NULL);
4776 xmlFree (buf);
4777 return;
4778 }
Owen Taylor3473f882001-02-23 17:55:21 +00004779 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004780 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004781 size_t new_size;
4782
4783 new_size = size * 2;
4784 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004785 if (new_buf == NULL) {
4786 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004787 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004788 return;
4789 }
William M. Bracka3215c72004-07-31 16:24:01 +00004790 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004791 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004792 }
4793 COPY_BUF(ql,buf,len,q);
4794 q = r;
4795 ql = rl;
4796 r = cur;
4797 rl = l;
4798
4799 count++;
4800 if (count > 50) {
4801 GROW;
4802 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004803 if (ctxt->instate == XML_PARSER_EOF) {
4804 xmlFree(buf);
4805 return;
4806 }
Owen Taylor3473f882001-02-23 17:55:21 +00004807 }
4808 NEXTL(l);
4809 cur = CUR_CHAR(l);
4810 if (cur == 0) {
4811 SHRINK;
4812 GROW;
4813 cur = CUR_CHAR(l);
4814 }
4815 }
4816 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004817 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004818 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004819 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004820 } else if (!IS_CHAR(cur)) {
4821 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4822 "xmlParseComment: invalid xmlChar value %d\n",
4823 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004824 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004825 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004826 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4827 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004828 }
4829 NEXT;
4830 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4831 (!ctxt->disableSAX))
4832 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004833 }
Daniel Veillardda629342007-08-01 07:49:06 +00004834 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004835 return;
4836not_terminated:
4837 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4838 "Comment not terminated\n", NULL);
4839 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004840 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004841}
Daniel Veillardda629342007-08-01 07:49:06 +00004842
Daniel Veillard4c778d82005-01-23 17:37:44 +00004843/**
4844 * xmlParseComment:
4845 * @ctxt: an XML parser context
4846 *
4847 * Skip an XML (SGML) comment <!-- .... -->
4848 * The spec says that "For compatibility, the string "--" (double-hyphen)
4849 * must not occur within comments. "
4850 *
4851 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4852 */
4853void
4854xmlParseComment(xmlParserCtxtPtr ctxt) {
4855 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004856 size_t size = XML_PARSER_BUFFER_SIZE;
4857 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004858 xmlParserInputState state;
4859 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004860 size_t nbchar = 0;
4861 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004862 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004863
4864 /*
4865 * Check that there is a comment right here.
4866 */
4867 if ((RAW != '<') || (NXT(1) != '!') ||
4868 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004869 state = ctxt->instate;
4870 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004871 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004872 SKIP(4);
4873 SHRINK;
4874 GROW;
4875
4876 /*
4877 * Accelerated common case where input don't need to be
4878 * modified before passing it to the handler.
4879 */
4880 in = ctxt->input->cur;
4881 do {
4882 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004883 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004884 ctxt->input->line++; ctxt->input->col = 1;
4885 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004886 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004887 }
4888get_more:
4889 ccol = ctxt->input->col;
4890 while (((*in > '-') && (*in <= 0x7F)) ||
4891 ((*in >= 0x20) && (*in < '-')) ||
4892 (*in == 0x09)) {
4893 in++;
4894 ccol++;
4895 }
4896 ctxt->input->col = ccol;
4897 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004898 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004899 ctxt->input->line++; ctxt->input->col = 1;
4900 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004901 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004902 goto get_more;
4903 }
4904 nbchar = in - ctxt->input->cur;
4905 /*
4906 * save current set of data
4907 */
4908 if (nbchar > 0) {
4909 if ((ctxt->sax != NULL) &&
4910 (ctxt->sax->comment != NULL)) {
4911 if (buf == NULL) {
4912 if ((*in == '-') && (in[1] == '-'))
4913 size = nbchar + 1;
4914 else
4915 size = XML_PARSER_BUFFER_SIZE + nbchar;
4916 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4917 if (buf == NULL) {
4918 xmlErrMemory(ctxt, NULL);
4919 ctxt->instate = state;
4920 return;
4921 }
4922 len = 0;
4923 } else if (len + nbchar + 1 >= size) {
4924 xmlChar *new_buf;
4925 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4926 new_buf = (xmlChar *) xmlRealloc(buf,
4927 size * sizeof(xmlChar));
4928 if (new_buf == NULL) {
4929 xmlFree (buf);
4930 xmlErrMemory(ctxt, NULL);
4931 ctxt->instate = state;
4932 return;
4933 }
4934 buf = new_buf;
4935 }
4936 memcpy(&buf[len], ctxt->input->cur, nbchar);
4937 len += nbchar;
4938 buf[len] = 0;
4939 }
4940 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004941 if ((len > XML_MAX_TEXT_LENGTH) &&
4942 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4943 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4944 "Comment too big found", NULL);
4945 xmlFree (buf);
4946 return;
4947 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004948 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004949 if (*in == 0xA) {
4950 in++;
4951 ctxt->input->line++; ctxt->input->col = 1;
4952 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004953 if (*in == 0xD) {
4954 in++;
4955 if (*in == 0xA) {
4956 ctxt->input->cur = in;
4957 in++;
4958 ctxt->input->line++; ctxt->input->col = 1;
4959 continue; /* while */
4960 }
4961 in--;
4962 }
4963 SHRINK;
4964 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004965 if (ctxt->instate == XML_PARSER_EOF) {
4966 xmlFree(buf);
4967 return;
4968 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004969 in = ctxt->input->cur;
4970 if (*in == '-') {
4971 if (in[1] == '-') {
4972 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004973 if (ctxt->input->id != inputid) {
4974 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4975 "comment doesn't start and stop in the same entity\n");
4976 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004977 SKIP(3);
4978 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4979 (!ctxt->disableSAX)) {
4980 if (buf != NULL)
4981 ctxt->sax->comment(ctxt->userData, buf);
4982 else
4983 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4984 }
4985 if (buf != NULL)
4986 xmlFree(buf);
4987 ctxt->instate = state;
4988 return;
4989 }
Bryan Henderson8658d272012-05-08 16:39:05 +08004990 if (buf != NULL) {
4991 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4992 "Double hyphen within comment: "
4993 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00004994 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08004995 } else
4996 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4997 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004998 in++;
4999 ctxt->input->col++;
5000 }
5001 in++;
5002 ctxt->input->col++;
5003 goto get_more;
5004 }
5005 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5006 xmlParseCommentComplex(ctxt, buf, len, size);
5007 ctxt->instate = state;
5008 return;
5009}
5010
Owen Taylor3473f882001-02-23 17:55:21 +00005011
5012/**
5013 * xmlParsePITarget:
5014 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005015 *
Owen Taylor3473f882001-02-23 17:55:21 +00005016 * parse the name of a PI
5017 *
5018 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5019 *
5020 * Returns the PITarget name or NULL
5021 */
5022
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005023const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005024xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005025 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005026
5027 name = xmlParseName(ctxt);
5028 if ((name != NULL) &&
5029 ((name[0] == 'x') || (name[0] == 'X')) &&
5030 ((name[1] == 'm') || (name[1] == 'M')) &&
5031 ((name[2] == 'l') || (name[2] == 'L'))) {
5032 int i;
5033 if ((name[0] == 'x') && (name[1] == 'm') &&
5034 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005035 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005036 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005037 return(name);
5038 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005039 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005040 return(name);
5041 }
5042 for (i = 0;;i++) {
5043 if (xmlW3CPIs[i] == NULL) break;
5044 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5045 return(name);
5046 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005047 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5048 "xmlParsePITarget: invalid name prefix 'xml'\n",
5049 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005050 }
Daniel Veillard37334572008-07-31 08:20:02 +00005051 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005052 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005053 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5054 }
Owen Taylor3473f882001-02-23 17:55:21 +00005055 return(name);
5056}
5057
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005058#ifdef LIBXML_CATALOG_ENABLED
5059/**
5060 * xmlParseCatalogPI:
5061 * @ctxt: an XML parser context
5062 * @catalog: the PI value string
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005063 *
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005064 * parse an XML Catalog Processing Instruction.
5065 *
5066 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
5067 *
5068 * Occurs only if allowed by the user and if happening in the Misc
5069 * part of the document before any doctype informations
5070 * This will add the given catalog to the parsing context in order
5071 * to be used if there is a resolution need further down in the document
5072 */
5073
5074static void
5075xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
5076 xmlChar *URL = NULL;
5077 const xmlChar *tmp, *base;
5078 xmlChar marker;
5079
5080 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00005081 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005082 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
5083 goto error;
5084 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00005085 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005086 if (*tmp != '=') {
5087 return;
5088 }
5089 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00005090 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005091 marker = *tmp;
5092 if ((marker != '\'') && (marker != '"'))
5093 goto error;
5094 tmp++;
5095 base = tmp;
5096 while ((*tmp != 0) && (*tmp != marker)) tmp++;
5097 if (*tmp == 0)
5098 goto error;
5099 URL = xmlStrndup(base, tmp - base);
5100 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00005101 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005102 if (*tmp != 0)
5103 goto error;
5104
5105 if (URL != NULL) {
5106 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
5107 xmlFree(URL);
5108 }
5109 return;
5110
5111error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00005112 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
5113 "Catalog PI syntax error: %s\n",
5114 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005115 if (URL != NULL)
5116 xmlFree(URL);
5117}
5118#endif
5119
Owen Taylor3473f882001-02-23 17:55:21 +00005120/**
5121 * xmlParsePI:
5122 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005123 *
Owen Taylor3473f882001-02-23 17:55:21 +00005124 * parse an XML Processing Instruction.
5125 *
5126 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5127 *
 * The processing is transferred to SAX once parsed.
5129 */
5130
5131void
5132xmlParsePI(xmlParserCtxtPtr ctxt) {
5133 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005134 size_t len = 0;
5135 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005136 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005137 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005138 xmlParserInputState state;
5139 int count = 0;
5140
5141 if ((RAW == '<') && (NXT(1) == '?')) {
5142 xmlParserInputPtr input = ctxt->input;
5143 state = ctxt->instate;
5144 ctxt->instate = XML_PARSER_PI;
5145 /*
5146 * this is a Processing Instruction.
5147 */
5148 SKIP(2);
5149 SHRINK;
5150
5151 /*
5152 * Parse the target name and check for special support like
5153 * namespace.
5154 */
5155 target = xmlParsePITarget(ctxt);
5156 if (target != NULL) {
5157 if ((RAW == '?') && (NXT(1) == '>')) {
5158 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005159 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5160 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005161 }
5162 SKIP(2);
5163
5164 /*
5165 * SAX: PI detected.
5166 */
5167 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5168 (ctxt->sax->processingInstruction != NULL))
5169 ctxt->sax->processingInstruction(ctxt->userData,
5170 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005171 if (ctxt->instate != XML_PARSER_EOF)
5172 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005173 return;
5174 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005175 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005176 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005177 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005178 ctxt->instate = state;
5179 return;
5180 }
5181 cur = CUR;
5182 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005183 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5184 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005185 }
5186 SKIP_BLANKS;
5187 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005188 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005189 ((cur != '?') || (NXT(1) != '>'))) {
5190 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005191 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005192 size_t new_size = size * 2;
5193 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005194 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005195 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005196 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005197 ctxt->instate = state;
5198 return;
5199 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005200 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005201 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005202 }
5203 count++;
5204 if (count > 50) {
5205 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005206 if (ctxt->instate == XML_PARSER_EOF) {
5207 xmlFree(buf);
5208 return;
5209 }
Owen Taylor3473f882001-02-23 17:55:21 +00005210 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005211 if ((len > XML_MAX_TEXT_LENGTH) &&
5212 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5213 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5214 "PI %s too big found", target);
5215 xmlFree(buf);
5216 ctxt->instate = state;
5217 return;
5218 }
Owen Taylor3473f882001-02-23 17:55:21 +00005219 }
5220 COPY_BUF(l,buf,len,cur);
5221 NEXTL(l);
5222 cur = CUR_CHAR(l);
5223 if (cur == 0) {
5224 SHRINK;
5225 GROW;
5226 cur = CUR_CHAR(l);
5227 }
5228 }
Daniel Veillard51304812012-07-19 20:34:26 +08005229 if ((len > XML_MAX_TEXT_LENGTH) &&
5230 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5231 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5232 "PI %s too big found", target);
5233 xmlFree(buf);
5234 ctxt->instate = state;
5235 return;
5236 }
Owen Taylor3473f882001-02-23 17:55:21 +00005237 buf[len] = 0;
5238 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005239 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5240 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005241 } else {
5242 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005243 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5244 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005245 }
5246 SKIP(2);
5247
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005248#ifdef LIBXML_CATALOG_ENABLED
5249 if (((state == XML_PARSER_MISC) ||
5250 (state == XML_PARSER_START)) &&
5251 (xmlStrEqual(target, XML_CATALOG_PI))) {
5252 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5253 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5254 (allow == XML_CATA_ALLOW_ALL))
5255 xmlParseCatalogPI(ctxt, buf);
5256 }
5257#endif
5258
5259
Owen Taylor3473f882001-02-23 17:55:21 +00005260 /*
5261 * SAX: PI detected.
5262 */
5263 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5264 (ctxt->sax->processingInstruction != NULL))
5265 ctxt->sax->processingInstruction(ctxt->userData,
5266 target, buf);
5267 }
5268 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005269 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005270 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005271 }
Chris Evans77404b82011-12-14 16:18:25 +08005272 if (ctxt->instate != XML_PARSER_EOF)
5273 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005274 }
5275}
5276
5277/**
5278 * xmlParseNotationDecl:
5279 * @ctxt: an XML parser context
5280 *
5281 * parse a notation declaration
5282 *
5283 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5284 *
5285 * Hence there is actually 3 choices:
5286 * 'PUBLIC' S PubidLiteral
5287 * 'PUBLIC' S PubidLiteral S SystemLiteral
5288 * and 'SYSTEM' S SystemLiteral
5289 *
5290 * See the NOTE on xmlParseExternalID().
5291 */
5292
5293void
5294xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005295 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005296 xmlChar *Pubid;
5297 xmlChar *Systemid;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005298
Daniel Veillarda07050d2003-10-19 14:46:32 +00005299 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005300 xmlParserInputPtr input = ctxt->input;
5301 SHRINK;
5302 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005303 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005304 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5305 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005306 return;
5307 }
5308 SKIP_BLANKS;
5309
Daniel Veillard76d66f42001-05-16 21:05:17 +00005310 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005311 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005312 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005313 return;
5314 }
William M. Brack76e95df2003-10-18 16:20:14 +00005315 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005316 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005317 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005318 return;
5319 }
Daniel Veillard37334572008-07-31 08:20:02 +00005320 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005321 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005322 "colon are forbidden from notation names '%s'\n",
5323 name, NULL, NULL);
5324 }
Owen Taylor3473f882001-02-23 17:55:21 +00005325 SKIP_BLANKS;
5326
5327 /*
5328 * Parse the IDs.
5329 */
5330 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5331 SKIP_BLANKS;
5332
5333 if (RAW == '>') {
5334 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005335 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5336 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005337 }
5338 NEXT;
5339 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5340 (ctxt->sax->notationDecl != NULL))
5341 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5342 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005343 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005344 }
Owen Taylor3473f882001-02-23 17:55:21 +00005345 if (Systemid != NULL) xmlFree(Systemid);
5346 if (Pubid != NULL) xmlFree(Pubid);
5347 }
5348}
5349
5350/**
5351 * xmlParseEntityDecl:
5352 * @ctxt: an XML parser context
5353 *
5354 * parse <!ENTITY declarations
5355 *
5356 * [70] EntityDecl ::= GEDecl | PEDecl
5357 *
5358 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5359 *
5360 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5361 *
5362 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5363 *
5364 * [74] PEDef ::= EntityValue | ExternalID
5365 *
5366 * [76] NDataDecl ::= S 'NDATA' S Name
5367 *
5368 * [ VC: Notation Declared ]
5369 * The Name must match the declared name of a notation.
5370 */
5371
5372void
5373xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005374 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005375 xmlChar *value = NULL;
5376 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005377 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005378 int isParameter = 0;
5379 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005380 int skipped;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005381
Daniel Veillard4c778d82005-01-23 17:37:44 +00005382 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005383 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005384 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005385 SHRINK;
5386 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005387 skipped = SKIP_BLANKS;
5388 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005389 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5390 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005391 }
Owen Taylor3473f882001-02-23 17:55:21 +00005392
5393 if (RAW == '%') {
5394 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005395 skipped = SKIP_BLANKS;
5396 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005397 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5398 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005399 }
Owen Taylor3473f882001-02-23 17:55:21 +00005400 isParameter = 1;
5401 }
5402
Daniel Veillard76d66f42001-05-16 21:05:17 +00005403 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005404 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005405 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5406 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005407 return;
5408 }
Daniel Veillard37334572008-07-31 08:20:02 +00005409 if (xmlStrchr(name, ':') != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005410 xmlNsErr(ctxt, XML_NS_ERR_COLON,
Daniel Veillard37334572008-07-31 08:20:02 +00005411 "colon are forbidden from entities names '%s'\n",
5412 name, NULL, NULL);
5413 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005414 skipped = SKIP_BLANKS;
5415 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005416 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5417 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005418 }
Owen Taylor3473f882001-02-23 17:55:21 +00005419
Daniel Veillardf5582f12002-06-11 10:08:16 +00005420 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005421 /*
5422 * handle the various case of definitions...
5423 */
5424 if (isParameter) {
5425 if ((RAW == '"') || (RAW == '\'')) {
5426 value = xmlParseEntityValue(ctxt, &orig);
5427 if (value) {
5428 if ((ctxt->sax != NULL) &&
5429 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5430 ctxt->sax->entityDecl(ctxt->userData, name,
5431 XML_INTERNAL_PARAMETER_ENTITY,
5432 NULL, NULL, value);
5433 }
5434 } else {
5435 URI = xmlParseExternalID(ctxt, &literal, 1);
5436 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005437 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005438 }
5439 if (URI) {
5440 xmlURIPtr uri;
5441
5442 uri = xmlParseURI((const char *) URI);
5443 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005444 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5445 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005446 /*
5447 * This really ought to be a well formedness error
5448 * but the XML Core WG decided otherwise c.f. issue
5449 * E26 of the XML erratas.
5450 */
Owen Taylor3473f882001-02-23 17:55:21 +00005451 } else {
5452 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005453 /*
5454 * Okay this is foolish to block those but not
5455 * invalid URIs.
5456 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005457 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005458 } else {
5459 if ((ctxt->sax != NULL) &&
5460 (!ctxt->disableSAX) &&
5461 (ctxt->sax->entityDecl != NULL))
5462 ctxt->sax->entityDecl(ctxt->userData, name,
5463 XML_EXTERNAL_PARAMETER_ENTITY,
5464 literal, URI, NULL);
5465 }
5466 xmlFreeURI(uri);
5467 }
5468 }
5469 }
5470 } else {
5471 if ((RAW == '"') || (RAW == '\'')) {
5472 value = xmlParseEntityValue(ctxt, &orig);
5473 if ((ctxt->sax != NULL) &&
5474 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5475 ctxt->sax->entityDecl(ctxt->userData, name,
5476 XML_INTERNAL_GENERAL_ENTITY,
5477 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005478 /*
5479 * For expat compatibility in SAX mode.
5480 */
5481 if ((ctxt->myDoc == NULL) ||
5482 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5483 if (ctxt->myDoc == NULL) {
5484 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005485 if (ctxt->myDoc == NULL) {
5486 xmlErrMemory(ctxt, "New Doc failed");
5487 return;
5488 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005489 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005490 }
5491 if (ctxt->myDoc->intSubset == NULL)
5492 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5493 BAD_CAST "fake", NULL, NULL);
5494
Daniel Veillard1af9a412003-08-20 22:54:39 +00005495 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5496 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005497 }
Owen Taylor3473f882001-02-23 17:55:21 +00005498 } else {
5499 URI = xmlParseExternalID(ctxt, &literal, 1);
5500 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005501 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005502 }
5503 if (URI) {
5504 xmlURIPtr uri;
5505
5506 uri = xmlParseURI((const char *)URI);
5507 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005508 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5509 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005510 /*
5511 * This really ought to be a well formedness error
5512 * but the XML Core WG decided otherwise c.f. issue
5513 * E26 of the XML erratas.
5514 */
Owen Taylor3473f882001-02-23 17:55:21 +00005515 } else {
5516 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005517 /*
5518 * Okay this is foolish to block those but not
5519 * invalid URIs.
5520 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005521 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005522 }
5523 xmlFreeURI(uri);
5524 }
5525 }
William M. Brack76e95df2003-10-18 16:20:14 +00005526 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005527 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5528 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005529 }
5530 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005531 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005532 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005533 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005534 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5535 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005536 }
5537 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005538 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005539 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5540 (ctxt->sax->unparsedEntityDecl != NULL))
5541 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5542 literal, URI, ndata);
5543 } else {
5544 if ((ctxt->sax != NULL) &&
5545 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5546 ctxt->sax->entityDecl(ctxt->userData, name,
5547 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5548 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005549 /*
5550 * For expat compatibility in SAX mode.
5551 * assuming the entity repalcement was asked for
5552 */
5553 if ((ctxt->replaceEntities != 0) &&
5554 ((ctxt->myDoc == NULL) ||
5555 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5556 if (ctxt->myDoc == NULL) {
5557 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005558 if (ctxt->myDoc == NULL) {
5559 xmlErrMemory(ctxt, "New Doc failed");
5560 return;
5561 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005562 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005563 }
5564
5565 if (ctxt->myDoc->intSubset == NULL)
5566 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5567 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005568 xmlSAX2EntityDecl(ctxt, name,
5569 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5570 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005571 }
Owen Taylor3473f882001-02-23 17:55:21 +00005572 }
5573 }
5574 }
5575 SKIP_BLANKS;
5576 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005577 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005578 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005579 } else {
5580 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005581 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5582 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005583 }
5584 NEXT;
5585 }
5586 if (orig != NULL) {
5587 /*
5588 * Ugly mechanism to save the raw entity value.
5589 */
5590 xmlEntityPtr cur = NULL;
5591
5592 if (isParameter) {
5593 if ((ctxt->sax != NULL) &&
5594 (ctxt->sax->getParameterEntity != NULL))
5595 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5596 } else {
5597 if ((ctxt->sax != NULL) &&
5598 (ctxt->sax->getEntity != NULL))
5599 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005600 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005601 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005602 }
Owen Taylor3473f882001-02-23 17:55:21 +00005603 }
5604 if (cur != NULL) {
5605 if (cur->orig != NULL)
5606 xmlFree(orig);
5607 else
5608 cur->orig = orig;
5609 } else
5610 xmlFree(orig);
5611 }
Owen Taylor3473f882001-02-23 17:55:21 +00005612 if (value != NULL) xmlFree(value);
5613 if (URI != NULL) xmlFree(URI);
5614 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005615 }
5616}
5617
5618/**
5619 * xmlParseDefaultDecl:
5620 * @ctxt: an XML parser context
5621 * @value: Receive a possible fixed default value for the attribute
5622 *
5623 * Parse an attribute default declaration
5624 *
5625 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5626 *
5627 * [ VC: Required Attribute ]
5628 * if the default declaration is the keyword #REQUIRED, then the
5629 * attribute must be specified for all elements of the type in the
5630 * attribute-list declaration.
5631 *
5632 * [ VC: Attribute Default Legal ]
5633 * The declared default value must meet the lexical constraints of
5634 * the declared attribute type c.f. xmlValidateAttributeDecl()
5635 *
5636 * [ VC: Fixed Attribute Default ]
5637 * if an attribute has a default value declared with the #FIXED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005638 * keyword, instances of that attribute must match the default value.
Owen Taylor3473f882001-02-23 17:55:21 +00005639 *
5640 * [ WFC: No < in Attribute Values ]
5641 * handled in xmlParseAttValue()
5642 *
5643 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005644 * or XML_ATTRIBUTE_FIXED.
Owen Taylor3473f882001-02-23 17:55:21 +00005645 */
5646
5647int
5648xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5649 int val;
5650 xmlChar *ret;
5651
5652 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005653 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005654 SKIP(9);
5655 return(XML_ATTRIBUTE_REQUIRED);
5656 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005657 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005658 SKIP(8);
5659 return(XML_ATTRIBUTE_IMPLIED);
5660 }
5661 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005662 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005663 SKIP(6);
5664 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005665 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005666 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5667 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005668 }
5669 SKIP_BLANKS;
5670 }
5671 ret = xmlParseAttValue(ctxt);
5672 ctxt->instate = XML_PARSER_DTD;
5673 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005674 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005675 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005676 } else
5677 *value = ret;
5678 return(val);
5679}
5680
5681/**
5682 * xmlParseNotationType:
5683 * @ctxt: an XML parser context
5684 *
5685 * parse an Notation attribute type.
5686 *
5687 * Note: the leading 'NOTATION' S part has already being parsed...
5688 *
5689 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5690 *
5691 * [ VC: Notation Attributes ]
5692 * Values of this type must match one of the notation names included
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005693 * in the declaration; all notation names in the declaration must be declared.
Owen Taylor3473f882001-02-23 17:55:21 +00005694 *
5695 * Returns: the notation attribute tree built while parsing
5696 */
5697
5698xmlEnumerationPtr
5699xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005700 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005701 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005702
5703 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005704 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005705 return(NULL);
5706 }
5707 SHRINK;
5708 do {
5709 NEXT;
5710 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005711 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005712 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005713 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5714 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005715 xmlFreeEnumeration(ret);
5716 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005717 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005718 tmp = ret;
5719 while (tmp != NULL) {
5720 if (xmlStrEqual(name, tmp->name)) {
5721 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5722 "standalone: attribute notation value token %s duplicated\n",
5723 name, NULL);
5724 if (!xmlDictOwns(ctxt->dict, name))
5725 xmlFree((xmlChar *) name);
5726 break;
5727 }
5728 tmp = tmp->next;
5729 }
5730 if (tmp == NULL) {
5731 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005732 if (cur == NULL) {
5733 xmlFreeEnumeration(ret);
5734 return(NULL);
5735 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005736 if (last == NULL) ret = last = cur;
5737 else {
5738 last->next = cur;
5739 last = cur;
5740 }
Owen Taylor3473f882001-02-23 17:55:21 +00005741 }
5742 SKIP_BLANKS;
5743 } while (RAW == '|');
5744 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005745 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005746 xmlFreeEnumeration(ret);
5747 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005748 }
5749 NEXT;
5750 return(ret);
5751}
5752
5753/**
5754 * xmlParseEnumerationType:
5755 * @ctxt: an XML parser context
5756 *
5757 * parse an Enumeration attribute type.
5758 *
5759 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5760 *
5761 * [ VC: Enumeration ]
5762 * Values of this type must match one of the Nmtoken tokens in
5763 * the declaration
5764 *
5765 * Returns: the enumeration attribute tree built while parsing
5766 */
5767
5768xmlEnumerationPtr
5769xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5770 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005771 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005772
5773 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005774 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005775 return(NULL);
5776 }
5777 SHRINK;
5778 do {
5779 NEXT;
5780 SKIP_BLANKS;
5781 name = xmlParseNmtoken(ctxt);
5782 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005783 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005784 return(ret);
5785 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005786 tmp = ret;
5787 while (tmp != NULL) {
5788 if (xmlStrEqual(name, tmp->name)) {
5789 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5790 "standalone: attribute enumeration value token %s duplicated\n",
5791 name, NULL);
5792 if (!xmlDictOwns(ctxt->dict, name))
5793 xmlFree(name);
5794 break;
5795 }
5796 tmp = tmp->next;
5797 }
5798 if (tmp == NULL) {
5799 cur = xmlCreateEnumeration(name);
5800 if (!xmlDictOwns(ctxt->dict, name))
5801 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005802 if (cur == NULL) {
5803 xmlFreeEnumeration(ret);
5804 return(NULL);
5805 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005806 if (last == NULL) ret = last = cur;
5807 else {
5808 last->next = cur;
5809 last = cur;
5810 }
Owen Taylor3473f882001-02-23 17:55:21 +00005811 }
5812 SKIP_BLANKS;
5813 } while (RAW == '|');
5814 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005815 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005816 return(ret);
5817 }
5818 NEXT;
5819 return(ret);
5820}
5821
5822/**
5823 * xmlParseEnumeratedType:
5824 * @ctxt: an XML parser context
5825 * @tree: the enumeration tree built while parsing
5826 *
5827 * parse an Enumerated attribute type.
5828 *
5829 * [57] EnumeratedType ::= NotationType | Enumeration
5830 *
5831 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5832 *
5833 *
5834 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5835 */
5836
5837int
5838xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005839 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005840 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005841 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005842 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5843 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005844 return(0);
5845 }
5846 SKIP_BLANKS;
5847 *tree = xmlParseNotationType(ctxt);
5848 if (*tree == NULL) return(0);
5849 return(XML_ATTRIBUTE_NOTATION);
5850 }
5851 *tree = xmlParseEnumerationType(ctxt);
5852 if (*tree == NULL) return(0);
5853 return(XML_ATTRIBUTE_ENUMERATION);
5854}
5855
5856/**
5857 * xmlParseAttributeType:
5858 * @ctxt: an XML parser context
5859 * @tree: the enumeration tree built while parsing
5860 *
5861 * parse the Attribute list def for an element
5862 *
5863 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5864 *
5865 * [55] StringType ::= 'CDATA'
5866 *
5867 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5868 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5869 *
5870 * Validity constraints for attribute values syntax are checked in
5871 * xmlValidateAttributeValue()
5872 *
5873 * [ VC: ID ]
5874 * Values of type ID must match the Name production. A name must not
5875 * appear more than once in an XML document as a value of this type;
5876 * i.e., ID values must uniquely identify the elements which bear them.
5877 *
5878 * [ VC: One ID per Element Type ]
5879 * No element type may have more than one ID attribute specified.
5880 *
5881 * [ VC: ID Attribute Default ]
5882 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5883 *
5884 * [ VC: IDREF ]
5885 * Values of type IDREF must match the Name production, and values
5886 * of type IDREFS must match Names; each IDREF Name must match the value
5887 * of an ID attribute on some element in the XML document; i.e. IDREF
5888 * values must match the value of some ID attribute.
5889 *
5890 * [ VC: Entity Name ]
5891 * Values of type ENTITY must match the Name production, values
5892 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005893 * name of an unparsed entity declared in the DTD.
Owen Taylor3473f882001-02-23 17:55:21 +00005894 *
5895 * [ VC: Name Token ]
5896 * Values of type NMTOKEN must match the Nmtoken production; values
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005897 * of type NMTOKENS must match Nmtokens.
Owen Taylor3473f882001-02-23 17:55:21 +00005898 *
5899 * Returns the attribute type
5900 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08005901int
Owen Taylor3473f882001-02-23 17:55:21 +00005902xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5903 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005904 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005905 SKIP(5);
5906 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005907 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005908 SKIP(6);
5909 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005910 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005911 SKIP(5);
5912 return(XML_ATTRIBUTE_IDREF);
5913 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5914 SKIP(2);
5915 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005916 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005917 SKIP(6);
5918 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005919 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005920 SKIP(8);
5921 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005922 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005923 SKIP(8);
5924 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005925 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005926 SKIP(7);
5927 return(XML_ATTRIBUTE_NMTOKEN);
5928 }
5929 return(xmlParseEnumeratedType(ctxt, tree));
5930}
5931
5932/**
5933 * xmlParseAttributeListDecl:
5934 * @ctxt: an XML parser context
5935 *
5936 * : parse the Attribute list def for an element
5937 *
5938 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5939 *
5940 * [53] AttDef ::= S Name S AttType S DefaultDecl
5941 *
5942 */
5943void
5944xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005945 const xmlChar *elemName;
5946 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005947 xmlEnumerationPtr tree;
5948
Daniel Veillarda07050d2003-10-19 14:46:32 +00005949 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005950 xmlParserInputPtr input = ctxt->input;
5951
5952 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005953 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005954 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005955 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005956 }
5957 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005958 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005959 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005960 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5961 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005962 return;
5963 }
5964 SKIP_BLANKS;
5965 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005966 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005967 const xmlChar *check = CUR_PTR;
5968 int type;
5969 int def;
5970 xmlChar *defaultValue = NULL;
5971
5972 GROW;
5973 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005974 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005975 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005976 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5977 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005978 break;
5979 }
5980 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005981 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005982 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005983 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005984 break;
5985 }
5986 SKIP_BLANKS;
5987
5988 type = xmlParseAttributeType(ctxt, &tree);
5989 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005990 break;
5991 }
5992
5993 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005994 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005995 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5996 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005997 if (tree != NULL)
5998 xmlFreeEnumeration(tree);
5999 break;
6000 }
6001 SKIP_BLANKS;
6002
6003 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6004 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006005 if (defaultValue != NULL)
6006 xmlFree(defaultValue);
6007 if (tree != NULL)
6008 xmlFreeEnumeration(tree);
6009 break;
6010 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006011 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6012 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006013
6014 GROW;
6015 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006016 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006017 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006018 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006019 if (defaultValue != NULL)
6020 xmlFree(defaultValue);
6021 if (tree != NULL)
6022 xmlFreeEnumeration(tree);
6023 break;
6024 }
6025 SKIP_BLANKS;
6026 }
6027 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006028 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6029 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006030 if (defaultValue != NULL)
6031 xmlFree(defaultValue);
6032 if (tree != NULL)
6033 xmlFreeEnumeration(tree);
6034 break;
6035 }
6036 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6037 (ctxt->sax->attributeDecl != NULL))
6038 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6039 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006040 else if (tree != NULL)
6041 xmlFreeEnumeration(tree);
6042
6043 if ((ctxt->sax2) && (defaultValue != NULL) &&
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006044 (def != XML_ATTRIBUTE_IMPLIED) &&
Daniel Veillarde57ec792003-09-10 10:50:59 +00006045 (def != XML_ATTRIBUTE_REQUIRED)) {
6046 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6047 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006048 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006049 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6050 }
Owen Taylor3473f882001-02-23 17:55:21 +00006051 if (defaultValue != NULL)
6052 xmlFree(defaultValue);
6053 GROW;
6054 }
6055 if (RAW == '>') {
6056 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006057 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6058 "Attribute list declaration doesn't start and stop in the same entity\n",
6059 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006060 }
6061 NEXT;
6062 }
Owen Taylor3473f882001-02-23 17:55:21 +00006063 }
6064}
6065
6066/**
6067 * xmlParseElementMixedContentDecl:
6068 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006069 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006070 *
6071 * parse the declaration for a Mixed Element content
6072 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006073 *
Owen Taylor3473f882001-02-23 17:55:21 +00006074 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6075 * '(' S? '#PCDATA' S? ')'
6076 *
6077 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6078 *
6079 * [ VC: No Duplicate Types ]
6080 * The same name must not appear more than once in a single
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006081 * mixed-content declaration.
Owen Taylor3473f882001-02-23 17:55:21 +00006082 *
6083 * returns: the list of the xmlElementContentPtr describing the element choices
6084 */
6085xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006086xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006087 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006088 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006089
6090 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006091 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006092 SKIP(7);
6093 SKIP_BLANKS;
6094 SHRINK;
6095 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006096 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006097 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6098"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006099 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006100 }
Owen Taylor3473f882001-02-23 17:55:21 +00006101 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006102 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006103 if (ret == NULL)
6104 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006105 if (RAW == '*') {
6106 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6107 NEXT;
6108 }
6109 return(ret);
6110 }
6111 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006112 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006113 if (ret == NULL) return(NULL);
6114 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006115 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006116 NEXT;
6117 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006118 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006119 if (ret == NULL) return(NULL);
6120 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006121 if (cur != NULL)
6122 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006123 cur = ret;
6124 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006125 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006126 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006127 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006128 if (n->c1 != NULL)
6129 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006130 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006131 if (n != NULL)
6132 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006133 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006134 }
6135 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006136 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006137 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006138 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006139 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006140 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00006141 return(NULL);
6142 }
6143 SKIP_BLANKS;
6144 GROW;
6145 }
6146 if ((RAW == ')') && (NXT(1) == '*')) {
6147 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006148 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006149 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006150 if (cur->c2 != NULL)
6151 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006152 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006153 if (ret != NULL)
6154 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006155 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006156 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6157"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006158 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006159 }
Owen Taylor3473f882001-02-23 17:55:21 +00006160 SKIP(2);
6161 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006162 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006163 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006164 return(NULL);
6165 }
6166
6167 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006168 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006169 }
6170 return(ret);
6171}
6172
6173/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006174 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006175 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006176 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006177 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006178 *
6179 * parse the declaration for a Mixed Element content
6180 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006181 *
Owen Taylor3473f882001-02-23 17:55:21 +00006182 *
6183 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6184 *
6185 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6186 *
6187 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6188 *
6189 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6190 *
6191 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6192 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006193 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006194 * opening or closing parentheses in a choice, seq, or Mixed
6195 * construct is contained in the replacement text for a parameter
6196 * entity, both must be contained in the same replacement text. For
6197 * interoperability, if a parameter-entity reference appears in a
6198 * choice, seq, or Mixed construct, its replacement text should not
6199 * be empty, and neither the first nor last non-blank character of
6200 * the replacement text should be a connector (| or ,).
6201 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006202 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006203 * hierarchy.
6204 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006205static xmlElementContentPtr
6206xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6207 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006208 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006209 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006210 xmlChar type = 0;
6211
Daniel Veillard489f9672009-08-10 16:49:30 +02006212 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6213 (depth > 2048)) {
6214 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6215"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6216 depth);
6217 return(NULL);
6218 }
Owen Taylor3473f882001-02-23 17:55:21 +00006219 SKIP_BLANKS;
6220 GROW;
6221 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006222 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006223
Owen Taylor3473f882001-02-23 17:55:21 +00006224 /* Recurse on first child */
6225 NEXT;
6226 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006227 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6228 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006229 SKIP_BLANKS;
6230 GROW;
6231 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006232 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006233 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006234 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006235 return(NULL);
6236 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006237 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006238 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006239 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006240 return(NULL);
6241 }
Owen Taylor3473f882001-02-23 17:55:21 +00006242 GROW;
6243 if (RAW == '?') {
6244 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6245 NEXT;
6246 } else if (RAW == '*') {
6247 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6248 NEXT;
6249 } else if (RAW == '+') {
6250 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6251 NEXT;
6252 } else {
6253 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6254 }
Owen Taylor3473f882001-02-23 17:55:21 +00006255 GROW;
6256 }
6257 SKIP_BLANKS;
6258 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006259 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006260 /*
6261 * Each loop we parse one separator and one element.
6262 */
6263 if (RAW == ',') {
6264 if (type == 0) type = CUR;
6265
6266 /*
6267 * Detect "Name | Name , Name" error
6268 */
6269 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006270 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006271 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006272 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006273 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006274 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006275 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006276 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006277 return(NULL);
6278 }
6279 NEXT;
6280
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006281 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006282 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006283 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006284 xmlFreeDocElementContent(ctxt->myDoc, last);
6285 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006286 return(NULL);
6287 }
6288 if (last == NULL) {
6289 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006290 if (ret != NULL)
6291 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006292 ret = cur = op;
6293 } else {
6294 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006295 if (op != NULL)
6296 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006297 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006298 if (last != NULL)
6299 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006300 cur =op;
6301 last = NULL;
6302 }
6303 } else if (RAW == '|') {
6304 if (type == 0) type = CUR;
6305
6306 /*
6307 * Detect "Name , Name | Name" error
6308 */
6309 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006310 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006311 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006312 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006313 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006314 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006315 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006316 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006317 return(NULL);
6318 }
6319 NEXT;
6320
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006321 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006322 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006323 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006324 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006325 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006326 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006327 return(NULL);
6328 }
6329 if (last == NULL) {
6330 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006331 if (ret != NULL)
6332 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006333 ret = cur = op;
6334 } else {
6335 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006336 if (op != NULL)
6337 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006338 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006339 if (last != NULL)
6340 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006341 cur =op;
6342 last = NULL;
6343 }
6344 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006345 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006346 if ((last != NULL) && (last != ret))
6347 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006348 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006349 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006350 return(NULL);
6351 }
6352 GROW;
6353 SKIP_BLANKS;
6354 GROW;
6355 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006356 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006357 /* Recurse on second child */
6358 NEXT;
6359 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006360 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6361 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006362 SKIP_BLANKS;
6363 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006364 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006365 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006366 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006367 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006368 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006369 return(NULL);
6370 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006371 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006372 if (last == NULL) {
6373 if (ret != NULL)
6374 xmlFreeDocElementContent(ctxt->myDoc, ret);
6375 return(NULL);
6376 }
Owen Taylor3473f882001-02-23 17:55:21 +00006377 if (RAW == '?') {
6378 last->ocur = XML_ELEMENT_CONTENT_OPT;
6379 NEXT;
6380 } else if (RAW == '*') {
6381 last->ocur = XML_ELEMENT_CONTENT_MULT;
6382 NEXT;
6383 } else if (RAW == '+') {
6384 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6385 NEXT;
6386 } else {
6387 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6388 }
6389 }
6390 SKIP_BLANKS;
6391 GROW;
6392 }
6393 if ((cur != NULL) && (last != NULL)) {
6394 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006395 if (last != NULL)
6396 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006397 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006398 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006399 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6400"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006401 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006402 }
Owen Taylor3473f882001-02-23 17:55:21 +00006403 NEXT;
6404 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006405 if (ret != NULL) {
6406 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6407 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6408 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6409 else
6410 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6411 }
Owen Taylor3473f882001-02-23 17:55:21 +00006412 NEXT;
6413 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006414 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006415 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006416 cur = ret;
6417 /*
6418 * Some normalization:
6419 * (a | b* | c?)* == (a | b | c)*
6420 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006421 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006422 if ((cur->c1 != NULL) &&
6423 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6424 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6425 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6426 if ((cur->c2 != NULL) &&
6427 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6428 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6429 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6430 cur = cur->c2;
6431 }
6432 }
Owen Taylor3473f882001-02-23 17:55:21 +00006433 NEXT;
6434 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006435 if (ret != NULL) {
6436 int found = 0;
6437
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006438 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6439 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6440 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006441 else
6442 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006443 /*
6444 * Some normalization:
6445 * (a | b*)+ == (a | b)*
6446 * (a | b?)+ == (a | b)*
6447 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006448 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006449 if ((cur->c1 != NULL) &&
6450 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6451 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6452 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6453 found = 1;
6454 }
6455 if ((cur->c2 != NULL) &&
6456 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6457 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6458 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6459 found = 1;
6460 }
6461 cur = cur->c2;
6462 }
6463 if (found)
6464 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6465 }
Owen Taylor3473f882001-02-23 17:55:21 +00006466 NEXT;
6467 }
6468 return(ret);
6469}
6470
6471/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006472 * xmlParseElementChildrenContentDecl:
6473 * @ctxt: an XML parser context
6474 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006475 *
6476 * parse the declaration for a Mixed Element content
6477 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6478 *
6479 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6480 *
6481 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6482 *
6483 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6484 *
6485 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6486 *
6487 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6488 * TODO Parameter-entity replacement text must be properly nested
6489 * with parenthesized groups. That is to say, if either of the
6490 * opening or closing parentheses in a choice, seq, or Mixed
6491 * construct is contained in the replacement text for a parameter
6492 * entity, both must be contained in the same replacement text. For
6493 * interoperability, if a parameter-entity reference appears in a
6494 * choice, seq, or Mixed construct, its replacement text should not
6495 * be empty, and neither the first nor last non-blank character of
6496 * the replacement text should be a connector (| or ,).
6497 *
6498 * Returns the tree of xmlElementContentPtr describing the element
6499 * hierarchy.
6500 */
6501xmlElementContentPtr
6502xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6503 /* stub left for API/ABI compat */
6504 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6505}
6506
6507/**
Owen Taylor3473f882001-02-23 17:55:21 +00006508 * xmlParseElementContentDecl:
6509 * @ctxt: an XML parser context
6510 * @name: the name of the element being defined.
6511 * @result: the Element Content pointer will be stored here if any
6512 *
6513 * parse the declaration for an Element content either Mixed or Children,
6514 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006515 *
Owen Taylor3473f882001-02-23 17:55:21 +00006516 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6517 *
6518 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6519 */
6520
6521int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006522xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006523 xmlElementContentPtr *result) {
6524
6525 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006526 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006527 int res;
6528
6529 *result = NULL;
6530
6531 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006532 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006533 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006534 return(-1);
6535 }
6536 NEXT;
6537 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006538 if (ctxt->instate == XML_PARSER_EOF)
6539 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006540 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006541 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006542 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006543 res = XML_ELEMENT_TYPE_MIXED;
6544 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006545 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006546 res = XML_ELEMENT_TYPE_ELEMENT;
6547 }
Owen Taylor3473f882001-02-23 17:55:21 +00006548 SKIP_BLANKS;
6549 *result = tree;
6550 return(res);
6551}
6552
6553/**
6554 * xmlParseElementDecl:
6555 * @ctxt: an XML parser context
6556 *
6557 * parse an Element declaration.
6558 *
6559 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6560 *
6561 * [ VC: Unique Element Type Declaration ]
6562 * No element type may be declared more than once
6563 *
6564 * Returns the type of the element, or -1 in case of error
6565 */
6566int
6567xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006568 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006569 int ret = -1;
6570 xmlElementContentPtr content = NULL;
6571
Daniel Veillard4c778d82005-01-23 17:37:44 +00006572 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006573 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006574 xmlParserInputPtr input = ctxt->input;
6575
6576 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006577 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006578 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6579 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006580 }
6581 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006582 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006583 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006584 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6585 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006586 return(-1);
6587 }
6588 while ((RAW == 0) && (ctxt->inputNr > 1))
6589 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006590 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006591 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6592 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006593 }
6594 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006595 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006596 SKIP(5);
6597 /*
6598 * Element must always be empty.
6599 */
6600 ret = XML_ELEMENT_TYPE_EMPTY;
6601 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6602 (NXT(2) == 'Y')) {
6603 SKIP(3);
6604 /*
6605 * Element is a generic container.
6606 */
6607 ret = XML_ELEMENT_TYPE_ANY;
6608 } else if (RAW == '(') {
6609 ret = xmlParseElementContentDecl(ctxt, name, &content);
6610 } else {
6611 /*
6612 * [ WFC: PEs in Internal Subset ] error handling.
6613 */
6614 if ((RAW == '%') && (ctxt->external == 0) &&
6615 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006616 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006617 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006618 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006619 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006620 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6621 }
Owen Taylor3473f882001-02-23 17:55:21 +00006622 return(-1);
6623 }
6624
6625 SKIP_BLANKS;
6626 /*
6627 * Pop-up of finished entities.
6628 */
6629 while ((RAW == 0) && (ctxt->inputNr > 1))
6630 xmlPopInput(ctxt);
6631 SKIP_BLANKS;
6632
6633 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006634 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006635 if (content != NULL) {
6636 xmlFreeDocElementContent(ctxt->myDoc, content);
6637 }
Owen Taylor3473f882001-02-23 17:55:21 +00006638 } else {
6639 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006640 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6641 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006642 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006643
Owen Taylor3473f882001-02-23 17:55:21 +00006644 NEXT;
6645 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006646 (ctxt->sax->elementDecl != NULL)) {
6647 if (content != NULL)
6648 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006649 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6650 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006651 if ((content != NULL) && (content->parent == NULL)) {
6652 /*
6653 * this is a trick: if xmlAddElementDecl is called,
6654 * instead of copying the full tree it is plugged directly
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006655 * if called from the parser. Avoid duplicating the
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006656 * interfaces or change the API/ABI
6657 */
6658 xmlFreeDocElementContent(ctxt->myDoc, content);
6659 }
6660 } else if (content != NULL) {
6661 xmlFreeDocElementContent(ctxt->myDoc, content);
6662 }
Owen Taylor3473f882001-02-23 17:55:21 +00006663 }
Owen Taylor3473f882001-02-23 17:55:21 +00006664 }
6665 return(ret);
6666}
6667
6668/**
Owen Taylor3473f882001-02-23 17:55:21 +00006669 * xmlParseConditionalSections
6670 * @ctxt: an XML parser context
6671 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006672 * [61] conditionalSect ::= includeSect | ignoreSect
6673 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
Owen Taylor3473f882001-02-23 17:55:21 +00006674 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6675 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6676 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6677 */
6678
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006679static void
Owen Taylor3473f882001-02-23 17:55:21 +00006680xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006681 int id = ctxt->input->id;
6682
Owen Taylor3473f882001-02-23 17:55:21 +00006683 SKIP(3);
6684 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006685 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006686 SKIP(7);
6687 SKIP_BLANKS;
6688 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006689 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006690 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006691 if (ctxt->input->id != id) {
6692 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6693 "All markup of the conditional section is not in the same entity\n",
6694 NULL, NULL);
6695 }
Owen Taylor3473f882001-02-23 17:55:21 +00006696 NEXT;
6697 }
6698 if (xmlParserDebugEntities) {
6699 if ((ctxt->input != NULL) && (ctxt->input->filename))
6700 xmlGenericError(xmlGenericErrorContext,
6701 "%s(%d): ", ctxt->input->filename,
6702 ctxt->input->line);
6703 xmlGenericError(xmlGenericErrorContext,
6704 "Entering INCLUDE Conditional Section\n");
6705 }
6706
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006707 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6708 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006709 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006710 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006711
6712 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6713 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006714 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006715 NEXT;
6716 } else if (RAW == '%') {
6717 xmlParsePEReference(ctxt);
6718 } else
6719 xmlParseMarkupDecl(ctxt);
6720
6721 /*
6722 * Pop-up of finished entities.
6723 */
6724 while ((RAW == 0) && (ctxt->inputNr > 1))
6725 xmlPopInput(ctxt);
6726
Daniel Veillardfdc91562002-07-01 21:52:03 +00006727 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006728 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006729 break;
6730 }
6731 }
6732 if (xmlParserDebugEntities) {
6733 if ((ctxt->input != NULL) && (ctxt->input->filename))
6734 xmlGenericError(xmlGenericErrorContext,
6735 "%s(%d): ", ctxt->input->filename,
6736 ctxt->input->line);
6737 xmlGenericError(xmlGenericErrorContext,
6738 "Leaving INCLUDE Conditional Section\n");
6739 }
6740
Daniel Veillarda07050d2003-10-19 14:46:32 +00006741 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006742 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006743 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006744 int depth = 0;
6745
6746 SKIP(6);
6747 SKIP_BLANKS;
6748 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006749 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006750 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006751 if (ctxt->input->id != id) {
6752 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6753 "All markup of the conditional section is not in the same entity\n",
6754 NULL, NULL);
6755 }
Owen Taylor3473f882001-02-23 17:55:21 +00006756 NEXT;
6757 }
6758 if (xmlParserDebugEntities) {
6759 if ((ctxt->input != NULL) && (ctxt->input->filename))
6760 xmlGenericError(xmlGenericErrorContext,
6761 "%s(%d): ", ctxt->input->filename,
6762 ctxt->input->line);
6763 xmlGenericError(xmlGenericErrorContext,
6764 "Entering IGNORE Conditional Section\n");
6765 }
6766
6767 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006768 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006769 * But disable SAX event generating DTD building in the meantime
6770 */
6771 state = ctxt->disableSAX;
6772 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006773 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006774 ctxt->instate = XML_PARSER_IGNORE;
6775
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006776 while (((depth >= 0) && (RAW != 0)) &&
6777 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006778 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6779 depth++;
6780 SKIP(3);
6781 continue;
6782 }
6783 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6784 if (--depth >= 0) SKIP(3);
6785 continue;
6786 }
6787 NEXT;
6788 continue;
6789 }
6790
6791 ctxt->disableSAX = state;
6792 ctxt->instate = instate;
6793
6794 if (xmlParserDebugEntities) {
6795 if ((ctxt->input != NULL) && (ctxt->input->filename))
6796 xmlGenericError(xmlGenericErrorContext,
6797 "%s(%d): ", ctxt->input->filename,
6798 ctxt->input->line);
6799 xmlGenericError(xmlGenericErrorContext,
6800 "Leaving IGNORE Conditional Section\n");
6801 }
6802
6803 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006804 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006805 }
6806
6807 if (RAW == 0)
6808 SHRINK;
6809
6810 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006811 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006812 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006813 if (ctxt->input->id != id) {
6814 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6815 "All markup of the conditional section is not in the same entity\n",
6816 NULL, NULL);
6817 }
Owen Taylor3473f882001-02-23 17:55:21 +00006818 SKIP(3);
6819 }
6820}
6821
6822/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006823 * xmlParseMarkupDecl:
6824 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006825 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006826 * parse Markup declarations
6827 *
6828 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6829 * NotationDecl | PI | Comment
6830 *
6831 * [ VC: Proper Declaration/PE Nesting ]
6832 * Parameter-entity replacement text must be properly nested with
6833 * markup declarations. That is to say, if either the first character
6834 * or the last character of a markup declaration (markupdecl above) is
6835 * contained in the replacement text for a parameter-entity reference,
6836 * both must be contained in the same replacement text.
6837 *
6838 * [ WFC: PEs in Internal Subset ]
6839 * In the internal DTD subset, parameter-entity references can occur
6840 * only where markup declarations can occur, not within markup declarations.
6841 * (This does not apply to references that occur in external parameter
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006842 * entities or to the external subset.)
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006843 */
6844void
6845xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6846 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006847 if (CUR == '<') {
6848 if (NXT(1) == '!') {
6849 switch (NXT(2)) {
6850 case 'E':
6851 if (NXT(3) == 'L')
6852 xmlParseElementDecl(ctxt);
6853 else if (NXT(3) == 'N')
6854 xmlParseEntityDecl(ctxt);
6855 break;
6856 case 'A':
6857 xmlParseAttributeListDecl(ctxt);
6858 break;
6859 case 'N':
6860 xmlParseNotationDecl(ctxt);
6861 break;
6862 case '-':
6863 xmlParseComment(ctxt);
6864 break;
6865 default:
6866 /* there is an error but it will be detected later */
6867 break;
6868 }
6869 } else if (NXT(1) == '?') {
6870 xmlParsePI(ctxt);
6871 }
6872 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006873 /*
6874 * This is only for internal subset. On external entities,
6875 * the replacement is done before parsing stage
6876 */
6877 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6878 xmlParsePEReference(ctxt);
6879
6880 /*
6881 * Conditional sections are allowed from entities included
6882 * by PE References in the internal subset.
6883 */
6884 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6885 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6886 xmlParseConditionalSections(ctxt);
6887 }
6888 }
6889
6890 ctxt->instate = XML_PARSER_DTD;
6891}
6892
6893/**
6894 * xmlParseTextDecl:
6895 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006896 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006897 * parse an XML declaration header for external entities
6898 *
6899 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006900 */
6901
6902void
6903xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6904 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006905 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006906
6907 /*
6908 * We know that '<?xml' is here.
6909 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006910 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006911 SKIP(5);
6912 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006913 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006914 return;
6915 }
6916
William M. Brack76e95df2003-10-18 16:20:14 +00006917 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006918 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6919 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006920 }
6921 SKIP_BLANKS;
6922
6923 /*
6924 * We may have the VersionInfo here.
6925 */
6926 version = xmlParseVersionInfo(ctxt);
6927 if (version == NULL)
6928 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006929 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006930 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006931 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6932 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006933 }
6934 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006935 ctxt->input->version = version;
6936
6937 /*
6938 * We must have the encoding declaration
6939 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006940 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006941 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6942 /*
6943 * The XML REC instructs us to stop parsing right here
6944 */
6945 return;
6946 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006947 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6948 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6949 "Missing encoding in text declaration\n");
6950 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006951
6952 SKIP_BLANKS;
6953 if ((RAW == '?') && (NXT(1) == '>')) {
6954 SKIP(2);
6955 } else if (RAW == '>') {
6956 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006957 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006958 NEXT;
6959 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006960 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006961 MOVETO_ENDTAG(CUR_PTR);
6962 NEXT;
6963 }
6964}
6965
6966/**
Owen Taylor3473f882001-02-23 17:55:21 +00006967 * xmlParseExternalSubset:
6968 * @ctxt: an XML parser context
6969 * @ExternalID: the external identifier
6970 * @SystemID: the system identifier (or URL)
Daniel Veillardf8e3db02012-09-11 13:26:36 +08006971 *
Owen Taylor3473f882001-02-23 17:55:21 +00006972 * parse Markup declarations from an external subset
6973 *
6974 * [30] extSubset ::= textDecl? extSubsetDecl
6975 *
6976 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6977 */
6978void
6979xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6980 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006981 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006982 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006983
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006984 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006985 (ctxt->input->end - ctxt->input->cur >= 4)) {
6986 xmlChar start[4];
6987 xmlCharEncoding enc;
6988
6989 start[0] = RAW;
6990 start[1] = NXT(1);
6991 start[2] = NXT(2);
6992 start[3] = NXT(3);
6993 enc = xmlDetectCharEncoding(start, 4);
6994 if (enc != XML_CHAR_ENCODING_NONE)
6995 xmlSwitchEncoding(ctxt, enc);
6996 }
6997
Daniel Veillarda07050d2003-10-19 14:46:32 +00006998 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006999 xmlParseTextDecl(ctxt);
7000 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7001 /*
7002 * The XML REC instructs us to stop parsing right here
7003 */
7004 ctxt->instate = XML_PARSER_EOF;
7005 return;
7006 }
7007 }
7008 if (ctxt->myDoc == NULL) {
7009 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007010 if (ctxt->myDoc == NULL) {
7011 xmlErrMemory(ctxt, "New Doc failed");
7012 return;
7013 }
7014 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007015 }
7016 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7017 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7018
7019 ctxt->instate = XML_PARSER_DTD;
7020 ctxt->external = 1;
7021 while (((RAW == '<') && (NXT(1) == '?')) ||
7022 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007023 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007024 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007025 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007026
7027 GROW;
7028 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7029 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007030 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007031 NEXT;
7032 } else if (RAW == '%') {
7033 xmlParsePEReference(ctxt);
7034 } else
7035 xmlParseMarkupDecl(ctxt);
7036
7037 /*
7038 * Pop-up of finished entities.
7039 */
7040 while ((RAW == 0) && (ctxt->inputNr > 1))
7041 xmlPopInput(ctxt);
7042
Daniel Veillardfdc91562002-07-01 21:52:03 +00007043 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007044 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007045 break;
7046 }
7047 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007048
Owen Taylor3473f882001-02-23 17:55:21 +00007049 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007050 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007051 }
7052
7053}
7054
7055/**
7056 * xmlParseReference:
7057 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007058 *
Owen Taylor3473f882001-02-23 17:55:21 +00007059 * parse and handle entity references in content, depending on the SAX
7060 * interface, this may end-up in a call to character() if this is a
7061 * CharRef, a predefined entity, if there is no reference() callback.
7062 * or if the parser was asked to switch to that mode.
7063 *
7064 * [67] Reference ::= EntityRef | CharRef
7065 */
7066void
7067xmlParseReference(xmlParserCtxtPtr ctxt) {
7068 xmlEntityPtr ent;
7069 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007070 int was_checked;
7071 xmlNodePtr list = NULL;
7072 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007073
Daniel Veillard0161e632008-08-28 15:36:32 +00007074
7075 if (RAW != '&')
7076 return;
7077
7078 /*
7079 * Simple case of a CharRef
7080 */
Owen Taylor3473f882001-02-23 17:55:21 +00007081 if (NXT(1) == '#') {
7082 int i = 0;
7083 xmlChar out[10];
7084 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007085 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007086
Daniel Veillarddc171602008-03-26 17:41:38 +00007087 if (value == 0)
7088 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007089 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7090 /*
7091 * So we are using non-UTF-8 buffers
7092 * Check that the char fit on 8bits, if not
7093 * generate a CharRef.
7094 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007095 if (value <= 0xFF) {
7096 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007097 out[1] = 0;
7098 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7099 (!ctxt->disableSAX))
7100 ctxt->sax->characters(ctxt->userData, out, 1);
7101 } else {
7102 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007103 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007104 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007105 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007106 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7107 (!ctxt->disableSAX))
7108 ctxt->sax->reference(ctxt->userData, out);
7109 }
7110 } else {
7111 /*
7112 * Just encode the value in UTF-8
7113 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007114 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007115 out[i] = 0;
7116 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7117 (!ctxt->disableSAX))
7118 ctxt->sax->characters(ctxt->userData, out, i);
7119 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007120 return;
7121 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007122
Daniel Veillard0161e632008-08-28 15:36:32 +00007123 /*
7124 * We are seeing an entity reference
7125 */
7126 ent = xmlParseEntityRef(ctxt);
7127 if (ent == NULL) return;
7128 if (!ctxt->wellFormed)
7129 return;
7130 was_checked = ent->checked;
7131
7132 /* special case of predefined entities */
7133 if ((ent->name == NULL) ||
7134 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7135 val = ent->content;
7136 if (val == NULL) return;
7137 /*
7138 * inline the entity.
7139 */
7140 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7141 (!ctxt->disableSAX))
7142 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7143 return;
7144 }
7145
7146 /*
7147 * The first reference to the entity trigger a parsing phase
7148 * where the ent->children is filled with the result from
7149 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007150 * Note: external parsed entities will not be loaded, it is not
7151 * required for a non-validating parser, unless the parsing option
7152 * of validating, or substituting entities were given. Doing so is
7153 * far more secure as the parser will only process data coming from
7154 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007155 */
Daniel Veillard4629ee02012-07-23 14:15:40 +08007156 if ((ent->checked == 0) &&
7157 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7158 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007159 unsigned long oldnbent = ctxt->nbentities;
7160
7161 /*
7162 * This is a bit hackish but this seems the best
7163 * way to make sure both SAX and DOM entity support
7164 * behaves okay.
7165 */
7166 void *user_data;
7167 if (ctxt->userData == ctxt)
7168 user_data = NULL;
7169 else
7170 user_data = ctxt->userData;
7171
7172 /*
7173 * Check that this entity is well formed
7174 * 4.3.2: An internal general parsed entity is well-formed
7175 * if its replacement text matches the production labeled
7176 * content.
7177 */
7178 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7179 ctxt->depth++;
7180 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7181 user_data, &list);
7182 ctxt->depth--;
7183
7184 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7185 ctxt->depth++;
7186 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7187 user_data, ctxt->depth, ent->URI,
7188 ent->ExternalID, &list);
7189 ctxt->depth--;
7190 } else {
7191 ret = XML_ERR_ENTITY_PE_INTERNAL;
7192 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7193 "invalid entity type found\n", NULL);
7194 }
7195
7196 /*
7197 * Store the number of entities needing parsing for this entity
7198 * content and do checkings
7199 */
7200 ent->checked = ctxt->nbentities - oldnbent;
7201 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007202 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007203 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007204 return;
7205 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007206 if (xmlParserEntityCheck(ctxt, 0, ent)) {
7207 xmlFreeNodeList(list);
7208 return;
7209 }
Owen Taylor3473f882001-02-23 17:55:21 +00007210
Daniel Veillard0161e632008-08-28 15:36:32 +00007211 if ((ret == XML_ERR_OK) && (list != NULL)) {
7212 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7213 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7214 (ent->children == NULL)) {
7215 ent->children = list;
7216 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007217 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007218 * Prune it directly in the generated document
7219 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007220 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007221 if (((list->type == XML_TEXT_NODE) &&
7222 (list->next == NULL)) ||
7223 (ctxt->parseMode == XML_PARSE_READER)) {
7224 list->parent = (xmlNodePtr) ent;
7225 list = NULL;
7226 ent->owner = 1;
7227 } else {
7228 ent->owner = 0;
7229 while (list != NULL) {
7230 list->parent = (xmlNodePtr) ctxt->node;
7231 list->doc = ctxt->myDoc;
7232 if (list->next == NULL)
7233 ent->last = list;
7234 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007235 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007236 list = ent->children;
7237#ifdef LIBXML_LEGACY_ENABLED
7238 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7239 xmlAddEntityReference(ent, list, NULL);
7240#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007241 }
7242 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007243 ent->owner = 1;
7244 while (list != NULL) {
7245 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007246 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007247 if (list->next == NULL)
7248 ent->last = list;
7249 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007250 }
7251 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007252 } else {
7253 xmlFreeNodeList(list);
7254 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007255 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007256 } else if ((ret != XML_ERR_OK) &&
7257 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7258 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7259 "Entity '%s' failed to parse\n", ent->name);
7260 } else if (list != NULL) {
7261 xmlFreeNodeList(list);
7262 list = NULL;
7263 }
7264 if (ent->checked == 0)
7265 ent->checked = 1;
7266 } else if (ent->checked != 1) {
7267 ctxt->nbentities += ent->checked;
7268 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007269
Daniel Veillard0161e632008-08-28 15:36:32 +00007270 /*
7271 * Now that the entity content has been gathered
7272 * provide it to the application, this can take different forms based
7273 * on the parsing modes.
7274 */
7275 if (ent->children == NULL) {
7276 /*
7277 * Probably running in SAX mode and the callbacks don't
7278 * build the entity content. So unless we already went
7279 * though parsing for first checking go though the entity
7280 * content to generate callbacks associated to the entity
7281 */
7282 if (was_checked != 0) {
7283 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007284 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007285 * This is a bit hackish but this seems the best
7286 * way to make sure both SAX and DOM entity support
7287 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007288 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007289 if (ctxt->userData == ctxt)
7290 user_data = NULL;
7291 else
7292 user_data = ctxt->userData;
7293
7294 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7295 ctxt->depth++;
7296 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7297 ent->content, user_data, NULL);
7298 ctxt->depth--;
7299 } else if (ent->etype ==
7300 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7301 ctxt->depth++;
7302 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7303 ctxt->sax, user_data, ctxt->depth,
7304 ent->URI, ent->ExternalID, NULL);
7305 ctxt->depth--;
7306 } else {
7307 ret = XML_ERR_ENTITY_PE_INTERNAL;
7308 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7309 "invalid entity type found\n", NULL);
7310 }
7311 if (ret == XML_ERR_ENTITY_LOOP) {
7312 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7313 return;
7314 }
7315 }
7316 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7317 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7318 /*
7319 * Entity reference callback comes second, it's somewhat
7320 * superfluous but a compatibility to historical behaviour
7321 */
7322 ctxt->sax->reference(ctxt->userData, ent->name);
7323 }
7324 return;
7325 }
7326
7327 /*
7328 * If we didn't get any children for the entity being built
7329 */
7330 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7331 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7332 /*
7333 * Create a node.
7334 */
7335 ctxt->sax->reference(ctxt->userData, ent->name);
7336 return;
7337 }
7338
7339 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7340 /*
7341 * There is a problem on the handling of _private for entities
7342 * (bug 155816): Should we copy the content of the field from
7343 * the entity (possibly overwriting some value set by the user
7344 * when a copy is created), should we leave it alone, or should
7345 * we try to take care of different situations? The problem
7346 * is exacerbated by the usage of this field by the xmlReader.
7347 * To fix this bug, we look at _private on the created node
7348 * and, if it's NULL, we copy in whatever was in the entity.
7349 * If it's not NULL we leave it alone. This is somewhat of a
7350 * hack - maybe we should have further tests to determine
7351 * what to do.
7352 */
7353 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7354 /*
7355 * Seems we are generating the DOM content, do
7356 * a simple tree copy for all references except the first
7357 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007358 */
7359 if (((list == NULL) && (ent->owner == 0)) ||
7360 (ctxt->parseMode == XML_PARSE_READER)) {
7361 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7362
7363 /*
7364 * when operating on a reader, the entities definitions
7365 * are always owning the entities subtree.
7366 if (ctxt->parseMode == XML_PARSE_READER)
7367 ent->owner = 1;
7368 */
7369
7370 cur = ent->children;
7371 while (cur != NULL) {
7372 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7373 if (nw != NULL) {
7374 if (nw->_private == NULL)
7375 nw->_private = cur->_private;
7376 if (firstChild == NULL){
7377 firstChild = nw;
7378 }
7379 nw = xmlAddChild(ctxt->node, nw);
7380 }
7381 if (cur == ent->last) {
7382 /*
7383 * needed to detect some strange empty
7384 * node cases in the reader tests
7385 */
7386 if ((ctxt->parseMode == XML_PARSE_READER) &&
7387 (nw != NULL) &&
7388 (nw->type == XML_ELEMENT_NODE) &&
7389 (nw->children == NULL))
7390 nw->extra = 1;
7391
7392 break;
7393 }
7394 cur = cur->next;
7395 }
7396#ifdef LIBXML_LEGACY_ENABLED
7397 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7398 xmlAddEntityReference(ent, firstChild, nw);
7399#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard28f5e1a2012-09-04 11:18:39 +08007400 } else if ((list == NULL) || (ctxt->inputNr > 0)) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007401 xmlNodePtr nw = NULL, cur, next, last,
7402 firstChild = NULL;
7403 /*
7404 * Copy the entity child list and make it the new
7405 * entity child list. The goal is to make sure any
7406 * ID or REF referenced will be the one from the
7407 * document content and not the entity copy.
7408 */
7409 cur = ent->children;
7410 ent->children = NULL;
7411 last = ent->last;
7412 ent->last = NULL;
7413 while (cur != NULL) {
7414 next = cur->next;
7415 cur->next = NULL;
7416 cur->parent = NULL;
7417 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7418 if (nw != NULL) {
7419 if (nw->_private == NULL)
7420 nw->_private = cur->_private;
7421 if (firstChild == NULL){
7422 firstChild = cur;
7423 }
7424 xmlAddChild((xmlNodePtr) ent, nw);
7425 xmlAddChild(ctxt->node, cur);
7426 }
7427 if (cur == last)
7428 break;
7429 cur = next;
7430 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007431 if (ent->owner == 0)
7432 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007433#ifdef LIBXML_LEGACY_ENABLED
7434 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7435 xmlAddEntityReference(ent, firstChild, nw);
7436#endif /* LIBXML_LEGACY_ENABLED */
7437 } else {
7438 const xmlChar *nbktext;
7439
7440 /*
7441 * the name change is to avoid coalescing of the
7442 * node with a possible previous text one which
7443 * would make ent->children a dangling pointer
7444 */
7445 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7446 -1);
7447 if (ent->children->type == XML_TEXT_NODE)
7448 ent->children->name = nbktext;
7449 if ((ent->last != ent->children) &&
7450 (ent->last->type == XML_TEXT_NODE))
7451 ent->last->name = nbktext;
7452 xmlAddChildList(ctxt->node, ent->children);
7453 }
7454
7455 /*
7456 * This is to avoid a nasty side effect, see
7457 * characters() in SAX.c
7458 */
7459 ctxt->nodemem = 0;
7460 ctxt->nodelen = 0;
7461 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007462 }
7463 }
7464}
7465
7466/**
7467 * xmlParseEntityRef:
7468 * @ctxt: an XML parser context
7469 *
7470 * parse ENTITY references declarations
7471 *
7472 * [68] EntityRef ::= '&' Name ';'
7473 *
7474 * [ WFC: Entity Declared ]
7475 * In a document without any DTD, a document with only an internal DTD
7476 * subset which contains no parameter entity references, or a document
7477 * with "standalone='yes'", the Name given in the entity reference
7478 * must match that in an entity declaration, except that well-formed
7479 * documents need not declare any of the following entities: amp, lt,
7480 * gt, apos, quot. The declaration of a parameter entity must precede
7481 * any reference to it. Similarly, the declaration of a general entity
7482 * must precede any reference to it which appears in a default value in an
7483 * attribute-list declaration. Note that if entities are declared in the
7484 * external subset or in external parameter entities, a non-validating
7485 * processor is not obligated to read and process their declarations;
7486 * for such documents, the rule that an entity must be declared is a
7487 * well-formedness constraint only if standalone='yes'.
7488 *
7489 * [ WFC: Parsed Entity ]
7490 * An entity reference must not contain the name of an unparsed entity
7491 *
7492 * Returns the xmlEntityPtr if found, or NULL otherwise.
7493 */
7494xmlEntityPtr
7495xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007496 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007497 xmlEntityPtr ent = NULL;
7498
7499 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007500 if (ctxt->instate == XML_PARSER_EOF)
7501 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007502
Daniel Veillard0161e632008-08-28 15:36:32 +00007503 if (RAW != '&')
7504 return(NULL);
7505 NEXT;
7506 name = xmlParseName(ctxt);
7507 if (name == NULL) {
7508 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7509 "xmlParseEntityRef: no name\n");
7510 return(NULL);
7511 }
7512 if (RAW != ';') {
7513 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7514 return(NULL);
7515 }
7516 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007517
Daniel Veillard0161e632008-08-28 15:36:32 +00007518 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007519 * Predefined entities override any extra definition
Daniel Veillard0161e632008-08-28 15:36:32 +00007520 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007521 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7522 ent = xmlGetPredefinedEntity(name);
7523 if (ent != NULL)
7524 return(ent);
7525 }
Owen Taylor3473f882001-02-23 17:55:21 +00007526
Daniel Veillard0161e632008-08-28 15:36:32 +00007527 /*
Jan Pokorný81d7a822012-09-13 15:56:51 +02007528 * Increase the number of entity references parsed
Daniel Veillard0161e632008-08-28 15:36:32 +00007529 */
7530 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007531
Daniel Veillard0161e632008-08-28 15:36:32 +00007532 /*
7533 * Ask first SAX for entity resolution, otherwise try the
7534 * entities which may have stored in the parser context.
7535 */
7536 if (ctxt->sax != NULL) {
7537 if (ctxt->sax->getEntity != NULL)
7538 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007539 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007540 (ctxt->options & XML_PARSE_OLDSAX))
7541 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007542 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7543 (ctxt->userData==ctxt)) {
7544 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007545 }
7546 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007547 /*
7548 * [ WFC: Entity Declared ]
7549 * In a document without any DTD, a document with only an
7550 * internal DTD subset which contains no parameter entity
7551 * references, or a document with "standalone='yes'", the
7552 * Name given in the entity reference must match that in an
7553 * entity declaration, except that well-formed documents
7554 * need not declare any of the following entities: amp, lt,
7555 * gt, apos, quot.
7556 * The declaration of a parameter entity must precede any
7557 * reference to it.
7558 * Similarly, the declaration of a general entity must
7559 * precede any reference to it which appears in a default
7560 * value in an attribute-list declaration. Note that if
7561 * entities are declared in the external subset or in
7562 * external parameter entities, a non-validating processor
7563 * is not obligated to read and process their declarations;
7564 * for such documents, the rule that an entity must be
7565 * declared is a well-formedness constraint only if
7566 * standalone='yes'.
7567 */
7568 if (ent == NULL) {
7569 if ((ctxt->standalone == 1) ||
7570 ((ctxt->hasExternalSubset == 0) &&
7571 (ctxt->hasPErefs == 0))) {
7572 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7573 "Entity '%s' not defined\n", name);
7574 } else {
7575 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7576 "Entity '%s' not defined\n", name);
7577 if ((ctxt->inSubset == 0) &&
7578 (ctxt->sax != NULL) &&
7579 (ctxt->sax->reference != NULL)) {
7580 ctxt->sax->reference(ctxt->userData, name);
7581 }
7582 }
7583 ctxt->valid = 0;
7584 }
7585
7586 /*
7587 * [ WFC: Parsed Entity ]
7588 * An entity reference must not contain the name of an
7589 * unparsed entity
7590 */
7591 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7592 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7593 "Entity reference to unparsed entity %s\n", name);
7594 }
7595
7596 /*
7597 * [ WFC: No External Entity References ]
7598 * Attribute values cannot contain direct or indirect
7599 * entity references to external entities.
7600 */
7601 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7602 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7603 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7604 "Attribute references external entity '%s'\n", name);
7605 }
7606 /*
7607 * [ WFC: No < in Attribute Values ]
7608 * The replacement text of any entity referred to directly or
7609 * indirectly in an attribute value (other than "&lt;") must
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007610 * not contain a <.
Daniel Veillard0161e632008-08-28 15:36:32 +00007611 */
7612 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7613 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007614 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007615 (xmlStrchr(ent->content, '<'))) {
7616 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7617 "'<' in entity '%s' is not allowed in attributes values\n", name);
7618 }
7619
7620 /*
7621 * Internal check, no parameter entities here ...
7622 */
7623 else {
7624 switch (ent->etype) {
7625 case XML_INTERNAL_PARAMETER_ENTITY:
7626 case XML_EXTERNAL_PARAMETER_ENTITY:
7627 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7628 "Attempt to reference the parameter entity '%s'\n",
7629 name);
7630 break;
7631 default:
7632 break;
7633 }
7634 }
7635
7636 /*
7637 * [ WFC: No Recursion ]
7638 * A parsed entity must not contain a recursive reference
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007639 * to itself, either directly or indirectly.
Daniel Veillard0161e632008-08-28 15:36:32 +00007640 * Done somewhere else
7641 */
Owen Taylor3473f882001-02-23 17:55:21 +00007642 return(ent);
7643}
7644
7645/**
7646 * xmlParseStringEntityRef:
7647 * @ctxt: an XML parser context
7648 * @str: a pointer to an index in the string
7649 *
7650 * parse ENTITY references declarations, but this version parses it from
7651 * a string value.
7652 *
7653 * [68] EntityRef ::= '&' Name ';'
7654 *
7655 * [ WFC: Entity Declared ]
7656 * In a document without any DTD, a document with only an internal DTD
7657 * subset which contains no parameter entity references, or a document
7658 * with "standalone='yes'", the Name given in the entity reference
7659 * must match that in an entity declaration, except that well-formed
7660 * documents need not declare any of the following entities: amp, lt,
7661 * gt, apos, quot. The declaration of a parameter entity must precede
7662 * any reference to it. Similarly, the declaration of a general entity
7663 * must precede any reference to it which appears in a default value in an
7664 * attribute-list declaration. Note that if entities are declared in the
7665 * external subset or in external parameter entities, a non-validating
7666 * processor is not obligated to read and process their declarations;
7667 * for such documents, the rule that an entity must be declared is a
7668 * well-formedness constraint only if standalone='yes'.
7669 *
7670 * [ WFC: Parsed Entity ]
7671 * An entity reference must not contain the name of an unparsed entity
7672 *
7673 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7674 * is updated to the current location in the string.
7675 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007676static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007677xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7678 xmlChar *name;
7679 const xmlChar *ptr;
7680 xmlChar cur;
7681 xmlEntityPtr ent = NULL;
7682
7683 if ((str == NULL) || (*str == NULL))
7684 return(NULL);
7685 ptr = *str;
7686 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007687 if (cur != '&')
7688 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007689
Daniel Veillard0161e632008-08-28 15:36:32 +00007690 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007691 name = xmlParseStringName(ctxt, &ptr);
7692 if (name == NULL) {
7693 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7694 "xmlParseStringEntityRef: no name\n");
7695 *str = ptr;
7696 return(NULL);
7697 }
7698 if (*ptr != ';') {
7699 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007700 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007701 *str = ptr;
7702 return(NULL);
7703 }
7704 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007705
Owen Taylor3473f882001-02-23 17:55:21 +00007706
Daniel Veillard0161e632008-08-28 15:36:32 +00007707 /*
7708 * Predefined entites override any extra definition
7709 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007710 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7711 ent = xmlGetPredefinedEntity(name);
7712 if (ent != NULL) {
7713 xmlFree(name);
7714 *str = ptr;
7715 return(ent);
7716 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007717 }
Owen Taylor3473f882001-02-23 17:55:21 +00007718
Daniel Veillard0161e632008-08-28 15:36:32 +00007719 /*
7720 * Increate the number of entity references parsed
7721 */
7722 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007723
Daniel Veillard0161e632008-08-28 15:36:32 +00007724 /*
7725 * Ask first SAX for entity resolution, otherwise try the
7726 * entities which may have stored in the parser context.
7727 */
7728 if (ctxt->sax != NULL) {
7729 if (ctxt->sax->getEntity != NULL)
7730 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007731 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7732 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007733 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7734 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007735 }
7736 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007737
7738 /*
7739 * [ WFC: Entity Declared ]
7740 * In a document without any DTD, a document with only an
7741 * internal DTD subset which contains no parameter entity
7742 * references, or a document with "standalone='yes'", the
7743 * Name given in the entity reference must match that in an
7744 * entity declaration, except that well-formed documents
7745 * need not declare any of the following entities: amp, lt,
7746 * gt, apos, quot.
7747 * The declaration of a parameter entity must precede any
7748 * reference to it.
7749 * Similarly, the declaration of a general entity must
7750 * precede any reference to it which appears in a default
7751 * value in an attribute-list declaration. Note that if
7752 * entities are declared in the external subset or in
7753 * external parameter entities, a non-validating processor
7754 * is not obligated to read and process their declarations;
7755 * for such documents, the rule that an entity must be
7756 * declared is a well-formedness constraint only if
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007757 * standalone='yes'.
Daniel Veillard0161e632008-08-28 15:36:32 +00007758 */
7759 if (ent == NULL) {
7760 if ((ctxt->standalone == 1) ||
7761 ((ctxt->hasExternalSubset == 0) &&
7762 (ctxt->hasPErefs == 0))) {
7763 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7764 "Entity '%s' not defined\n", name);
7765 } else {
7766 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7767 "Entity '%s' not defined\n",
7768 name);
7769 }
7770 /* TODO ? check regressions ctxt->valid = 0; */
7771 }
7772
7773 /*
7774 * [ WFC: Parsed Entity ]
7775 * An entity reference must not contain the name of an
7776 * unparsed entity
7777 */
7778 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7779 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7780 "Entity reference to unparsed entity %s\n", name);
7781 }
7782
7783 /*
7784 * [ WFC: No External Entity References ]
7785 * Attribute values cannot contain direct or indirect
7786 * entity references to external entities.
7787 */
7788 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7789 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7790 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7791 "Attribute references external entity '%s'\n", name);
7792 }
7793 /*
7794 * [ WFC: No < in Attribute Values ]
7795 * The replacement text of any entity referred to directly or
7796 * indirectly in an attribute value (other than "&lt;") must
7797 * not contain a <.
7798 */
7799 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7800 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007801 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007802 (xmlStrchr(ent->content, '<'))) {
7803 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7804 "'<' in entity '%s' is not allowed in attributes values\n",
7805 name);
7806 }
7807
7808 /*
7809 * Internal check, no parameter entities here ...
7810 */
7811 else {
7812 switch (ent->etype) {
7813 case XML_INTERNAL_PARAMETER_ENTITY:
7814 case XML_EXTERNAL_PARAMETER_ENTITY:
7815 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7816 "Attempt to reference the parameter entity '%s'\n",
7817 name);
7818 break;
7819 default:
7820 break;
7821 }
7822 }
7823
7824 /*
7825 * [ WFC: No Recursion ]
7826 * A parsed entity must not contain a recursive reference
7827 * to itself, either directly or indirectly.
7828 * Done somewhere else
7829 */
7830
7831 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007832 *str = ptr;
7833 return(ent);
7834}
7835
7836/**
7837 * xmlParsePEReference:
7838 * @ctxt: an XML parser context
7839 *
7840 * parse PEReference declarations
7841 * The entity content is handled directly by pushing it's content as
7842 * a new input stream.
7843 *
7844 * [69] PEReference ::= '%' Name ';'
7845 *
7846 * [ WFC: No Recursion ]
7847 * A parsed entity must not contain a recursive
Daniel Veillardf8e3db02012-09-11 13:26:36 +08007848 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007849 *
7850 * [ WFC: Entity Declared ]
7851 * In a document without any DTD, a document with only an internal DTD
7852 * subset which contains no parameter entity references, or a document
7853 * with "standalone='yes'", ... ... The declaration of a parameter
7854 * entity must precede any reference to it...
7855 *
7856 * [ VC: Entity Declared ]
7857 * In a document with an external subset or external parameter entities
7858 * with "standalone='no'", ... ... The declaration of a parameter entity
7859 * must precede any reference to it...
7860 *
7861 * [ WFC: In DTD ]
7862 * Parameter-entity references may only appear in the DTD.
7863 * NOTE: misleading but this is handled.
7864 */
7865void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007866xmlParsePEReference(xmlParserCtxtPtr ctxt)
7867{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007868 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007869 xmlEntityPtr entity = NULL;
7870 xmlParserInputPtr input;
7871
Daniel Veillard0161e632008-08-28 15:36:32 +00007872 if (RAW != '%')
7873 return;
7874 NEXT;
7875 name = xmlParseName(ctxt);
7876 if (name == NULL) {
7877 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7878 "xmlParsePEReference: no name\n");
7879 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007880 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007881 if (RAW != ';') {
7882 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7883 return;
7884 }
7885
7886 NEXT;
7887
7888 /*
7889 * Increate the number of entity references parsed
7890 */
7891 ctxt->nbentities++;
7892
7893 /*
7894 * Request the entity from SAX
7895 */
7896 if ((ctxt->sax != NULL) &&
7897 (ctxt->sax->getParameterEntity != NULL))
7898 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7899 name);
7900 if (entity == NULL) {
7901 /*
7902 * [ WFC: Entity Declared ]
7903 * In a document without any DTD, a document with only an
7904 * internal DTD subset which contains no parameter entity
7905 * references, or a document with "standalone='yes'", ...
7906 * ... The declaration of a parameter entity must precede
7907 * any reference to it...
7908 */
7909 if ((ctxt->standalone == 1) ||
7910 ((ctxt->hasExternalSubset == 0) &&
7911 (ctxt->hasPErefs == 0))) {
7912 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7913 "PEReference: %%%s; not found\n",
7914 name);
7915 } else {
7916 /*
7917 * [ VC: Entity Declared ]
7918 * In a document with an external subset or external
7919 * parameter entities with "standalone='no'", ...
7920 * ... The declaration of a parameter entity must
7921 * precede any reference to it...
7922 */
7923 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7924 "PEReference: %%%s; not found\n",
7925 name, NULL);
7926 ctxt->valid = 0;
7927 }
7928 } else {
7929 /*
7930 * Internal checking in case the entity quest barfed
7931 */
7932 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7933 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7934 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7935 "Internal: %%%s; is not a parameter entity\n",
7936 name, NULL);
7937 } else if (ctxt->input->free != deallocblankswrapper) {
7938 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7939 if (xmlPushInput(ctxt, input) < 0)
7940 return;
7941 } else {
7942 /*
7943 * TODO !!!
7944 * handle the extra spaces added before and after
7945 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7946 */
7947 input = xmlNewEntityInputStream(ctxt, entity);
7948 if (xmlPushInput(ctxt, input) < 0)
7949 return;
7950 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7951 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7952 (IS_BLANK_CH(NXT(5)))) {
7953 xmlParseTextDecl(ctxt);
7954 if (ctxt->errNo ==
7955 XML_ERR_UNSUPPORTED_ENCODING) {
7956 /*
7957 * The XML REC instructs us to stop parsing
7958 * right here
7959 */
7960 ctxt->instate = XML_PARSER_EOF;
7961 return;
7962 }
7963 }
7964 }
7965 }
7966 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007967}
7968
7969/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007970 * xmlLoadEntityContent:
7971 * @ctxt: an XML parser context
7972 * @entity: an unloaded system entity
7973 *
7974 * Load the original content of the given system entity from the
7975 * ExternalID/SystemID given. This is to be used for Included in Literal
7976 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7977 *
7978 * Returns 0 in case of success and -1 in case of failure
7979 */
7980static int
7981xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7982 xmlParserInputPtr input;
7983 xmlBufferPtr buf;
7984 int l, c;
7985 int count = 0;
7986
7987 if ((ctxt == NULL) || (entity == NULL) ||
7988 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7989 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7990 (entity->content != NULL)) {
7991 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7992 "xmlLoadEntityContent parameter error");
7993 return(-1);
7994 }
7995
7996 if (xmlParserDebugEntities)
7997 xmlGenericError(xmlGenericErrorContext,
7998 "Reading %s entity content input\n", entity->name);
7999
8000 buf = xmlBufferCreate();
8001 if (buf == NULL) {
8002 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8003 "xmlLoadEntityContent parameter error");
8004 return(-1);
8005 }
8006
8007 input = xmlNewEntityInputStream(ctxt, entity);
8008 if (input == NULL) {
8009 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8010 "xmlLoadEntityContent input error");
8011 xmlBufferFree(buf);
8012 return(-1);
8013 }
8014
8015 /*
8016 * Push the entity as the current input, read char by char
8017 * saving to the buffer until the end of the entity or an error
8018 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008019 if (xmlPushInput(ctxt, input) < 0) {
8020 xmlBufferFree(buf);
8021 return(-1);
8022 }
8023
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008024 GROW;
8025 c = CUR_CHAR(l);
8026 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8027 (IS_CHAR(c))) {
8028 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008029 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008030 count = 0;
8031 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008032 if (ctxt->instate == XML_PARSER_EOF) {
8033 xmlBufferFree(buf);
8034 return(-1);
8035 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008036 }
8037 NEXTL(l);
8038 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008039 if (c == 0) {
8040 count = 0;
8041 GROW;
8042 if (ctxt->instate == XML_PARSER_EOF) {
8043 xmlBufferFree(buf);
8044 return(-1);
8045 }
8046 c = CUR_CHAR(l);
8047 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008048 }
8049
8050 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8051 xmlPopInput(ctxt);
8052 } else if (!IS_CHAR(c)) {
8053 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8054 "xmlLoadEntityContent: invalid char value %d\n",
8055 c);
8056 xmlBufferFree(buf);
8057 return(-1);
8058 }
8059 entity->content = buf->content;
8060 buf->content = NULL;
8061 xmlBufferFree(buf);
8062
8063 return(0);
8064}
8065
8066/**
Owen Taylor3473f882001-02-23 17:55:21 +00008067 * xmlParseStringPEReference:
8068 * @ctxt: an XML parser context
8069 * @str: a pointer to an index in the string
8070 *
8071 * parse PEReference declarations
8072 *
8073 * [69] PEReference ::= '%' Name ';'
8074 *
8075 * [ WFC: No Recursion ]
8076 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008077 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008078 *
8079 * [ WFC: Entity Declared ]
8080 * In a document without any DTD, a document with only an internal DTD
8081 * subset which contains no parameter entity references, or a document
8082 * with "standalone='yes'", ... ... The declaration of a parameter
8083 * entity must precede any reference to it...
8084 *
8085 * [ VC: Entity Declared ]
8086 * In a document with an external subset or external parameter entities
8087 * with "standalone='no'", ... ... The declaration of a parameter entity
8088 * must precede any reference to it...
8089 *
8090 * [ WFC: In DTD ]
8091 * Parameter-entity references may only appear in the DTD.
8092 * NOTE: misleading but this is handled.
8093 *
8094 * Returns the string of the entity content.
8095 * str is updated to the current value of the index
8096 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008097static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008098xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8099 const xmlChar *ptr;
8100 xmlChar cur;
8101 xmlChar *name;
8102 xmlEntityPtr entity = NULL;
8103
8104 if ((str == NULL) || (*str == NULL)) return(NULL);
8105 ptr = *str;
8106 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008107 if (cur != '%')
8108 return(NULL);
8109 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008110 name = xmlParseStringName(ctxt, &ptr);
8111 if (name == NULL) {
8112 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8113 "xmlParseStringPEReference: no name\n");
8114 *str = ptr;
8115 return(NULL);
8116 }
8117 cur = *ptr;
8118 if (cur != ';') {
8119 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8120 xmlFree(name);
8121 *str = ptr;
8122 return(NULL);
8123 }
8124 ptr++;
8125
8126 /*
8127 * Increate the number of entity references parsed
8128 */
8129 ctxt->nbentities++;
8130
8131 /*
8132 * Request the entity from SAX
8133 */
8134 if ((ctxt->sax != NULL) &&
8135 (ctxt->sax->getParameterEntity != NULL))
8136 entity = ctxt->sax->getParameterEntity(ctxt->userData,
8137 name);
8138 if (entity == NULL) {
8139 /*
8140 * [ WFC: Entity Declared ]
8141 * In a document without any DTD, a document with only an
8142 * internal DTD subset which contains no parameter entity
8143 * references, or a document with "standalone='yes'", ...
8144 * ... The declaration of a parameter entity must precede
8145 * any reference to it...
8146 */
8147 if ((ctxt->standalone == 1) ||
8148 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8149 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8150 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008151 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008152 /*
8153 * [ VC: Entity Declared ]
8154 * In a document with an external subset or external
8155 * parameter entities with "standalone='no'", ...
8156 * ... The declaration of a parameter entity must
8157 * precede any reference to it...
8158 */
8159 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8160 "PEReference: %%%s; not found\n",
8161 name, NULL);
8162 ctxt->valid = 0;
8163 }
8164 } else {
8165 /*
8166 * Internal checking in case the entity quest barfed
8167 */
8168 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8169 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8170 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8171 "%%%s; is not a parameter entity\n",
8172 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008173 }
8174 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008175 ctxt->hasPErefs = 1;
8176 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008177 *str = ptr;
8178 return(entity);
8179}
8180
8181/**
8182 * xmlParseDocTypeDecl:
8183 * @ctxt: an XML parser context
8184 *
8185 * parse a DOCTYPE declaration
8186 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008187 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
Owen Taylor3473f882001-02-23 17:55:21 +00008188 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8189 *
8190 * [ VC: Root Element Type ]
8191 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008192 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00008193 */
8194
8195void
8196xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008197 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008198 xmlChar *ExternalID = NULL;
8199 xmlChar *URI = NULL;
8200
8201 /*
8202 * We know that '<!DOCTYPE' has been detected.
8203 */
8204 SKIP(9);
8205
8206 SKIP_BLANKS;
8207
8208 /*
8209 * Parse the DOCTYPE name.
8210 */
8211 name = xmlParseName(ctxt);
8212 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008213 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8214 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008215 }
8216 ctxt->intSubName = name;
8217
8218 SKIP_BLANKS;
8219
8220 /*
8221 * Check for SystemID and ExternalID
8222 */
8223 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8224
8225 if ((URI != NULL) || (ExternalID != NULL)) {
8226 ctxt->hasExternalSubset = 1;
8227 }
8228 ctxt->extSubURI = URI;
8229 ctxt->extSubSystem = ExternalID;
8230
8231 SKIP_BLANKS;
8232
8233 /*
8234 * Create and update the internal subset.
8235 */
8236 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8237 (!ctxt->disableSAX))
8238 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8239
8240 /*
8241 * Is there any internal subset declarations ?
8242 * they are handled separately in xmlParseInternalSubset()
8243 */
8244 if (RAW == '[')
8245 return;
8246
8247 /*
8248 * We should be at the end of the DOCTYPE declaration.
8249 */
8250 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008251 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008252 }
8253 NEXT;
8254}
8255
8256/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008257 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008258 * @ctxt: an XML parser context
8259 *
8260 * parse the internal subset declaration
8261 *
8262 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8263 */
8264
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008265static void
Owen Taylor3473f882001-02-23 17:55:21 +00008266xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8267 /*
8268 * Is there any DTD definition ?
8269 */
8270 if (RAW == '[') {
8271 ctxt->instate = XML_PARSER_DTD;
8272 NEXT;
8273 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008274 * Parse the succession of Markup declarations and
Owen Taylor3473f882001-02-23 17:55:21 +00008275 * PEReferences.
8276 * Subsequence (markupdecl | PEReference | S)*
8277 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008278 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008279 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008280 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008281
8282 SKIP_BLANKS;
8283 xmlParseMarkupDecl(ctxt);
8284 xmlParsePEReference(ctxt);
8285
8286 /*
8287 * Pop-up of finished entities.
8288 */
8289 while ((RAW == 0) && (ctxt->inputNr > 1))
8290 xmlPopInput(ctxt);
8291
8292 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008293 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008294 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008295 break;
8296 }
8297 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008298 if (RAW == ']') {
Owen Taylor3473f882001-02-23 17:55:21 +00008299 NEXT;
8300 SKIP_BLANKS;
8301 }
8302 }
8303
8304 /*
8305 * We should be at the end of the DOCTYPE declaration.
8306 */
8307 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008308 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008309 }
8310 NEXT;
8311}
8312
Daniel Veillard81273902003-09-30 00:43:48 +00008313#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008314/**
8315 * xmlParseAttribute:
8316 * @ctxt: an XML parser context
8317 * @value: a xmlChar ** used to store the value of the attribute
8318 *
8319 * parse an attribute
8320 *
8321 * [41] Attribute ::= Name Eq AttValue
8322 *
8323 * [ WFC: No External Entity References ]
8324 * Attribute values cannot contain direct or indirect entity references
8325 * to external entities.
8326 *
8327 * [ WFC: No < in Attribute Values ]
8328 * The replacement text of any entity referred to directly or indirectly in
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008329 * an attribute value (other than "&lt;") must not contain a <.
8330 *
Owen Taylor3473f882001-02-23 17:55:21 +00008331 * [ VC: Attribute Value Type ]
8332 * The attribute must have been declared; the value must be of the type
8333 * declared for it.
8334 *
8335 * [25] Eq ::= S? '=' S?
8336 *
8337 * With namespace:
8338 *
8339 * [NS 11] Attribute ::= QName Eq AttValue
8340 *
8341 * Also the case QName == xmlns:??? is handled independently as a namespace
8342 * definition.
8343 *
8344 * Returns the attribute name, and the value in *value.
8345 */
8346
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008347const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008348xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008349 const xmlChar *name;
8350 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008351
8352 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008353 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008354 name = xmlParseName(ctxt);
8355 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008356 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008357 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008358 return(NULL);
8359 }
8360
8361 /*
8362 * read the value
8363 */
8364 SKIP_BLANKS;
8365 if (RAW == '=') {
8366 NEXT;
8367 SKIP_BLANKS;
8368 val = xmlParseAttValue(ctxt);
8369 ctxt->instate = XML_PARSER_CONTENT;
8370 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008371 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008372 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008373 return(NULL);
8374 }
8375
8376 /*
8377 * Check that xml:lang conforms to the specification
8378 * No more registered as an error, just generate a warning now
8379 * since this was deprecated in XML second edition
8380 */
8381 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8382 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008383 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8384 "Malformed value for xml:lang : %s\n",
8385 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008386 }
8387 }
8388
8389 /*
8390 * Check that xml:space conforms to the specification
8391 */
8392 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8393 if (xmlStrEqual(val, BAD_CAST "default"))
8394 *(ctxt->space) = 0;
8395 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8396 *(ctxt->space) = 1;
8397 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008398 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008399"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008400 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008401 }
8402 }
8403
8404 *value = val;
8405 return(name);
8406}
8407
8408/**
8409 * xmlParseStartTag:
8410 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008411 *
Owen Taylor3473f882001-02-23 17:55:21 +00008412 * parse a start of tag either for rule element or
8413 * EmptyElement. In both case we don't parse the tag closing chars.
8414 *
8415 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8416 *
8417 * [ WFC: Unique Att Spec ]
8418 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008419 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008420 *
8421 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8422 *
8423 * [ WFC: Unique Att Spec ]
8424 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008425 * empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008426 *
8427 * With namespace:
8428 *
8429 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8430 *
8431 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8432 *
8433 * Returns the element name parsed
8434 */
8435
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008436const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008437xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008438 const xmlChar *name;
8439 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008440 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008441 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008442 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008443 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008444 int i;
8445
8446 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008447 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008448
8449 name = xmlParseName(ctxt);
8450 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008451 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008452 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008453 return(NULL);
8454 }
8455
8456 /*
8457 * Now parse the attributes, it ends up with the ending
8458 *
8459 * (S Attribute)* S?
8460 */
8461 SKIP_BLANKS;
8462 GROW;
8463
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008464 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008465 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008466 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008467 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008468 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008469
8470 attname = xmlParseAttribute(ctxt, &attvalue);
8471 if ((attname != NULL) && (attvalue != NULL)) {
8472 /*
8473 * [ WFC: Unique Att Spec ]
8474 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008475 * start-tag or empty-element tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008476 */
8477 for (i = 0; i < nbatts;i += 2) {
8478 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008479 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008480 xmlFree(attvalue);
8481 goto failed;
8482 }
8483 }
Owen Taylor3473f882001-02-23 17:55:21 +00008484 /*
8485 * Add the pair to atts
8486 */
8487 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008488 maxatts = 22; /* allow for 10 attrs by default */
8489 atts = (const xmlChar **)
8490 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008491 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008492 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008493 if (attvalue != NULL)
8494 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008495 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008496 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008497 ctxt->atts = atts;
8498 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008499 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008500 const xmlChar **n;
8501
Owen Taylor3473f882001-02-23 17:55:21 +00008502 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008503 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008504 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008505 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008506 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008507 if (attvalue != NULL)
8508 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008509 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008510 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008511 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008512 ctxt->atts = atts;
8513 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008514 }
8515 atts[nbatts++] = attname;
8516 atts[nbatts++] = attvalue;
8517 atts[nbatts] = NULL;
8518 atts[nbatts + 1] = NULL;
8519 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008520 if (attvalue != NULL)
8521 xmlFree(attvalue);
8522 }
8523
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008524failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008525
Daniel Veillard3772de32002-12-17 10:31:45 +00008526 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008527 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8528 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008529 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8531 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008532 }
8533 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008534 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8535 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008536 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8537 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008538 break;
8539 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008540 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008541 GROW;
8542 }
8543
8544 /*
8545 * SAX: Start of Element !
8546 */
8547 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008548 (!ctxt->disableSAX)) {
8549 if (nbatts > 0)
8550 ctxt->sax->startElement(ctxt->userData, name, atts);
8551 else
8552 ctxt->sax->startElement(ctxt->userData, name, NULL);
8553 }
Owen Taylor3473f882001-02-23 17:55:21 +00008554
8555 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008556 /* Free only the content strings */
8557 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008558 if (atts[i] != NULL)
8559 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008560 }
8561 return(name);
8562}
8563
8564/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008565 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008566 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008567 * @line: line of the start tag
8568 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008569 *
8570 * parse an end of tag
8571 *
8572 * [42] ETag ::= '</' Name S? '>'
8573 *
8574 * With namespace
8575 *
8576 * [NS 9] ETag ::= '</' QName S? '>'
8577 */
8578
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008579static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008580xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008581 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008582
8583 GROW;
8584 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008585 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008586 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008587 return;
8588 }
8589 SKIP(2);
8590
Daniel Veillard46de64e2002-05-29 08:21:33 +00008591 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008592
8593 /*
8594 * We should definitely be at the ending "S? '>'" part
8595 */
8596 GROW;
8597 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008598 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008599 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008600 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008601 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008602
8603 /*
8604 * [ WFC: Element Type Match ]
8605 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008606 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00008607 *
8608 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008609 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008610 if (name == NULL) name = BAD_CAST "unparseable";
8611 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008612 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008613 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008614 }
8615
8616 /*
8617 * SAX: End of Tag
8618 */
8619 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8620 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008621 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008622
Daniel Veillarde57ec792003-09-10 10:50:59 +00008623 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008624 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008625 return;
8626}
8627
8628/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008629 * xmlParseEndTag:
8630 * @ctxt: an XML parser context
8631 *
8632 * parse an end of tag
8633 *
8634 * [42] ETag ::= '</' Name S? '>'
8635 *
8636 * With namespace
8637 *
8638 * [NS 9] ETag ::= '</' QName S? '>'
8639 */
8640
8641void
8642xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008643 xmlParseEndTag1(ctxt, 0);
8644}
Daniel Veillard81273902003-09-30 00:43:48 +00008645#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008646
8647/************************************************************************
8648 * *
8649 * SAX 2 specific operations *
8650 * *
8651 ************************************************************************/
8652
Daniel Veillard0fb18932003-09-07 09:14:37 +00008653/*
8654 * xmlGetNamespace:
8655 * @ctxt: an XML parser context
8656 * @prefix: the prefix to lookup
8657 *
8658 * Lookup the namespace name for the @prefix (which ca be NULL)
8659 * The prefix must come from the @ctxt->dict dictionnary
8660 *
8661 * Returns the namespace name or NULL if not bound
8662 */
8663static const xmlChar *
8664xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8665 int i;
8666
Daniel Veillarde57ec792003-09-10 10:50:59 +00008667 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008668 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008669 if (ctxt->nsTab[i] == prefix) {
8670 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8671 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008672 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008673 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008674 return(NULL);
8675}
8676
8677/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008678 * xmlParseQName:
8679 * @ctxt: an XML parser context
8680 * @prefix: pointer to store the prefix part
8681 *
8682 * parse an XML Namespace QName
8683 *
8684 * [6] QName ::= (Prefix ':')? LocalPart
8685 * [7] Prefix ::= NCName
8686 * [8] LocalPart ::= NCName
8687 *
8688 * Returns the Name parsed or NULL
8689 */
8690
8691static const xmlChar *
8692xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8693 const xmlChar *l, *p;
8694
8695 GROW;
8696
8697 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008698 if (l == NULL) {
8699 if (CUR == ':') {
8700 l = xmlParseName(ctxt);
8701 if (l != NULL) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008702 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008703 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008704 *prefix = NULL;
8705 return(l);
8706 }
8707 }
8708 return(NULL);
8709 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008710 if (CUR == ':') {
8711 NEXT;
8712 p = l;
8713 l = xmlParseNCName(ctxt);
8714 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008715 xmlChar *tmp;
8716
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008717 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8718 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008719 l = xmlParseNmtoken(ctxt);
8720 if (l == NULL)
8721 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8722 else {
8723 tmp = xmlBuildQName(l, p, NULL, 0);
8724 xmlFree((char *)l);
8725 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008726 p = xmlDictLookup(ctxt->dict, tmp, -1);
8727 if (tmp != NULL) xmlFree(tmp);
8728 *prefix = NULL;
8729 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008730 }
8731 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008732 xmlChar *tmp;
8733
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008734 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8735 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008736 NEXT;
8737 tmp = (xmlChar *) xmlParseName(ctxt);
8738 if (tmp != NULL) {
8739 tmp = xmlBuildQName(tmp, l, NULL, 0);
8740 l = xmlDictLookup(ctxt->dict, tmp, -1);
8741 if (tmp != NULL) xmlFree(tmp);
8742 *prefix = p;
8743 return(l);
8744 }
8745 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8746 l = xmlDictLookup(ctxt->dict, tmp, -1);
8747 if (tmp != NULL) xmlFree(tmp);
8748 *prefix = p;
8749 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008750 }
8751 *prefix = p;
8752 } else
8753 *prefix = NULL;
8754 return(l);
8755}
8756
8757/**
8758 * xmlParseQNameAndCompare:
8759 * @ctxt: an XML parser context
8760 * @name: the localname
8761 * @prefix: the prefix, if any.
8762 *
8763 * parse an XML name and compares for match
8764 * (specialized for endtag parsing)
8765 *
8766 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8767 * and the name for mismatch
8768 */
8769
8770static const xmlChar *
8771xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8772 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008773 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008774 const xmlChar *in;
8775 const xmlChar *ret;
8776 const xmlChar *prefix2;
8777
8778 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8779
8780 GROW;
8781 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008782
Daniel Veillard0fb18932003-09-07 09:14:37 +00008783 cmp = prefix;
8784 while (*in != 0 && *in == *cmp) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008785 ++in;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008786 ++cmp;
8787 }
8788 if ((*cmp == 0) && (*in == ':')) {
8789 in++;
8790 cmp = name;
8791 while (*in != 0 && *in == *cmp) {
8792 ++in;
8793 ++cmp;
8794 }
William M. Brack76e95df2003-10-18 16:20:14 +00008795 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008796 /* success */
8797 ctxt->input->cur = in;
8798 return((const xmlChar*) 1);
8799 }
8800 }
8801 /*
8802 * all strings coms from the dictionary, equality can be done directly
8803 */
8804 ret = xmlParseQName (ctxt, &prefix2);
8805 if ((ret == name) && (prefix == prefix2))
8806 return((const xmlChar*) 1);
8807 return ret;
8808}
8809
8810/**
8811 * xmlParseAttValueInternal:
8812 * @ctxt: an XML parser context
8813 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008814 * @alloc: whether the attribute was reallocated as a new string
8815 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008816 *
8817 * parse a value for an attribute.
8818 * NOTE: if no normalization is needed, the routine will return pointers
8819 * directly from the data buffer.
8820 *
8821 * 3.3.3 Attribute-Value Normalization:
8822 * Before the value of an attribute is passed to the application or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008823 * checked for validity, the XML processor must normalize it as follows:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008824 * - a character reference is processed by appending the referenced
8825 * character to the attribute value
8826 * - an entity reference is processed by recursively processing the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008827 * replacement text of the entity
Daniel Veillard0fb18932003-09-07 09:14:37 +00008828 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8829 * appending #x20 to the normalized value, except that only a single
8830 * #x20 is appended for a "#xD#xA" sequence that is part of an external
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008831 * parsed entity or the literal entity value of an internal parsed entity
8832 * - other characters are processed by appending them to the normalized value
Daniel Veillard0fb18932003-09-07 09:14:37 +00008833 * If the declared value is not CDATA, then the XML processor must further
8834 * process the normalized attribute value by discarding any leading and
8835 * trailing space (#x20) characters, and by replacing sequences of space
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008836 * (#x20) characters by a single space (#x20) character.
Daniel Veillard0fb18932003-09-07 09:14:37 +00008837 * All attributes for which no declaration has been read should be treated
8838 * by a non-validating parser as if declared CDATA.
8839 *
8840 * Returns the AttValue parsed or NULL. The value has to be freed by the
8841 * caller if it was copied, this can be detected by val[*len] == 0.
8842 */
8843
8844static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008845xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8846 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008847{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008848 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008849 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008850 xmlChar *ret = NULL;
8851
8852 GROW;
8853 in = (xmlChar *) CUR_PTR;
8854 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008855 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008856 return (NULL);
8857 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008858 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008859
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008860 /*
8861 * try to handle in this routine the most common case where no
8862 * allocation of a new string is required and where content is
8863 * pure ASCII.
8864 */
8865 limit = *in++;
8866 end = ctxt->input->end;
8867 start = in;
8868 if (in >= end) {
8869 const xmlChar *oldbase = ctxt->input->base;
8870 GROW;
8871 if (oldbase != ctxt->input->base) {
8872 long delta = ctxt->input->base - oldbase;
8873 start = start + delta;
8874 in = in + delta;
8875 }
8876 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008877 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008878 if (normalize) {
8879 /*
8880 * Skip any leading spaces
8881 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008882 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008883 ((*in == 0x20) || (*in == 0x9) ||
8884 (*in == 0xA) || (*in == 0xD))) {
8885 in++;
8886 start = in;
8887 if (in >= end) {
8888 const xmlChar *oldbase = ctxt->input->base;
8889 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008890 if (ctxt->instate == XML_PARSER_EOF)
8891 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008892 if (oldbase != ctxt->input->base) {
8893 long delta = ctxt->input->base - oldbase;
8894 start = start + delta;
8895 in = in + delta;
8896 }
8897 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008898 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8899 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8900 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008901 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008902 return(NULL);
8903 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008904 }
8905 }
8906 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8907 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8908 if ((*in++ == 0x20) && (*in == 0x20)) break;
8909 if (in >= end) {
8910 const xmlChar *oldbase = ctxt->input->base;
8911 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008912 if (ctxt->instate == XML_PARSER_EOF)
8913 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008914 if (oldbase != ctxt->input->base) {
8915 long delta = ctxt->input->base - oldbase;
8916 start = start + delta;
8917 in = in + delta;
8918 }
8919 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008920 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8921 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8922 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008923 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008924 return(NULL);
8925 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008926 }
8927 }
8928 last = in;
8929 /*
8930 * skip the trailing blanks
8931 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008932 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillardf8e3db02012-09-11 13:26:36 +08008933 while ((in < end) && (*in != limit) &&
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008934 ((*in == 0x20) || (*in == 0x9) ||
8935 (*in == 0xA) || (*in == 0xD))) {
8936 in++;
8937 if (in >= end) {
8938 const xmlChar *oldbase = ctxt->input->base;
8939 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008940 if (ctxt->instate == XML_PARSER_EOF)
8941 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008942 if (oldbase != ctxt->input->base) {
8943 long delta = ctxt->input->base - oldbase;
8944 start = start + delta;
8945 in = in + delta;
8946 last = last + delta;
8947 }
8948 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008949 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8950 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8951 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008952 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008953 return(NULL);
8954 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008955 }
8956 }
Daniel Veillarde17db992012-07-19 11:25:16 +08008957 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8958 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8959 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008960 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008961 return(NULL);
8962 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008963 if (*in != limit) goto need_complex;
8964 } else {
8965 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8966 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8967 in++;
8968 if (in >= end) {
8969 const xmlChar *oldbase = ctxt->input->base;
8970 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008971 if (ctxt->instate == XML_PARSER_EOF)
8972 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008973 if (oldbase != ctxt->input->base) {
8974 long delta = ctxt->input->base - oldbase;
8975 start = start + delta;
8976 in = in + delta;
8977 }
8978 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008979 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8980 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8981 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008982 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008983 return(NULL);
8984 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008985 }
8986 }
8987 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08008988 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8989 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8990 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
Michael Woodfb27e2c2012-09-28 08:59:33 +02008991 "AttValue length too long\n");
Daniel Veillarde17db992012-07-19 11:25:16 +08008992 return(NULL);
8993 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008994 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008995 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008996 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008997 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008998 *len = last - start;
8999 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009000 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009001 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009002 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009003 }
9004 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009005 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009006 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009007need_complex:
9008 if (alloc) *alloc = 1;
9009 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009010}
9011
9012/**
9013 * xmlParseAttribute2:
9014 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009015 * @pref: the element prefix
9016 * @elem: the element name
9017 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009018 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009019 * @len: an int * to save the length of the attribute
9020 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009021 *
9022 * parse an attribute in the new SAX2 framework.
9023 *
9024 * Returns the attribute name, and the value in *value, .
9025 */
9026
9027static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009028xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009029 const xmlChar * pref, const xmlChar * elem,
9030 const xmlChar ** prefix, xmlChar ** value,
9031 int *len, int *alloc)
9032{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009033 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009034 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009035 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009036
9037 *value = NULL;
9038 GROW;
9039 name = xmlParseQName(ctxt, prefix);
9040 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009041 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9042 "error parsing attribute name\n");
9043 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009044 }
9045
9046 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009047 * get the type if needed
9048 */
9049 if (ctxt->attsSpecial != NULL) {
9050 int type;
9051
9052 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009053 pref, elem, *prefix, name);
9054 if (type != 0)
9055 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009056 }
9057
9058 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009059 * read the value
9060 */
9061 SKIP_BLANKS;
9062 if (RAW == '=') {
9063 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009064 SKIP_BLANKS;
9065 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9066 if (normalize) {
9067 /*
9068 * Sometimes a second normalisation pass for spaces is needed
9069 * but that only happens if charrefs or entities refernces
9070 * have been used in the attribute value, i.e. the attribute
9071 * value have been extracted in an allocated string already.
9072 */
9073 if (*alloc) {
9074 const xmlChar *val2;
9075
9076 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009077 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009078 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009079 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009080 }
9081 }
9082 }
9083 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009084 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009085 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9086 "Specification mandate value for attribute %s\n",
9087 name);
9088 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009089 }
9090
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009091 if (*prefix == ctxt->str_xml) {
9092 /*
9093 * Check that xml:lang conforms to the specification
9094 * No more registered as an error, just generate a warning now
9095 * since this was deprecated in XML second edition
9096 */
9097 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9098 internal_val = xmlStrndup(val, *len);
9099 if (!xmlCheckLanguageID(internal_val)) {
9100 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9101 "Malformed value for xml:lang : %s\n",
9102 internal_val, NULL);
9103 }
9104 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009105
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009106 /*
9107 * Check that xml:space conforms to the specification
9108 */
9109 if (xmlStrEqual(name, BAD_CAST "space")) {
9110 internal_val = xmlStrndup(val, *len);
9111 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9112 *(ctxt->space) = 0;
9113 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9114 *(ctxt->space) = 1;
9115 else {
9116 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9117 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9118 internal_val, NULL);
9119 }
9120 }
9121 if (internal_val) {
9122 xmlFree(internal_val);
9123 }
9124 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009125
9126 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009127 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009128}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009129/**
9130 * xmlParseStartTag2:
9131 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009132 *
Daniel Veillard0fb18932003-09-07 09:14:37 +00009133 * parse a start of tag either for rule element or
9134 * EmptyElement. In both case we don't parse the tag closing chars.
9135 * This routine is called when running SAX2 parsing
9136 *
9137 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9138 *
9139 * [ WFC: Unique Att Spec ]
9140 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009141 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009142 *
9143 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9144 *
9145 * [ WFC: Unique Att Spec ]
9146 * No attribute name may appear more than once in the same start-tag or
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009147 * empty-element tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009148 *
9149 * With namespace:
9150 *
9151 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9152 *
9153 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9154 *
9155 * Returns the element name parsed
9156 */
9157
9158static const xmlChar *
9159xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009160 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009161 const xmlChar *localname;
9162 const xmlChar *prefix;
9163 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009164 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009165 const xmlChar *nsname;
9166 xmlChar *attvalue;
9167 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009168 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009169 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009170 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009171 const xmlChar *base;
9172 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009173 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009174
9175 if (RAW != '<') return(NULL);
9176 NEXT1;
9177
9178 /*
9179 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9180 * point since the attribute values may be stored as pointers to
9181 * the buffer and calling SHRINK would destroy them !
9182 * The Shrinking is only possible once the full set of attribute
9183 * callbacks have been done.
9184 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009185reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009186 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009187 base = ctxt->input->base;
9188 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009189 oldline = ctxt->input->line;
9190 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009191 nbatts = 0;
9192 nratts = 0;
9193 nbdef = 0;
9194 nbNs = 0;
9195 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009196 /* Forget any namespaces added during an earlier parse of this element. */
9197 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009198
9199 localname = xmlParseQName(ctxt, &prefix);
9200 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009201 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9202 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009203 return(NULL);
9204 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009205 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009206
9207 /*
9208 * Now parse the attributes, it ends up with the ending
9209 *
9210 * (S Attribute)* S?
9211 */
9212 SKIP_BLANKS;
9213 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009214 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009215
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009216 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009217 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009218 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009219 const xmlChar *q = CUR_PTR;
9220 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009221 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009222
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009223 attname = xmlParseAttribute2(ctxt, prefix, localname,
9224 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00009225 if (ctxt->input->base != base) {
9226 if ((attvalue != NULL) && (alloc != 0))
9227 xmlFree(attvalue);
9228 attvalue = NULL;
9229 goto base_changed;
9230 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009231 if ((attname != NULL) && (attvalue != NULL)) {
9232 if (len < 0) len = xmlStrlen(attvalue);
9233 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009234 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9235 xmlURIPtr uri;
9236
9237 if (*URL != 0) {
9238 uri = xmlParseURI((const char *) URL);
9239 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009240 xmlNsErr(ctxt, XML_WAR_NS_URI,
9241 "xmlns: '%s' is not a valid URI\n",
9242 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009243 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00009244 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009245 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9246 "xmlns: URI %s is not absolute\n",
9247 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009248 }
9249 xmlFreeURI(uri);
9250 }
Daniel Veillard37334572008-07-31 08:20:02 +00009251 if (URL == ctxt->str_xml_ns) {
9252 if (attname != ctxt->str_xml) {
9253 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9254 "xml namespace URI cannot be the default namespace\n",
9255 NULL, NULL, NULL);
9256 }
9257 goto skip_default_ns;
9258 }
9259 if ((len == 29) &&
9260 (xmlStrEqual(URL,
9261 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9262 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9263 "reuse of the xmlns namespace name is forbidden\n",
9264 NULL, NULL, NULL);
9265 goto skip_default_ns;
9266 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009267 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009268 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009269 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009270 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009271 for (j = 1;j <= nbNs;j++)
9272 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9273 break;
9274 if (j <= nbNs)
9275 xmlErrAttributeDup(ctxt, NULL, attname);
9276 else
9277 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009278skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009279 if (alloc != 0) xmlFree(attvalue);
9280 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009281 continue;
9282 }
9283 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009284 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9285 xmlURIPtr uri;
9286
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009287 if (attname == ctxt->str_xml) {
9288 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009289 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9290 "xml namespace prefix mapped to wrong URI\n",
9291 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009292 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009293 /*
9294 * Do not keep a namespace definition node
9295 */
Daniel Veillard37334572008-07-31 08:20:02 +00009296 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009297 }
Daniel Veillard37334572008-07-31 08:20:02 +00009298 if (URL == ctxt->str_xml_ns) {
9299 if (attname != ctxt->str_xml) {
9300 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9301 "xml namespace URI mapped to wrong prefix\n",
9302 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009303 }
Daniel Veillard37334572008-07-31 08:20:02 +00009304 goto skip_ns;
9305 }
9306 if (attname == ctxt->str_xmlns) {
9307 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9308 "redefinition of the xmlns prefix is forbidden\n",
9309 NULL, NULL, NULL);
9310 goto skip_ns;
9311 }
9312 if ((len == 29) &&
9313 (xmlStrEqual(URL,
9314 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9315 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9316 "reuse of the xmlns namespace name is forbidden\n",
9317 NULL, NULL, NULL);
9318 goto skip_ns;
9319 }
9320 if ((URL == NULL) || (URL[0] == 0)) {
9321 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9322 "xmlns:%s: Empty XML namespace is not allowed\n",
9323 attname, NULL, NULL);
9324 goto skip_ns;
9325 } else {
9326 uri = xmlParseURI((const char *) URL);
9327 if (uri == NULL) {
9328 xmlNsErr(ctxt, XML_WAR_NS_URI,
9329 "xmlns:%s: '%s' is not a valid URI\n",
9330 attname, URL, NULL);
9331 } else {
9332 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9333 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9334 "xmlns:%s: URI %s is not absolute\n",
9335 attname, URL, NULL);
9336 }
9337 xmlFreeURI(uri);
9338 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009339 }
9340
Daniel Veillard0fb18932003-09-07 09:14:37 +00009341 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009342 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009343 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009344 for (j = 1;j <= nbNs;j++)
9345 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9346 break;
9347 if (j <= nbNs)
9348 xmlErrAttributeDup(ctxt, aprefix, attname);
9349 else
9350 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009351skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009352 if (alloc != 0) xmlFree(attvalue);
9353 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009354 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009355 continue;
9356 }
9357
9358 /*
9359 * Add the pair to atts
9360 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009361 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9362 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009363 if (attvalue[len] == 0)
9364 xmlFree(attvalue);
9365 goto failed;
9366 }
9367 maxatts = ctxt->maxatts;
9368 atts = ctxt->atts;
9369 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009370 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009371 atts[nbatts++] = attname;
9372 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009373 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009374 atts[nbatts++] = attvalue;
9375 attvalue += len;
9376 atts[nbatts++] = attvalue;
9377 /*
9378 * tag if some deallocation is needed
9379 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009380 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009381 } else {
9382 if ((attvalue != NULL) && (attvalue[len] == 0))
9383 xmlFree(attvalue);
9384 }
9385
Daniel Veillard37334572008-07-31 08:20:02 +00009386failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009387
9388 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009389 if (ctxt->instate == XML_PARSER_EOF)
9390 break;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009391 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009392 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9393 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009394 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009395 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9396 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009397 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009398 }
9399 SKIP_BLANKS;
9400 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9401 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009402 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009403 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009404 break;
9405 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009406 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009407 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009408 }
9409
Daniel Veillard0fb18932003-09-07 09:14:37 +00009410 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009411 * The attributes defaulting
9412 */
9413 if (ctxt->attsDefault != NULL) {
9414 xmlDefAttrsPtr defaults;
9415
9416 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9417 if (defaults != NULL) {
9418 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009419 attname = defaults->values[5 * i];
9420 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009421
9422 /*
9423 * special work for namespaces defaulted defs
9424 */
9425 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9426 /*
9427 * check that it's not a defined namespace
9428 */
9429 for (j = 1;j <= nbNs;j++)
9430 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9431 break;
9432 if (j <= nbNs) continue;
9433
9434 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009435 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009436 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009437 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009438 nbNs++;
9439 }
9440 } else if (aprefix == ctxt->str_xmlns) {
9441 /*
9442 * check that it's not a defined namespace
9443 */
9444 for (j = 1;j <= nbNs;j++)
9445 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9446 break;
9447 if (j <= nbNs) continue;
9448
9449 nsname = xmlGetNamespace(ctxt, attname);
9450 if (nsname != defaults->values[2]) {
9451 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009452 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009453 nbNs++;
9454 }
9455 } else {
9456 /*
9457 * check that it's not a defined attribute
9458 */
9459 for (j = 0;j < nbatts;j+=5) {
9460 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9461 break;
9462 }
9463 if (j < nbatts) continue;
9464
9465 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9466 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009467 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009468 }
9469 maxatts = ctxt->maxatts;
9470 atts = ctxt->atts;
9471 }
9472 atts[nbatts++] = attname;
9473 atts[nbatts++] = aprefix;
9474 if (aprefix == NULL)
9475 atts[nbatts++] = NULL;
9476 else
9477 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009478 atts[nbatts++] = defaults->values[5 * i + 2];
9479 atts[nbatts++] = defaults->values[5 * i + 3];
9480 if ((ctxt->standalone == 1) &&
9481 (defaults->values[5 * i + 4] != NULL)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009482 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009483 "standalone: attribute %s on %s defaulted from external subset\n",
9484 attname, localname);
9485 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009486 nbdef++;
9487 }
9488 }
9489 }
9490 }
9491
Daniel Veillarde70c8772003-11-25 07:21:18 +00009492 /*
9493 * The attributes checkings
9494 */
9495 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009496 /*
9497 * The default namespace does not apply to attribute names.
9498 */
9499 if (atts[i + 1] != NULL) {
9500 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9501 if (nsname == NULL) {
9502 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9503 "Namespace prefix %s for %s on %s is not defined\n",
9504 atts[i + 1], atts[i], localname);
9505 }
9506 atts[i + 2] = nsname;
9507 } else
9508 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009509 /*
9510 * [ WFC: Unique Att Spec ]
9511 * No attribute name may appear more than once in the same
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009512 * start-tag or empty-element tag.
Daniel Veillarde70c8772003-11-25 07:21:18 +00009513 * As extended by the Namespace in XML REC.
9514 */
9515 for (j = 0; j < i;j += 5) {
9516 if (atts[i] == atts[j]) {
9517 if (atts[i+1] == atts[j+1]) {
9518 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9519 break;
9520 }
9521 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9522 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9523 "Namespaced Attribute %s in '%s' redefined\n",
9524 atts[i], nsname, NULL);
9525 break;
9526 }
9527 }
9528 }
9529 }
9530
Daniel Veillarde57ec792003-09-10 10:50:59 +00009531 nsname = xmlGetNamespace(ctxt, prefix);
9532 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009533 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9534 "Namespace prefix %s on %s is not defined\n",
9535 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009536 }
9537 *pref = prefix;
9538 *URI = nsname;
9539
9540 /*
9541 * SAX: Start of Element !
9542 */
9543 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9544 (!ctxt->disableSAX)) {
9545 if (nbNs > 0)
9546 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9547 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9548 nbatts / 5, nbdef, atts);
9549 else
9550 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9551 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9552 }
9553
9554 /*
9555 * Free up attribute allocated strings if needed
9556 */
9557 if (attval != 0) {
9558 for (i = 3,j = 0; j < nratts;i += 5,j++)
9559 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9560 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009561 }
9562
9563 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009564
9565base_changed:
9566 /*
9567 * the attribute strings are valid iif the base didn't changed
9568 */
9569 if (attval != 0) {
9570 for (i = 3,j = 0; j < nratts;i += 5,j++)
9571 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9572 xmlFree((xmlChar *) atts[i]);
9573 }
9574 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009575 ctxt->input->line = oldline;
9576 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009577 if (ctxt->wellFormed == 1) {
9578 goto reparse;
9579 }
9580 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009581}
9582
9583/**
9584 * xmlParseEndTag2:
9585 * @ctxt: an XML parser context
9586 * @line: line of the start tag
9587 * @nsNr: number of namespaces on the start tag
9588 *
9589 * parse an end of tag
9590 *
9591 * [42] ETag ::= '</' Name S? '>'
9592 *
9593 * With namespace
9594 *
9595 * [NS 9] ETag ::= '</' QName S? '>'
9596 */
9597
9598static void
9599xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009600 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009601 const xmlChar *name;
9602
9603 GROW;
9604 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009605 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009606 return;
9607 }
9608 SKIP(2);
9609
William M. Brack13dfa872004-09-18 04:52:08 +00009610 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009611 if (ctxt->input->cur[tlen] == '>') {
9612 ctxt->input->cur += tlen + 1;
9613 goto done;
9614 }
9615 ctxt->input->cur += tlen;
9616 name = (xmlChar*)1;
9617 } else {
9618 if (prefix == NULL)
9619 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9620 else
9621 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9622 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009623
9624 /*
9625 * We should definitely be at the ending "S? '>'" part
9626 */
9627 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009628 if (ctxt->instate == XML_PARSER_EOF)
9629 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009630 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009631 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009632 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009633 } else
9634 NEXT1;
9635
9636 /*
9637 * [ WFC: Element Type Match ]
9638 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009639 * start-tag.
Daniel Veillard0fb18932003-09-07 09:14:37 +00009640 *
9641 */
9642 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009643 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009644 if ((line == 0) && (ctxt->node != NULL))
9645 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009646 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009647 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009648 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009649 }
9650
9651 /*
9652 * SAX: End of Tag
9653 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009654done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009655 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9656 (!ctxt->disableSAX))
9657 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9658
Daniel Veillard0fb18932003-09-07 09:14:37 +00009659 spacePop(ctxt);
9660 if (nsNr != 0)
9661 nsPop(ctxt, nsNr);
9662 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009663}
9664
9665/**
Owen Taylor3473f882001-02-23 17:55:21 +00009666 * xmlParseCDSect:
9667 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009668 *
Owen Taylor3473f882001-02-23 17:55:21 +00009669 * Parse escaped pure raw content.
9670 *
9671 * [18] CDSect ::= CDStart CData CDEnd
9672 *
9673 * [19] CDStart ::= '<![CDATA['
9674 *
9675 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9676 *
9677 * [21] CDEnd ::= ']]>'
9678 */
9679void
9680xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9681 xmlChar *buf = NULL;
9682 int len = 0;
9683 int size = XML_PARSER_BUFFER_SIZE;
9684 int r, rl;
9685 int s, sl;
9686 int cur, l;
9687 int count = 0;
9688
Daniel Veillard8f597c32003-10-06 08:19:27 +00009689 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009690 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009691 SKIP(9);
9692 } else
9693 return;
9694
9695 ctxt->instate = XML_PARSER_CDATA_SECTION;
9696 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009697 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009698 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009699 ctxt->instate = XML_PARSER_CONTENT;
9700 return;
9701 }
9702 NEXTL(rl);
9703 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009704 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009705 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009706 ctxt->instate = XML_PARSER_CONTENT;
9707 return;
9708 }
9709 NEXTL(sl);
9710 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009711 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009712 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009713 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009714 return;
9715 }
William M. Brack871611b2003-10-18 04:53:14 +00009716 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009717 ((r != ']') || (s != ']') || (cur != '>'))) {
9718 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009719 xmlChar *tmp;
9720
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009721 if ((size > XML_MAX_TEXT_LENGTH) &&
9722 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9723 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9724 "CData section too big found", NULL);
9725 xmlFree (buf);
9726 return;
9727 }
9728 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009729 if (tmp == NULL) {
9730 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009731 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009732 return;
9733 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009734 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009735 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009736 }
9737 COPY_BUF(rl,buf,len,r);
9738 r = s;
9739 rl = sl;
9740 s = cur;
9741 sl = l;
9742 count++;
9743 if (count > 50) {
9744 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009745 if (ctxt->instate == XML_PARSER_EOF) {
9746 xmlFree(buf);
9747 return;
9748 }
Owen Taylor3473f882001-02-23 17:55:21 +00009749 count = 0;
9750 }
9751 NEXTL(l);
9752 cur = CUR_CHAR(l);
9753 }
9754 buf[len] = 0;
9755 ctxt->instate = XML_PARSER_CONTENT;
9756 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009757 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009758 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009759 xmlFree(buf);
9760 return;
9761 }
9762 NEXTL(l);
9763
9764 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009765 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009766 */
9767 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9768 if (ctxt->sax->cdataBlock != NULL)
9769 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009770 else if (ctxt->sax->characters != NULL)
9771 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009772 }
9773 xmlFree(buf);
9774}
9775
9776/**
9777 * xmlParseContent:
9778 * @ctxt: an XML parser context
9779 *
9780 * Parse a content:
9781 *
9782 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9783 */
9784
9785void
9786xmlParseContent(xmlParserCtxtPtr ctxt) {
9787 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009788 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009789 ((RAW != '<') || (NXT(1) != '/')) &&
9790 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009791 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009792 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009793 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009794
9795 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009796 * First case : a Processing Instruction.
9797 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009798 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009799 xmlParsePI(ctxt);
9800 }
9801
9802 /*
9803 * Second case : a CDSection
9804 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009805 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009806 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009807 xmlParseCDSect(ctxt);
9808 }
9809
9810 /*
9811 * Third case : a comment
9812 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009813 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009814 (NXT(2) == '-') && (NXT(3) == '-')) {
9815 xmlParseComment(ctxt);
9816 ctxt->instate = XML_PARSER_CONTENT;
9817 }
9818
9819 /*
9820 * Fourth case : a sub-element.
9821 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009822 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009823 xmlParseElement(ctxt);
9824 }
9825
9826 /*
9827 * Fifth case : a reference. If if has not been resolved,
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009828 * parsing returns it's Name, create the node
Owen Taylor3473f882001-02-23 17:55:21 +00009829 */
9830
Daniel Veillard21a0f912001-02-25 19:54:14 +00009831 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009832 xmlParseReference(ctxt);
9833 }
9834
9835 /*
9836 * Last case, text. Note that References are handled directly.
9837 */
9838 else {
9839 xmlParseCharData(ctxt, 0);
9840 }
9841
9842 GROW;
9843 /*
9844 * Pop-up of finished entities.
9845 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009846 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009847 xmlPopInput(ctxt);
9848 SHRINK;
9849
Daniel Veillardfdc91562002-07-01 21:52:03 +00009850 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009851 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9852 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009853 ctxt->instate = XML_PARSER_EOF;
9854 break;
9855 }
9856 }
9857}
9858
9859/**
9860 * xmlParseElement:
9861 * @ctxt: an XML parser context
9862 *
9863 * parse an XML element, this is highly recursive
9864 *
9865 * [39] element ::= EmptyElemTag | STag content ETag
9866 *
9867 * [ WFC: Element Type Match ]
9868 * The Name in an element's end-tag must match the element type in the
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009869 * start-tag.
Owen Taylor3473f882001-02-23 17:55:21 +00009870 *
Owen Taylor3473f882001-02-23 17:55:21 +00009871 */
9872
9873void
9874xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009875 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009876 const xmlChar *prefix = NULL;
9877 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009878 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009879 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009880 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009881 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009882
Daniel Veillard8915c152008-08-26 13:05:34 +00009883 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9884 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9885 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9886 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9887 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009888 ctxt->instate = XML_PARSER_EOF;
9889 return;
9890 }
9891
Owen Taylor3473f882001-02-23 17:55:21 +00009892 /* Capture start position */
9893 if (ctxt->record_info) {
9894 node_info.begin_pos = ctxt->input->consumed +
9895 (CUR_PTR - ctxt->input->base);
9896 node_info.begin_line = ctxt->input->line;
9897 }
9898
9899 if (ctxt->spaceNr == 0)
9900 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009901 else if (*ctxt->space == -2)
9902 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009903 else
9904 spacePush(ctxt, *ctxt->space);
9905
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009906 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009907#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009908 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009909#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009910 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009911#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009912 else
9913 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009914#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009915 if (ctxt->instate == XML_PARSER_EOF)
9916 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009917 if (name == NULL) {
9918 spacePop(ctxt);
9919 return;
9920 }
9921 namePush(ctxt, name);
9922 ret = ctxt->node;
9923
Daniel Veillard4432df22003-09-28 18:58:27 +00009924#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009925 /*
9926 * [ VC: Root Element Type ]
9927 * The Name in the document type declaration must match the element
Daniel Veillardf8e3db02012-09-11 13:26:36 +08009928 * type of the root element.
Owen Taylor3473f882001-02-23 17:55:21 +00009929 */
9930 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9931 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9932 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009933#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009934
9935 /*
9936 * Check for an Empty Element.
9937 */
9938 if ((RAW == '/') && (NXT(1) == '>')) {
9939 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009940 if (ctxt->sax2) {
9941 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9942 (!ctxt->disableSAX))
9943 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009944#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009945 } else {
9946 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9947 (!ctxt->disableSAX))
9948 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009949#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009950 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009951 namePop(ctxt);
9952 spacePop(ctxt);
9953 if (nsNr != ctxt->nsNr)
9954 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009955 if ( ret != NULL && ctxt->record_info ) {
9956 node_info.end_pos = ctxt->input->consumed +
9957 (CUR_PTR - ctxt->input->base);
9958 node_info.end_line = ctxt->input->line;
9959 node_info.node = ret;
9960 xmlParserAddNodeInfo(ctxt, &node_info);
9961 }
9962 return;
9963 }
9964 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009965 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009966 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009967 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9968 "Couldn't find end of Start Tag %s line %d\n",
9969 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009970
9971 /*
9972 * end of parsing of this node.
9973 */
9974 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009975 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009976 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009977 if (nsNr != ctxt->nsNr)
9978 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009979
9980 /*
9981 * Capture end position and add node
9982 */
9983 if ( ret != NULL && ctxt->record_info ) {
9984 node_info.end_pos = ctxt->input->consumed +
9985 (CUR_PTR - ctxt->input->base);
9986 node_info.end_line = ctxt->input->line;
9987 node_info.node = ret;
9988 xmlParserAddNodeInfo(ctxt, &node_info);
9989 }
9990 return;
9991 }
9992
9993 /*
9994 * Parse the content of the element:
9995 */
9996 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009997 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009998 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009999 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +000010000 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010001
10002 /*
10003 * end of parsing of this node.
10004 */
10005 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010006 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010007 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010008 if (nsNr != ctxt->nsNr)
10009 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010010 return;
10011 }
10012
10013 /*
10014 * parse the end of tag: '</' should be here.
10015 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010016 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010017 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010018 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010019 }
10020#ifdef LIBXML_SAX1_ENABLED
10021 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010022 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010023#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010024
10025 /*
10026 * Capture end position and add node
10027 */
10028 if ( ret != NULL && ctxt->record_info ) {
10029 node_info.end_pos = ctxt->input->consumed +
10030 (CUR_PTR - ctxt->input->base);
10031 node_info.end_line = ctxt->input->line;
10032 node_info.node = ret;
10033 xmlParserAddNodeInfo(ctxt, &node_info);
10034 }
10035}
10036
10037/**
10038 * xmlParseVersionNum:
10039 * @ctxt: an XML parser context
10040 *
10041 * parse the XML version value.
10042 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010043 * [26] VersionNum ::= '1.' [0-9]+
10044 *
10045 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010046 *
10047 * Returns the string giving the XML version number, or NULL
10048 */
10049xmlChar *
10050xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10051 xmlChar *buf = NULL;
10052 int len = 0;
10053 int size = 10;
10054 xmlChar cur;
10055
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010056 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010057 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010058 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010059 return(NULL);
10060 }
10061 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010062 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010063 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010064 return(NULL);
10065 }
10066 buf[len++] = cur;
10067 NEXT;
10068 cur=CUR;
10069 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010070 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010071 return(NULL);
10072 }
10073 buf[len++] = cur;
10074 NEXT;
10075 cur=CUR;
10076 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010077 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010078 xmlChar *tmp;
10079
Owen Taylor3473f882001-02-23 17:55:21 +000010080 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010081 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10082 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010083 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010084 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010085 return(NULL);
10086 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010087 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010088 }
10089 buf[len++] = cur;
10090 NEXT;
10091 cur=CUR;
10092 }
10093 buf[len] = 0;
10094 return(buf);
10095}
10096
10097/**
10098 * xmlParseVersionInfo:
10099 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010100 *
Owen Taylor3473f882001-02-23 17:55:21 +000010101 * parse the XML version.
10102 *
10103 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010104 *
Owen Taylor3473f882001-02-23 17:55:21 +000010105 * [25] Eq ::= S? '=' S?
10106 *
10107 * Returns the version string, e.g. "1.0"
10108 */
10109
10110xmlChar *
10111xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10112 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010113
Daniel Veillarda07050d2003-10-19 14:46:32 +000010114 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010115 SKIP(7);
10116 SKIP_BLANKS;
10117 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010118 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010119 return(NULL);
10120 }
10121 NEXT;
10122 SKIP_BLANKS;
10123 if (RAW == '"') {
10124 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010125 version = xmlParseVersionNum(ctxt);
10126 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010127 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010128 } else
10129 NEXT;
10130 } else if (RAW == '\''){
10131 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010132 version = xmlParseVersionNum(ctxt);
10133 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010134 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010135 } else
10136 NEXT;
10137 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010138 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010139 }
10140 }
10141 return(version);
10142}
10143
10144/**
10145 * xmlParseEncName:
10146 * @ctxt: an XML parser context
10147 *
10148 * parse the XML encoding name
10149 *
10150 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10151 *
10152 * Returns the encoding name value or NULL
10153 */
10154xmlChar *
10155xmlParseEncName(xmlParserCtxtPtr ctxt) {
10156 xmlChar *buf = NULL;
10157 int len = 0;
10158 int size = 10;
10159 xmlChar cur;
10160
10161 cur = CUR;
10162 if (((cur >= 'a') && (cur <= 'z')) ||
10163 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010164 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010165 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010166 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010167 return(NULL);
10168 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010169
Owen Taylor3473f882001-02-23 17:55:21 +000010170 buf[len++] = cur;
10171 NEXT;
10172 cur = CUR;
10173 while (((cur >= 'a') && (cur <= 'z')) ||
10174 ((cur >= 'A') && (cur <= 'Z')) ||
10175 ((cur >= '0') && (cur <= '9')) ||
10176 (cur == '.') || (cur == '_') ||
10177 (cur == '-')) {
10178 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010179 xmlChar *tmp;
10180
Owen Taylor3473f882001-02-23 17:55:21 +000010181 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010182 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10183 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010184 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010185 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010186 return(NULL);
10187 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010188 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010189 }
10190 buf[len++] = cur;
10191 NEXT;
10192 cur = CUR;
10193 if (cur == 0) {
10194 SHRINK;
10195 GROW;
10196 cur = CUR;
10197 }
10198 }
10199 buf[len] = 0;
10200 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010201 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010202 }
10203 return(buf);
10204}
10205
10206/**
10207 * xmlParseEncodingDecl:
10208 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010209 *
Owen Taylor3473f882001-02-23 17:55:21 +000010210 * parse the XML encoding declaration
10211 *
10212 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10213 *
10214 * this setups the conversion filters.
10215 *
10216 * Returns the encoding value or NULL
10217 */
10218
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010219const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010220xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10221 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010222
10223 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010224 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010225 SKIP(8);
10226 SKIP_BLANKS;
10227 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010228 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010229 return(NULL);
10230 }
10231 NEXT;
10232 SKIP_BLANKS;
10233 if (RAW == '"') {
10234 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010235 encoding = xmlParseEncName(ctxt);
10236 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010237 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010238 } else
10239 NEXT;
10240 } else if (RAW == '\''){
10241 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010242 encoding = xmlParseEncName(ctxt);
10243 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010244 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010245 } else
10246 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010247 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010248 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010249 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010250
10251 /*
10252 * Non standard parsing, allowing the user to ignore encoding
10253 */
10254 if (ctxt->options & XML_PARSE_IGNORE_ENC)
10255 return(encoding);
10256
Daniel Veillard6b621b82003-08-11 15:03:34 +000010257 /*
10258 * UTF-16 encoding stwich has already taken place at this stage,
10259 * more over the little-endian/big-endian selection is already done
10260 */
10261 if ((encoding != NULL) &&
10262 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10263 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010264 /*
10265 * If no encoding was passed to the parser, that we are
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010266 * using UTF-16 and no decoder is present i.e. the
Daniel Veillard37334572008-07-31 08:20:02 +000010267 * document is apparently UTF-8 compatible, then raise an
10268 * encoding mismatch fatal error
10269 */
10270 if ((ctxt->encoding == NULL) &&
10271 (ctxt->input->buf != NULL) &&
10272 (ctxt->input->buf->encoder == NULL)) {
10273 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10274 "Document labelled UTF-16 but has UTF-8 content\n");
10275 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010276 if (ctxt->encoding != NULL)
10277 xmlFree((xmlChar *) ctxt->encoding);
10278 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010279 }
10280 /*
10281 * UTF-8 encoding is handled natively
10282 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010283 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010284 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10285 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010286 if (ctxt->encoding != NULL)
10287 xmlFree((xmlChar *) ctxt->encoding);
10288 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010289 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010290 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010291 xmlCharEncodingHandlerPtr handler;
10292
10293 if (ctxt->input->encoding != NULL)
10294 xmlFree((xmlChar *) ctxt->input->encoding);
10295 ctxt->input->encoding = encoding;
10296
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010297 handler = xmlFindCharEncodingHandler((const char *) encoding);
10298 if (handler != NULL) {
10299 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +000010300 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010301 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010302 "Unsupported encoding %s\n", encoding);
10303 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010304 }
10305 }
10306 }
10307 return(encoding);
10308}
10309
10310/**
10311 * xmlParseSDDecl:
10312 * @ctxt: an XML parser context
10313 *
10314 * parse the XML standalone declaration
10315 *
10316 * [32] SDDecl ::= S 'standalone' Eq
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010317 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Owen Taylor3473f882001-02-23 17:55:21 +000010318 *
10319 * [ VC: Standalone Document Declaration ]
10320 * TODO The standalone document declaration must have the value "no"
10321 * if any external markup declarations contain declarations of:
10322 * - attributes with default values, if elements to which these
10323 * attributes apply appear in the document without specifications
10324 * of values for these attributes, or
10325 * - entities (other than amp, lt, gt, apos, quot), if references
10326 * to those entities appear in the document, or
10327 * - attributes with values subject to normalization, where the
10328 * attribute appears in the document with a value which will change
10329 * as a result of normalization, or
10330 * - element types with element content, if white space occurs directly
10331 * within any instance of those types.
10332 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010333 * Returns:
10334 * 1 if standalone="yes"
10335 * 0 if standalone="no"
10336 * -2 if standalone attribute is missing or invalid
10337 * (A standalone value of -2 means that the XML declaration was found,
10338 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010339 */
10340
10341int
10342xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010343 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010344
10345 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010346 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010347 SKIP(10);
10348 SKIP_BLANKS;
10349 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010350 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010351 return(standalone);
10352 }
10353 NEXT;
10354 SKIP_BLANKS;
10355 if (RAW == '\''){
10356 NEXT;
10357 if ((RAW == 'n') && (NXT(1) == 'o')) {
10358 standalone = 0;
10359 SKIP(2);
10360 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10361 (NXT(2) == 's')) {
10362 standalone = 1;
10363 SKIP(3);
10364 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010365 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010366 }
10367 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010368 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010369 } else
10370 NEXT;
10371 } else if (RAW == '"'){
10372 NEXT;
10373 if ((RAW == 'n') && (NXT(1) == 'o')) {
10374 standalone = 0;
10375 SKIP(2);
10376 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10377 (NXT(2) == 's')) {
10378 standalone = 1;
10379 SKIP(3);
10380 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010381 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010382 }
10383 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010384 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010385 } else
10386 NEXT;
10387 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010388 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010389 }
10390 }
10391 return(standalone);
10392}
10393
10394/**
10395 * xmlParseXMLDecl:
10396 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010397 *
Owen Taylor3473f882001-02-23 17:55:21 +000010398 * parse an XML declaration header
10399 *
10400 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10401 */
10402
10403void
10404xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10405 xmlChar *version;
10406
10407 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010408 * This value for standalone indicates that the document has an
10409 * XML declaration but it does not have a standalone attribute.
10410 * It will be overwritten later if a standalone attribute is found.
10411 */
10412 ctxt->input->standalone = -2;
10413
10414 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010415 * We know that '<?xml' is here.
10416 */
10417 SKIP(5);
10418
William M. Brack76e95df2003-10-18 16:20:14 +000010419 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010420 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10421 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010422 }
10423 SKIP_BLANKS;
10424
10425 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010426 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010427 */
10428 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010429 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010430 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010431 } else {
10432 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10433 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010434 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010435 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010436 if (ctxt->options & XML_PARSE_OLD10) {
10437 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10438 "Unsupported version '%s'\n",
10439 version);
10440 } else {
10441 if ((version[0] == '1') && ((version[1] == '.'))) {
10442 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10443 "Unsupported version '%s'\n",
10444 version, NULL);
10445 } else {
10446 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10447 "Unsupported version '%s'\n",
10448 version);
10449 }
10450 }
Daniel Veillard19840942001-11-29 16:11:38 +000010451 }
10452 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010453 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010454 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010455 }
Owen Taylor3473f882001-02-23 17:55:21 +000010456
10457 /*
10458 * We may have the encoding declaration
10459 */
William M. Brack76e95df2003-10-18 16:20:14 +000010460 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010461 if ((RAW == '?') && (NXT(1) == '>')) {
10462 SKIP(2);
10463 return;
10464 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010465 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010466 }
10467 xmlParseEncodingDecl(ctxt);
10468 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10469 /*
10470 * The XML REC instructs us to stop parsing right here
10471 */
10472 return;
10473 }
10474
10475 /*
10476 * We may have the standalone status.
10477 */
William M. Brack76e95df2003-10-18 16:20:14 +000010478 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010479 if ((RAW == '?') && (NXT(1) == '>')) {
10480 SKIP(2);
10481 return;
10482 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010483 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010484 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010485
10486 /*
10487 * We can grow the input buffer freely at that point
10488 */
10489 GROW;
10490
Owen Taylor3473f882001-02-23 17:55:21 +000010491 SKIP_BLANKS;
10492 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10493
10494 SKIP_BLANKS;
10495 if ((RAW == '?') && (NXT(1) == '>')) {
10496 SKIP(2);
10497 } else if (RAW == '>') {
10498 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010499 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010500 NEXT;
10501 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010502 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010503 MOVETO_ENDTAG(CUR_PTR);
10504 NEXT;
10505 }
10506}
10507
10508/**
10509 * xmlParseMisc:
10510 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010511 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010512 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010513 *
10514 * [27] Misc ::= Comment | PI | S
10515 */
10516
10517void
10518xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010519 while ((ctxt->instate != XML_PARSER_EOF) &&
10520 (((RAW == '<') && (NXT(1) == '?')) ||
10521 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10522 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010523 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010524 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010525 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010526 NEXT;
10527 } else
10528 xmlParseComment(ctxt);
10529 }
10530}
10531
10532/**
10533 * xmlParseDocument:
10534 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010535 *
Owen Taylor3473f882001-02-23 17:55:21 +000010536 * parse an XML document (and build a tree if using the standard SAX
10537 * interface).
10538 *
10539 * [1] document ::= prolog element Misc*
10540 *
10541 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10542 *
10543 * Returns 0, -1 in case of error. the parser context is augmented
10544 * as a result of the parsing.
10545 */
10546
10547int
10548xmlParseDocument(xmlParserCtxtPtr ctxt) {
10549 xmlChar start[4];
10550 xmlCharEncoding enc;
10551
10552 xmlInitParser();
10553
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010554 if ((ctxt == NULL) || (ctxt->input == NULL))
10555 return(-1);
10556
Owen Taylor3473f882001-02-23 17:55:21 +000010557 GROW;
10558
10559 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010560 * SAX: detecting the level.
10561 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010562 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010563
10564 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010565 * SAX: beginning of the document processing.
10566 */
10567 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10568 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10569
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010570 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010571 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010572 /*
Daniel Veillard4aafa792001-07-28 17:21:12 +000010573 * Get the 4 first bytes and decode the charset
10574 * if enc != XML_CHAR_ENCODING_NONE
10575 * plug some encoding conversion routines.
10576 */
10577 start[0] = RAW;
10578 start[1] = NXT(1);
10579 start[2] = NXT(2);
10580 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010581 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010582 if (enc != XML_CHAR_ENCODING_NONE) {
10583 xmlSwitchEncoding(ctxt, enc);
10584 }
Owen Taylor3473f882001-02-23 17:55:21 +000010585 }
10586
10587
10588 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010589 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010590 }
10591
10592 /*
10593 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010594 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010595 * than just the first line, unless the amount of data is really
10596 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010597 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010598 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10599 GROW;
10600 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010601 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010602
10603 /*
10604 * Note that we will switch encoding on the fly.
10605 */
10606 xmlParseXMLDecl(ctxt);
10607 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10608 /*
10609 * The XML REC instructs us to stop parsing right here
10610 */
10611 return(-1);
10612 }
10613 ctxt->standalone = ctxt->input->standalone;
10614 SKIP_BLANKS;
10615 } else {
10616 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10617 }
10618 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10619 ctxt->sax->startDocument(ctxt->userData);
10620
10621 /*
10622 * The Misc part of the Prolog
10623 */
10624 GROW;
10625 xmlParseMisc(ctxt);
10626
10627 /*
10628 * Then possibly doc type declaration(s) and more Misc
10629 * (doctypedecl Misc*)?
10630 */
10631 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010632 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010633
10634 ctxt->inSubset = 1;
10635 xmlParseDocTypeDecl(ctxt);
10636 if (RAW == '[') {
10637 ctxt->instate = XML_PARSER_DTD;
10638 xmlParseInternalSubset(ctxt);
10639 }
10640
10641 /*
10642 * Create and update the external subset.
10643 */
10644 ctxt->inSubset = 2;
10645 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10646 (!ctxt->disableSAX))
10647 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10648 ctxt->extSubSystem, ctxt->extSubURI);
10649 ctxt->inSubset = 0;
10650
Daniel Veillardac4118d2008-01-11 05:27:32 +000010651 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010652
10653 ctxt->instate = XML_PARSER_PROLOG;
10654 xmlParseMisc(ctxt);
10655 }
10656
10657 /*
10658 * Time to start parsing the tree itself
10659 */
10660 GROW;
10661 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010662 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10663 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010664 } else {
10665 ctxt->instate = XML_PARSER_CONTENT;
10666 xmlParseElement(ctxt);
10667 ctxt->instate = XML_PARSER_EPILOG;
10668
10669
10670 /*
10671 * The Misc part at the end
10672 */
10673 xmlParseMisc(ctxt);
10674
Daniel Veillard561b7f82002-03-20 21:55:57 +000010675 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010676 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010677 }
10678 ctxt->instate = XML_PARSER_EOF;
10679 }
10680
10681 /*
10682 * SAX: end of the document processing.
10683 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010684 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010685 ctxt->sax->endDocument(ctxt->userData);
10686
Daniel Veillard5997aca2002-03-18 18:36:20 +000010687 /*
10688 * Remove locally kept entity definitions if the tree was not built
10689 */
10690 if ((ctxt->myDoc != NULL) &&
10691 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10692 xmlFreeDoc(ctxt->myDoc);
10693 ctxt->myDoc = NULL;
10694 }
10695
Daniel Veillardae0765b2008-07-31 19:54:59 +000010696 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10697 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10698 if (ctxt->valid)
10699 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10700 if (ctxt->nsWellFormed)
10701 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10702 if (ctxt->options & XML_PARSE_OLD10)
10703 ctxt->myDoc->properties |= XML_DOC_OLD10;
10704 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010705 if (! ctxt->wellFormed) {
10706 ctxt->valid = 0;
10707 return(-1);
10708 }
Owen Taylor3473f882001-02-23 17:55:21 +000010709 return(0);
10710}
10711
10712/**
10713 * xmlParseExtParsedEnt:
10714 * @ctxt: an XML parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010715 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010716 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010717 * An external general parsed entity is well-formed if it matches the
10718 * production labeled extParsedEnt.
10719 *
10720 * [78] extParsedEnt ::= TextDecl? content
10721 *
10722 * Returns 0, -1 in case of error. the parser context is augmented
10723 * as a result of the parsing.
10724 */
10725
10726int
10727xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10728 xmlChar start[4];
10729 xmlCharEncoding enc;
10730
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010731 if ((ctxt == NULL) || (ctxt->input == NULL))
10732 return(-1);
10733
Owen Taylor3473f882001-02-23 17:55:21 +000010734 xmlDefaultSAXHandlerInit();
10735
Daniel Veillard309f81d2003-09-23 09:02:53 +000010736 xmlDetectSAX2(ctxt);
10737
Owen Taylor3473f882001-02-23 17:55:21 +000010738 GROW;
10739
10740 /*
10741 * SAX: beginning of the document processing.
10742 */
10743 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10744 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10745
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010746 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010747 * Get the 4 first bytes and decode the charset
10748 * if enc != XML_CHAR_ENCODING_NONE
10749 * plug some encoding conversion routines.
10750 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010751 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10752 start[0] = RAW;
10753 start[1] = NXT(1);
10754 start[2] = NXT(2);
10755 start[3] = NXT(3);
10756 enc = xmlDetectCharEncoding(start, 4);
10757 if (enc != XML_CHAR_ENCODING_NONE) {
10758 xmlSwitchEncoding(ctxt, enc);
10759 }
Owen Taylor3473f882001-02-23 17:55:21 +000010760 }
10761
10762
10763 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010764 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010765 }
10766
10767 /*
10768 * Check for the XMLDecl in the Prolog.
10769 */
10770 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010771 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010772
10773 /*
10774 * Note that we will switch encoding on the fly.
10775 */
10776 xmlParseXMLDecl(ctxt);
10777 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10778 /*
10779 * The XML REC instructs us to stop parsing right here
10780 */
10781 return(-1);
10782 }
10783 SKIP_BLANKS;
10784 } else {
10785 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10786 }
10787 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10788 ctxt->sax->startDocument(ctxt->userData);
10789
10790 /*
10791 * Doing validity checking on chunk doesn't make sense
10792 */
10793 ctxt->instate = XML_PARSER_CONTENT;
10794 ctxt->validate = 0;
10795 ctxt->loadsubset = 0;
10796 ctxt->depth = 0;
10797
10798 xmlParseContent(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010799
Owen Taylor3473f882001-02-23 17:55:21 +000010800 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010801 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010802 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010803 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010804 }
10805
10806 /*
10807 * SAX: end of the document processing.
10808 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010809 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010810 ctxt->sax->endDocument(ctxt->userData);
10811
10812 if (! ctxt->wellFormed) return(-1);
10813 return(0);
10814}
10815
Daniel Veillard73b013f2003-09-30 12:36:01 +000010816#ifdef LIBXML_PUSH_ENABLED
/************************************************************************
 *									*
 *		Progressive parsing interfaces				*
 *									*
 ************************************************************************/
10822
10823/**
10824 * xmlParseLookupSequence:
10825 * @ctxt: an XML parser context
10826 * @first: the first char to lookup
10827 * @next: the next char to lookup or zero
10828 * @third: the next char to lookup or zero
10829 *
10830 * Try to find if a sequence (first, next, third) or just (first next) or
10831 * (first) is available in the input stream.
10832 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10833 * to avoid rescanning sequences of bytes, it DOES change the state of the
10834 * parser, do not use liberally.
10835 *
10836 * Returns the index to the current parsing point if the full sequence
10837 * is available, -1 otherwise.
10838 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010839static int
Owen Taylor3473f882001-02-23 17:55:21 +000010840xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10841 xmlChar next, xmlChar third) {
10842 int base, len;
10843 xmlParserInputPtr in;
10844 const xmlChar *buf;
10845
10846 in = ctxt->input;
10847 if (in == NULL) return(-1);
10848 base = in->cur - in->base;
10849 if (base < 0) return(-1);
10850 if (ctxt->checkIndex > base)
10851 base = ctxt->checkIndex;
10852 if (in->buf == NULL) {
10853 buf = in->base;
10854 len = in->length;
10855 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010856 buf = xmlBufContent(in->buf->buffer);
10857 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010858 }
10859 /* take into account the sequence length */
10860 if (third) len -= 2;
10861 else if (next) len --;
10862 for (;base < len;base++) {
10863 if (buf[base] == first) {
10864 if (third != 0) {
10865 if ((buf[base + 1] != next) ||
10866 (buf[base + 2] != third)) continue;
10867 } else if (next != 0) {
10868 if (buf[base + 1] != next) continue;
10869 }
10870 ctxt->checkIndex = 0;
10871#ifdef DEBUG_PUSH
10872 if (next == 0)
10873 xmlGenericError(xmlGenericErrorContext,
10874 "PP: lookup '%c' found at %d\n",
10875 first, base);
10876 else if (third == 0)
10877 xmlGenericError(xmlGenericErrorContext,
10878 "PP: lookup '%c%c' found at %d\n",
10879 first, next, base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010880 else
Owen Taylor3473f882001-02-23 17:55:21 +000010881 xmlGenericError(xmlGenericErrorContext,
10882 "PP: lookup '%c%c%c' found at %d\n",
10883 first, next, third, base);
10884#endif
10885 return(base - (in->cur - in->base));
10886 }
10887 }
10888 ctxt->checkIndex = base;
10889#ifdef DEBUG_PUSH
10890 if (next == 0)
10891 xmlGenericError(xmlGenericErrorContext,
10892 "PP: lookup '%c' failed\n", first);
10893 else if (third == 0)
10894 xmlGenericError(xmlGenericErrorContext,
10895 "PP: lookup '%c%c' failed\n", first, next);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010896 else
Owen Taylor3473f882001-02-23 17:55:21 +000010897 xmlGenericError(xmlGenericErrorContext,
10898 "PP: lookup '%c%c%c' failed\n", first, next, third);
10899#endif
10900 return(-1);
10901}
10902
10903/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010904 * xmlParseGetLasts:
10905 * @ctxt: an XML parser context
10906 * @lastlt: pointer to store the last '<' from the input
10907 * @lastgt: pointer to store the last '>' from the input
10908 *
10909 * Lookup the last < and > in the current chunk
10910 */
10911static void
10912xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10913 const xmlChar **lastgt) {
10914 const xmlChar *tmp;
10915
10916 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10917 xmlGenericError(xmlGenericErrorContext,
10918 "Internal error: xmlParseGetLasts\n");
10919 return;
10920 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010921 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010922 tmp = ctxt->input->end;
10923 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010924 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010925 if (tmp < ctxt->input->base) {
10926 *lastlt = NULL;
10927 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010928 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010929 *lastlt = tmp;
10930 tmp++;
10931 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10932 if (*tmp == '\'') {
10933 tmp++;
10934 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10935 if (tmp < ctxt->input->end) tmp++;
10936 } else if (*tmp == '"') {
10937 tmp++;
10938 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10939 if (tmp < ctxt->input->end) tmp++;
10940 } else
10941 tmp++;
10942 }
10943 if (tmp < ctxt->input->end)
10944 *lastgt = tmp;
10945 else {
10946 tmp = *lastlt;
10947 tmp--;
10948 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10949 if (tmp >= ctxt->input->base)
10950 *lastgt = tmp;
10951 else
10952 *lastgt = NULL;
10953 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010954 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010955 } else {
10956 *lastlt = NULL;
10957 *lastgt = NULL;
10958 }
10959}
10960/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010961 * xmlCheckCdataPush:
10962 * @cur: pointer to the bock of characters
10963 * @len: length of the block in bytes
10964 *
10965 * Check that the block of characters is okay as SCdata content [20]
10966 *
10967 * Returns the number of bytes to pass if okay, a negative index where an
10968 * UTF-8 error occured otherwise
10969 */
10970static int
10971xmlCheckCdataPush(const xmlChar *utf, int len) {
10972 int ix;
10973 unsigned char c;
10974 int codepoint;
10975
10976 if ((utf == NULL) || (len <= 0))
10977 return(0);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080010978
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010979 for (ix = 0; ix < len;) { /* string is 0-terminated */
10980 c = utf[ix];
10981 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10982 if (c >= 0x20)
10983 ix++;
10984 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10985 ix++;
10986 else
10987 return(-ix);
10988 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10989 if (ix + 2 > len) return(ix);
10990 if ((utf[ix+1] & 0xc0 ) != 0x80)
10991 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010992 codepoint = (utf[ix] & 0x1f) << 6;
10993 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010994 if (!xmlIsCharQ(codepoint))
10995 return(-ix);
10996 ix += 2;
10997 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10998 if (ix + 3 > len) return(ix);
10999 if (((utf[ix+1] & 0xc0) != 0x80) ||
11000 ((utf[ix+2] & 0xc0) != 0x80))
11001 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011002 codepoint = (utf[ix] & 0xf) << 12;
11003 codepoint |= (utf[ix+1] & 0x3f) << 6;
11004 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011005 if (!xmlIsCharQ(codepoint))
11006 return(-ix);
11007 ix += 3;
11008 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11009 if (ix + 4 > len) return(ix);
11010 if (((utf[ix+1] & 0xc0) != 0x80) ||
11011 ((utf[ix+2] & 0xc0) != 0x80) ||
11012 ((utf[ix+3] & 0xc0) != 0x80))
11013 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011014 codepoint = (utf[ix] & 0x7) << 18;
11015 codepoint |= (utf[ix+1] & 0x3f) << 12;
11016 codepoint |= (utf[ix+2] & 0x3f) << 6;
11017 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011018 if (!xmlIsCharQ(codepoint))
11019 return(-ix);
11020 ix += 4;
11021 } else /* unknown encoding */
11022 return(-ix);
11023 }
11024 return(ix);
11025}
11026
11027/**
Owen Taylor3473f882001-02-23 17:55:21 +000011028 * xmlParseTryOrFinish:
11029 * @ctxt: an XML parser context
11030 * @terminate: last chunk indicator
11031 *
11032 * Try to progress on parsing
11033 *
11034 * Returns zero if no parsing was possible
11035 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011036static int
Owen Taylor3473f882001-02-23 17:55:21 +000011037xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11038 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011039 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011040 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011041 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011042
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011043 if (ctxt->input == NULL)
11044 return(0);
11045
Owen Taylor3473f882001-02-23 17:55:21 +000011046#ifdef DEBUG_PUSH
11047 switch (ctxt->instate) {
11048 case XML_PARSER_EOF:
11049 xmlGenericError(xmlGenericErrorContext,
11050 "PP: try EOF\n"); break;
11051 case XML_PARSER_START:
11052 xmlGenericError(xmlGenericErrorContext,
11053 "PP: try START\n"); break;
11054 case XML_PARSER_MISC:
11055 xmlGenericError(xmlGenericErrorContext,
11056 "PP: try MISC\n");break;
11057 case XML_PARSER_COMMENT:
11058 xmlGenericError(xmlGenericErrorContext,
11059 "PP: try COMMENT\n");break;
11060 case XML_PARSER_PROLOG:
11061 xmlGenericError(xmlGenericErrorContext,
11062 "PP: try PROLOG\n");break;
11063 case XML_PARSER_START_TAG:
11064 xmlGenericError(xmlGenericErrorContext,
11065 "PP: try START_TAG\n");break;
11066 case XML_PARSER_CONTENT:
11067 xmlGenericError(xmlGenericErrorContext,
11068 "PP: try CONTENT\n");break;
11069 case XML_PARSER_CDATA_SECTION:
11070 xmlGenericError(xmlGenericErrorContext,
11071 "PP: try CDATA_SECTION\n");break;
11072 case XML_PARSER_END_TAG:
11073 xmlGenericError(xmlGenericErrorContext,
11074 "PP: try END_TAG\n");break;
11075 case XML_PARSER_ENTITY_DECL:
11076 xmlGenericError(xmlGenericErrorContext,
11077 "PP: try ENTITY_DECL\n");break;
11078 case XML_PARSER_ENTITY_VALUE:
11079 xmlGenericError(xmlGenericErrorContext,
11080 "PP: try ENTITY_VALUE\n");break;
11081 case XML_PARSER_ATTRIBUTE_VALUE:
11082 xmlGenericError(xmlGenericErrorContext,
11083 "PP: try ATTRIBUTE_VALUE\n");break;
11084 case XML_PARSER_DTD:
11085 xmlGenericError(xmlGenericErrorContext,
11086 "PP: try DTD\n");break;
11087 case XML_PARSER_EPILOG:
11088 xmlGenericError(xmlGenericErrorContext,
11089 "PP: try EPILOG\n");break;
11090 case XML_PARSER_PI:
11091 xmlGenericError(xmlGenericErrorContext,
11092 "PP: try PI\n");break;
11093 case XML_PARSER_IGNORE:
11094 xmlGenericError(xmlGenericErrorContext,
11095 "PP: try IGNORE\n");break;
11096 }
11097#endif
11098
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011099 if ((ctxt->input != NULL) &&
11100 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011101 xmlSHRINK(ctxt);
11102 ctxt->checkIndex = 0;
11103 }
11104 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011105
Daniel Veillarda880b122003-04-21 21:36:41 +000011106 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000011107 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011108 return(0);
11109
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011110
Owen Taylor3473f882001-02-23 17:55:21 +000011111 /*
11112 * Pop-up of finished entities.
11113 */
11114 while ((RAW == 0) && (ctxt->inputNr > 1))
11115 xmlPopInput(ctxt);
11116
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011117 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011118 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011119 avail = ctxt->input->length -
11120 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011121 else {
11122 /*
11123 * If we are operating on converted input, try to flush
11124 * remainng chars to avoid them stalling in the non-converted
11125 * buffer.
11126 */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011127 if (xmlBufIsEmpty(ctxt->input->buf->buffer) == 0) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011128 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11129 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011130 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011131
11132 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011133 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11134 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011135 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011136 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011137 (ctxt->input->cur - ctxt->input->base);
11138 }
Owen Taylor3473f882001-02-23 17:55:21 +000011139 if (avail < 1)
11140 goto done;
11141 switch (ctxt->instate) {
11142 case XML_PARSER_EOF:
11143 /*
11144 * Document parsing is done !
11145 */
11146 goto done;
11147 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011148 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11149 xmlChar start[4];
11150 xmlCharEncoding enc;
11151
11152 /*
11153 * Very first chars read from the document flow.
11154 */
11155 if (avail < 4)
11156 goto done;
11157
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011158 /*
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011159 * Get the 4 first bytes and decode the charset
11160 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011161 * plug some encoding conversion routines,
11162 * else xmlSwitchEncoding will set to (default)
11163 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011164 */
11165 start[0] = RAW;
11166 start[1] = NXT(1);
11167 start[2] = NXT(2);
11168 start[3] = NXT(3);
11169 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011170 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011171 break;
11172 }
Owen Taylor3473f882001-02-23 17:55:21 +000011173
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011174 if (avail < 2)
11175 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011176 cur = ctxt->input->cur[0];
11177 next = ctxt->input->cur[1];
11178 if (cur == 0) {
11179 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11180 ctxt->sax->setDocumentLocator(ctxt->userData,
11181 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011182 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011183 ctxt->instate = XML_PARSER_EOF;
11184#ifdef DEBUG_PUSH
11185 xmlGenericError(xmlGenericErrorContext,
11186 "PP: entering EOF\n");
11187#endif
11188 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11189 ctxt->sax->endDocument(ctxt->userData);
11190 goto done;
11191 }
11192 if ((cur == '<') && (next == '?')) {
11193 /* PI or XML decl */
11194 if (avail < 5) return(ret);
11195 if ((!terminate) &&
11196 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11197 return(ret);
11198 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11199 ctxt->sax->setDocumentLocator(ctxt->userData,
11200 &xmlDefaultSAXLocator);
11201 if ((ctxt->input->cur[2] == 'x') &&
11202 (ctxt->input->cur[3] == 'm') &&
11203 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011204 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011205 ret += 5;
11206#ifdef DEBUG_PUSH
11207 xmlGenericError(xmlGenericErrorContext,
11208 "PP: Parsing XML Decl\n");
11209#endif
11210 xmlParseXMLDecl(ctxt);
11211 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11212 /*
11213 * The XML REC instructs us to stop parsing right
11214 * here
11215 */
11216 ctxt->instate = XML_PARSER_EOF;
11217 return(0);
11218 }
11219 ctxt->standalone = ctxt->input->standalone;
11220 if ((ctxt->encoding == NULL) &&
11221 (ctxt->input->encoding != NULL))
11222 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11223 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11224 (!ctxt->disableSAX))
11225 ctxt->sax->startDocument(ctxt->userData);
11226 ctxt->instate = XML_PARSER_MISC;
11227#ifdef DEBUG_PUSH
11228 xmlGenericError(xmlGenericErrorContext,
11229 "PP: entering MISC\n");
11230#endif
11231 } else {
11232 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11233 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11234 (!ctxt->disableSAX))
11235 ctxt->sax->startDocument(ctxt->userData);
11236 ctxt->instate = XML_PARSER_MISC;
11237#ifdef DEBUG_PUSH
11238 xmlGenericError(xmlGenericErrorContext,
11239 "PP: entering MISC\n");
11240#endif
11241 }
11242 } else {
11243 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11244 ctxt->sax->setDocumentLocator(ctxt->userData,
11245 &xmlDefaultSAXLocator);
11246 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011247 if (ctxt->version == NULL) {
11248 xmlErrMemory(ctxt, NULL);
11249 break;
11250 }
Owen Taylor3473f882001-02-23 17:55:21 +000011251 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11252 (!ctxt->disableSAX))
11253 ctxt->sax->startDocument(ctxt->userData);
11254 ctxt->instate = XML_PARSER_MISC;
11255#ifdef DEBUG_PUSH
11256 xmlGenericError(xmlGenericErrorContext,
11257 "PP: entering MISC\n");
11258#endif
11259 }
11260 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011261 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011262 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011263 const xmlChar *prefix = NULL;
11264 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011265 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011266
11267 if ((avail < 2) && (ctxt->inputNr == 1))
11268 goto done;
11269 cur = ctxt->input->cur[0];
11270 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011271 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000011272 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011273 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11274 ctxt->sax->endDocument(ctxt->userData);
11275 goto done;
11276 }
11277 if (!terminate) {
11278 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011279 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011280 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011281 goto done;
11282 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11283 goto done;
11284 }
11285 }
11286 if (ctxt->spaceNr == 0)
11287 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011288 else if (*ctxt->space == -2)
11289 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011290 else
11291 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011292#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011293 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011294#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011295 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011296#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011297 else
11298 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011299#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011300 if (ctxt->instate == XML_PARSER_EOF)
11301 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011302 if (name == NULL) {
11303 spacePop(ctxt);
11304 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011305 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11306 ctxt->sax->endDocument(ctxt->userData);
11307 goto done;
11308 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011309#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011310 /*
11311 * [ VC: Root Element Type ]
11312 * The Name in the document type declaration must match
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011313 * the element type of the root element.
Daniel Veillarda880b122003-04-21 21:36:41 +000011314 */
11315 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11316 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11317 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011318#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011319
11320 /*
11321 * Check for an Empty Element.
11322 */
11323 if ((RAW == '/') && (NXT(1) == '>')) {
11324 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011325
11326 if (ctxt->sax2) {
11327 if ((ctxt->sax != NULL) &&
11328 (ctxt->sax->endElementNs != NULL) &&
11329 (!ctxt->disableSAX))
11330 ctxt->sax->endElementNs(ctxt->userData, name,
11331 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011332 if (ctxt->nsNr - nsNr > 0)
11333 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011334#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011335 } else {
11336 if ((ctxt->sax != NULL) &&
11337 (ctxt->sax->endElement != NULL) &&
11338 (!ctxt->disableSAX))
11339 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011340#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011341 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011342 spacePop(ctxt);
11343 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011344 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011345 } else {
11346 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011347 }
Daniel Veillard65686452012-07-19 18:25:01 +080011348 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011349 break;
11350 }
11351 if (RAW == '>') {
11352 NEXT;
11353 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011354 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011355 "Couldn't find end of Start Tag %s\n",
11356 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011357 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011358 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011359 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011360 if (ctxt->sax2)
11361 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011362#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011363 else
11364 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011365#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011366
Daniel Veillarda880b122003-04-21 21:36:41 +000011367 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011368 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011369 break;
11370 }
11371 case XML_PARSER_CONTENT: {
11372 const xmlChar *test;
11373 unsigned int cons;
11374 if ((avail < 2) && (ctxt->inputNr == 1))
11375 goto done;
11376 cur = ctxt->input->cur[0];
11377 next = ctxt->input->cur[1];
11378
11379 test = CUR_PTR;
11380 cons = ctxt->input->consumed;
11381 if ((cur == '<') && (next == '/')) {
11382 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011383 break;
11384 } else if ((cur == '<') && (next == '?')) {
11385 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011386 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11387 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011388 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011389 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011390 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011391 ctxt->instate = XML_PARSER_CONTENT;
11392 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011393 } else if ((cur == '<') && (next != '!')) {
11394 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011395 break;
11396 } else if ((cur == '<') && (next == '!') &&
11397 (ctxt->input->cur[2] == '-') &&
11398 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011399 int term;
11400
11401 if (avail < 4)
11402 goto done;
11403 ctxt->input->cur += 4;
11404 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11405 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011406 if ((!terminate) && (term < 0)) {
11407 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011408 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011409 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011410 xmlParseComment(ctxt);
11411 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011412 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011413 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11414 (ctxt->input->cur[2] == '[') &&
11415 (ctxt->input->cur[3] == 'C') &&
11416 (ctxt->input->cur[4] == 'D') &&
11417 (ctxt->input->cur[5] == 'A') &&
11418 (ctxt->input->cur[6] == 'T') &&
11419 (ctxt->input->cur[7] == 'A') &&
11420 (ctxt->input->cur[8] == '[')) {
11421 SKIP(9);
11422 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011423 break;
11424 } else if ((cur == '<') && (next == '!') &&
11425 (avail < 9)) {
11426 goto done;
11427 } else if (cur == '&') {
11428 if ((!terminate) &&
11429 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11430 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011431 xmlParseReference(ctxt);
11432 } else {
11433 /* TODO Avoid the extra copy, handle directly !!! */
11434 /*
11435 * Goal of the following test is:
11436 * - minimize calls to the SAX 'character' callback
11437 * when they are mergeable
11438 * - handle a problem for isBlank when we only parse
11439 * a sequence of blank chars and the next one is
11440 * not available to check against '<' presence.
11441 * - tries to homogenize the differences in SAX
11442 * callbacks between the push and pull versions
11443 * of the parser.
11444 */
11445 if ((ctxt->inputNr == 1) &&
11446 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11447 if (!terminate) {
11448 if (ctxt->progressive) {
11449 if ((lastlt == NULL) ||
11450 (ctxt->input->cur > lastlt))
11451 goto done;
11452 } else if (xmlParseLookupSequence(ctxt,
11453 '<', 0, 0) < 0) {
11454 goto done;
11455 }
11456 }
11457 }
11458 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011459 xmlParseCharData(ctxt, 0);
11460 }
11461 /*
11462 * Pop-up of finished entities.
11463 */
11464 while ((RAW == 0) && (ctxt->inputNr > 1))
11465 xmlPopInput(ctxt);
11466 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011467 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11468 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011469 ctxt->instate = XML_PARSER_EOF;
11470 break;
11471 }
11472 break;
11473 }
11474 case XML_PARSER_END_TAG:
11475 if (avail < 2)
11476 goto done;
11477 if (!terminate) {
11478 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011479 /* > can be found unescaped in attribute values */
11480 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011481 goto done;
11482 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11483 goto done;
11484 }
11485 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011486 if (ctxt->sax2) {
11487 xmlParseEndTag2(ctxt,
11488 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11489 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011490 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011491 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011492 }
11493#ifdef LIBXML_SAX1_ENABLED
11494 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011495 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011496#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011497 if (ctxt->instate == XML_PARSER_EOF) {
11498 /* Nothing */
11499 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011500 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011501 } else {
11502 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011503 }
11504 break;
11505 case XML_PARSER_CDATA_SECTION: {
11506 /*
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011507 * The Push mode need to have the SAX callback for
Daniel Veillarda880b122003-04-21 21:36:41 +000011508 * cdataBlock merge back contiguous callbacks.
11509 */
11510 int base;
11511
11512 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11513 if (base < 0) {
11514 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011515 int tmp;
11516
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011517 tmp = xmlCheckCdataPush(ctxt->input->cur,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011518 XML_PARSER_BIG_BUFFER_SIZE);
11519 if (tmp < 0) {
11520 tmp = -tmp;
11521 ctxt->input->cur += tmp;
11522 goto encoding_error;
11523 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011524 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11525 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011526 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011527 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011528 else if (ctxt->sax->characters != NULL)
11529 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011530 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011531 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011532 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011533 ctxt->checkIndex = 0;
11534 }
11535 goto done;
11536 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011537 int tmp;
11538
11539 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11540 if ((tmp < 0) || (tmp != base)) {
11541 tmp = -tmp;
11542 ctxt->input->cur += tmp;
11543 goto encoding_error;
11544 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011545 if ((ctxt->sax != NULL) && (base == 0) &&
11546 (ctxt->sax->cdataBlock != NULL) &&
11547 (!ctxt->disableSAX)) {
11548 /*
11549 * Special case to provide identical behaviour
11550 * between pull and push parsers on empty CDATA
11551 * sections
11552 */
11553 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11554 (!strncmp((const char *)&ctxt->input->cur[-9],
11555 "<![CDATA[", 9)))
11556 ctxt->sax->cdataBlock(ctxt->userData,
11557 BAD_CAST "", 0);
11558 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011559 (!ctxt->disableSAX)) {
11560 if (ctxt->sax->cdataBlock != NULL)
11561 ctxt->sax->cdataBlock(ctxt->userData,
11562 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011563 else if (ctxt->sax->characters != NULL)
11564 ctxt->sax->characters(ctxt->userData,
11565 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011566 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011567 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011568 ctxt->checkIndex = 0;
11569 ctxt->instate = XML_PARSER_CONTENT;
11570#ifdef DEBUG_PUSH
11571 xmlGenericError(xmlGenericErrorContext,
11572 "PP: entering CONTENT\n");
11573#endif
11574 }
11575 break;
11576 }
Owen Taylor3473f882001-02-23 17:55:21 +000011577 case XML_PARSER_MISC:
11578 SKIP_BLANKS;
11579 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011580 avail = ctxt->input->length -
11581 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011582 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011583 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011584 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011585 if (avail < 2)
11586 goto done;
11587 cur = ctxt->input->cur[0];
11588 next = ctxt->input->cur[1];
11589 if ((cur == '<') && (next == '?')) {
11590 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011591 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11592 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011593 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011594 }
Owen Taylor3473f882001-02-23 17:55:21 +000011595#ifdef DEBUG_PUSH
11596 xmlGenericError(xmlGenericErrorContext,
11597 "PP: Parsing PI\n");
11598#endif
11599 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011600 ctxt->instate = XML_PARSER_MISC;
11601 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011602 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011603 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011604 (ctxt->input->cur[2] == '-') &&
11605 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011606 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011607 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11608 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011609 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011610 }
Owen Taylor3473f882001-02-23 17:55:21 +000011611#ifdef DEBUG_PUSH
11612 xmlGenericError(xmlGenericErrorContext,
11613 "PP: Parsing Comment\n");
11614#endif
11615 xmlParseComment(ctxt);
11616 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011617 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011618 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011619 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011620 (ctxt->input->cur[2] == 'D') &&
11621 (ctxt->input->cur[3] == 'O') &&
11622 (ctxt->input->cur[4] == 'C') &&
11623 (ctxt->input->cur[5] == 'T') &&
11624 (ctxt->input->cur[6] == 'Y') &&
11625 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011626 (ctxt->input->cur[8] == 'E')) {
11627 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011628 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11629 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011630 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011631 }
Owen Taylor3473f882001-02-23 17:55:21 +000011632#ifdef DEBUG_PUSH
11633 xmlGenericError(xmlGenericErrorContext,
11634 "PP: Parsing internal subset\n");
11635#endif
11636 ctxt->inSubset = 1;
Daniel Veillard6c91aa32012-10-25 15:33:59 +080011637 ctxt->progressive = 0;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011638 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011639 xmlParseDocTypeDecl(ctxt);
11640 if (RAW == '[') {
11641 ctxt->instate = XML_PARSER_DTD;
11642#ifdef DEBUG_PUSH
11643 xmlGenericError(xmlGenericErrorContext,
11644 "PP: entering DTD\n");
11645#endif
11646 } else {
11647 /*
11648 * Create and update the external subset.
11649 */
11650 ctxt->inSubset = 2;
11651 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11652 (ctxt->sax->externalSubset != NULL))
11653 ctxt->sax->externalSubset(ctxt->userData,
11654 ctxt->intSubName, ctxt->extSubSystem,
11655 ctxt->extSubURI);
11656 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011657 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011658 ctxt->instate = XML_PARSER_PROLOG;
11659#ifdef DEBUG_PUSH
11660 xmlGenericError(xmlGenericErrorContext,
11661 "PP: entering PROLOG\n");
11662#endif
11663 }
11664 } else if ((cur == '<') && (next == '!') &&
11665 (avail < 9)) {
11666 goto done;
11667 } else {
11668 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011669 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011670 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011671#ifdef DEBUG_PUSH
11672 xmlGenericError(xmlGenericErrorContext,
11673 "PP: entering START_TAG\n");
11674#endif
11675 }
11676 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011677 case XML_PARSER_PROLOG:
11678 SKIP_BLANKS;
11679 if (ctxt->input->buf == NULL)
11680 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11681 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011682 avail = xmlBufUse(ctxt->input->buf->buffer) -
11683 (ctxt->input->cur - ctxt->input->base);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011684 if (avail < 2)
Owen Taylor3473f882001-02-23 17:55:21 +000011685 goto done;
11686 cur = ctxt->input->cur[0];
11687 next = ctxt->input->cur[1];
11688 if ((cur == '<') && (next == '?')) {
11689 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011690 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11691 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011692 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011693 }
Owen Taylor3473f882001-02-23 17:55:21 +000011694#ifdef DEBUG_PUSH
11695 xmlGenericError(xmlGenericErrorContext,
11696 "PP: Parsing PI\n");
11697#endif
11698 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011699 ctxt->instate = XML_PARSER_PROLOG;
11700 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011701 } else if ((cur == '<') && (next == '!') &&
11702 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11703 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011704 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11705 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011706 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011707 }
Owen Taylor3473f882001-02-23 17:55:21 +000011708#ifdef DEBUG_PUSH
11709 xmlGenericError(xmlGenericErrorContext,
11710 "PP: Parsing Comment\n");
11711#endif
11712 xmlParseComment(ctxt);
11713 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011714 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011715 } else if ((cur == '<') && (next == '!') &&
11716 (avail < 4)) {
11717 goto done;
11718 } else {
11719 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011720 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011721 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011722 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011723#ifdef DEBUG_PUSH
11724 xmlGenericError(xmlGenericErrorContext,
11725 "PP: entering START_TAG\n");
11726#endif
11727 }
11728 break;
11729 case XML_PARSER_EPILOG:
11730 SKIP_BLANKS;
11731 if (ctxt->input->buf == NULL)
11732 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11733 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011734 avail = xmlBufUse(ctxt->input->buf->buffer) -
11735 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011736 if (avail < 2)
11737 goto done;
11738 cur = ctxt->input->cur[0];
11739 next = ctxt->input->cur[1];
11740 if ((cur == '<') && (next == '?')) {
11741 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011742 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11743 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011744 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011745 }
Owen Taylor3473f882001-02-23 17:55:21 +000011746#ifdef DEBUG_PUSH
11747 xmlGenericError(xmlGenericErrorContext,
11748 "PP: Parsing PI\n");
11749#endif
11750 xmlParsePI(ctxt);
11751 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011752 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011753 } else if ((cur == '<') && (next == '!') &&
11754 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11755 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011756 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11757 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011758 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011759 }
Owen Taylor3473f882001-02-23 17:55:21 +000011760#ifdef DEBUG_PUSH
11761 xmlGenericError(xmlGenericErrorContext,
11762 "PP: Parsing Comment\n");
11763#endif
11764 xmlParseComment(ctxt);
11765 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011766 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011767 } else if ((cur == '<') && (next == '!') &&
11768 (avail < 4)) {
11769 goto done;
11770 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011771 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011772 ctxt->instate = XML_PARSER_EOF;
11773#ifdef DEBUG_PUSH
11774 xmlGenericError(xmlGenericErrorContext,
11775 "PP: entering EOF\n");
11776#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011777 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011778 ctxt->sax->endDocument(ctxt->userData);
11779 goto done;
11780 }
11781 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011782 case XML_PARSER_DTD: {
11783 /*
11784 * Sorry but progressive parsing of the internal subset
11785 * is not expected to be supported. We first check that
11786 * the full content of the internal subset is available and
11787 * the parsing is launched only at that point.
11788 * Internal subset ends up with "']' S? '>'" in an unescaped
11789 * section and not in a ']]>' sequence which are conditional
11790 * sections (whoever argued to keep that crap in XML deserve
11791 * a place in hell !).
11792 */
11793 int base, i;
11794 xmlChar *buf;
11795 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011796 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011797
11798 base = ctxt->input->cur - ctxt->input->base;
11799 if (base < 0) return(0);
11800 if (ctxt->checkIndex > base)
11801 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011802 buf = xmlBufContent(ctxt->input->buf->buffer);
11803 use = xmlBufUse(ctxt->input->buf->buffer);
11804 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011805 if (quote != 0) {
11806 if (buf[base] == quote)
11807 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011808 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011809 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011810 if ((quote == 0) && (buf[base] == '<')) {
11811 int found = 0;
11812 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011813 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011814 (buf[base + 1] == '!') &&
11815 (buf[base + 2] == '-') &&
11816 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011817 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011818 if ((buf[base] == '-') &&
11819 (buf[base + 1] == '-') &&
11820 (buf[base + 2] == '>')) {
11821 found = 1;
11822 base += 2;
11823 break;
11824 }
11825 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011826 if (!found) {
11827#if 0
11828 fprintf(stderr, "unfinished comment\n");
11829#endif
11830 break; /* for */
11831 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011832 continue;
11833 }
11834 }
Owen Taylor3473f882001-02-23 17:55:21 +000011835 if (buf[base] == '"') {
11836 quote = '"';
11837 continue;
11838 }
11839 if (buf[base] == '\'') {
11840 quote = '\'';
11841 continue;
11842 }
11843 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011844#if 0
11845 fprintf(stderr, "%c%c%c%c: ", buf[base],
11846 buf[base + 1], buf[base + 2], buf[base + 3]);
11847#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011848 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011849 break;
11850 if (buf[base + 1] == ']') {
11851 /* conditional crap, skip both ']' ! */
11852 base++;
11853 continue;
11854 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011855 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011856 if (buf[base + i] == '>') {
11857#if 0
11858 fprintf(stderr, "found\n");
11859#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011860 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011861 }
11862 if (!IS_BLANK_CH(buf[base + i])) {
11863#if 0
11864 fprintf(stderr, "not found\n");
11865#endif
11866 goto not_end_of_int_subset;
11867 }
Owen Taylor3473f882001-02-23 17:55:21 +000011868 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011869#if 0
11870 fprintf(stderr, "end of stream\n");
11871#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011872 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011873
Owen Taylor3473f882001-02-23 17:55:21 +000011874 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011875not_end_of_int_subset:
11876 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011877 }
11878 /*
11879 * We didn't found the end of the Internal subset
11880 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011881 if (quote == 0)
11882 ctxt->checkIndex = base;
11883 else
11884 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011885#ifdef DEBUG_PUSH
11886 if (next == 0)
11887 xmlGenericError(xmlGenericErrorContext,
11888 "PP: lookup of int subset end filed\n");
11889#endif
11890 goto done;
11891
11892found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011893 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011894 xmlParseInternalSubset(ctxt);
11895 ctxt->inSubset = 2;
11896 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11897 (ctxt->sax->externalSubset != NULL))
11898 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11899 ctxt->extSubSystem, ctxt->extSubURI);
11900 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011901 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011902 ctxt->instate = XML_PARSER_PROLOG;
11903 ctxt->checkIndex = 0;
11904#ifdef DEBUG_PUSH
11905 xmlGenericError(xmlGenericErrorContext,
11906 "PP: entering PROLOG\n");
11907#endif
11908 break;
11909 }
11910 case XML_PARSER_COMMENT:
11911 xmlGenericError(xmlGenericErrorContext,
11912 "PP: internal error, state == COMMENT\n");
11913 ctxt->instate = XML_PARSER_CONTENT;
11914#ifdef DEBUG_PUSH
11915 xmlGenericError(xmlGenericErrorContext,
11916 "PP: entering CONTENT\n");
11917#endif
11918 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011919 case XML_PARSER_IGNORE:
11920 xmlGenericError(xmlGenericErrorContext,
11921 "PP: internal error, state == IGNORE");
11922 ctxt->instate = XML_PARSER_DTD;
11923#ifdef DEBUG_PUSH
11924 xmlGenericError(xmlGenericErrorContext,
11925 "PP: entering DTD\n");
11926#endif
11927 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011928 case XML_PARSER_PI:
11929 xmlGenericError(xmlGenericErrorContext,
11930 "PP: internal error, state == PI\n");
11931 ctxt->instate = XML_PARSER_CONTENT;
11932#ifdef DEBUG_PUSH
11933 xmlGenericError(xmlGenericErrorContext,
11934 "PP: entering CONTENT\n");
11935#endif
11936 break;
11937 case XML_PARSER_ENTITY_DECL:
11938 xmlGenericError(xmlGenericErrorContext,
11939 "PP: internal error, state == ENTITY_DECL\n");
11940 ctxt->instate = XML_PARSER_DTD;
11941#ifdef DEBUG_PUSH
11942 xmlGenericError(xmlGenericErrorContext,
11943 "PP: entering DTD\n");
11944#endif
11945 break;
11946 case XML_PARSER_ENTITY_VALUE:
11947 xmlGenericError(xmlGenericErrorContext,
11948 "PP: internal error, state == ENTITY_VALUE\n");
11949 ctxt->instate = XML_PARSER_CONTENT;
11950#ifdef DEBUG_PUSH
11951 xmlGenericError(xmlGenericErrorContext,
11952 "PP: entering DTD\n");
11953#endif
11954 break;
11955 case XML_PARSER_ATTRIBUTE_VALUE:
11956 xmlGenericError(xmlGenericErrorContext,
11957 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11958 ctxt->instate = XML_PARSER_START_TAG;
11959#ifdef DEBUG_PUSH
11960 xmlGenericError(xmlGenericErrorContext,
11961 "PP: entering START_TAG\n");
11962#endif
11963 break;
11964 case XML_PARSER_SYSTEM_LITERAL:
11965 xmlGenericError(xmlGenericErrorContext,
11966 "PP: internal error, state == SYSTEM_LITERAL\n");
11967 ctxt->instate = XML_PARSER_START_TAG;
11968#ifdef DEBUG_PUSH
11969 xmlGenericError(xmlGenericErrorContext,
11970 "PP: entering START_TAG\n");
11971#endif
11972 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011973 case XML_PARSER_PUBLIC_LITERAL:
11974 xmlGenericError(xmlGenericErrorContext,
11975 "PP: internal error, state == PUBLIC_LITERAL\n");
11976 ctxt->instate = XML_PARSER_START_TAG;
11977#ifdef DEBUG_PUSH
11978 xmlGenericError(xmlGenericErrorContext,
11979 "PP: entering START_TAG\n");
11980#endif
11981 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011982 }
11983 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080011984done:
Owen Taylor3473f882001-02-23 17:55:21 +000011985#ifdef DEBUG_PUSH
11986 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11987#endif
11988 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011989encoding_error:
11990 {
11991 char buffer[150];
11992
11993 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11994 ctxt->input->cur[0], ctxt->input->cur[1],
11995 ctxt->input->cur[2], ctxt->input->cur[3]);
11996 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11997 "Input is not proper UTF-8, indicate encoding !\n%s",
11998 BAD_CAST buffer, NULL);
11999 }
12000 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012001}
12002
12003/**
Daniel Veillard65686452012-07-19 18:25:01 +080012004 * xmlParseCheckTransition:
12005 * @ctxt: an XML parser context
12006 * @chunk: a char array
12007 * @size: the size in byte of the chunk
12008 *
12009 * Check depending on the current parser state if the chunk given must be
12010 * processed immediately or one need more data to advance on parsing.
12011 *
12012 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12013 */
12014static int
12015xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12016 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12017 return(-1);
12018 if (ctxt->instate == XML_PARSER_START_TAG) {
12019 if (memchr(chunk, '>', size) != NULL)
12020 return(1);
12021 return(0);
12022 }
12023 if (ctxt->progressive == XML_PARSER_COMMENT) {
12024 if (memchr(chunk, '>', size) != NULL)
12025 return(1);
12026 return(0);
12027 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012028 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12029 if (memchr(chunk, '>', size) != NULL)
12030 return(1);
12031 return(0);
12032 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012033 if (ctxt->progressive == XML_PARSER_PI) {
12034 if (memchr(chunk, '>', size) != NULL)
12035 return(1);
12036 return(0);
12037 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012038 if (ctxt->instate == XML_PARSER_END_TAG) {
12039 if (memchr(chunk, '>', size) != NULL)
12040 return(1);
12041 return(0);
12042 }
12043 if ((ctxt->progressive == XML_PARSER_DTD) ||
12044 (ctxt->instate == XML_PARSER_DTD)) {
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012045 if (memchr(chunk, ']', size) != NULL)
12046 return(1);
12047 return(0);
12048 }
Daniel Veillard65686452012-07-19 18:25:01 +080012049 return(1);
12050}
12051
12052/**
Owen Taylor3473f882001-02-23 17:55:21 +000012053 * xmlParseChunk:
12054 * @ctxt: an XML parser context
12055 * @chunk: a char array
12056 * @size: the size in byte of the chunk
12057 * @terminate: last chunk indicator
12058 *
12059 * Parse a Chunk of memory
12060 *
12061 * Returns zero if no error, the xmlParserErrors otherwise.
12062 */
12063int
12064xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12065 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012066 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012067 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012068 size_t old_avail = 0;
12069 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012070
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012071 if (ctxt == NULL)
12072 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012073 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012074 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012075 if (ctxt->instate == XML_PARSER_EOF)
12076 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012077 if (ctxt->instate == XML_PARSER_START)
12078 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012079 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12080 (chunk[size - 1] == '\r')) {
12081 end_in_lf = 1;
12082 size--;
12083 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012084
12085xmldecl_done:
12086
Owen Taylor3473f882001-02-23 17:55:21 +000012087 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12088 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012089 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12090 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012091 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012092
Daniel Veillard65686452012-07-19 18:25:01 +080012093 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012094 /*
12095 * Specific handling if we autodetected an encoding, we should not
12096 * push more than the first line ... which depend on the encoding
12097 * And only push the rest once the final encoding was detected
12098 */
12099 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12100 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012101 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012102
12103 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12104 BAD_CAST "UTF-16")) ||
12105 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12106 BAD_CAST "UTF16")))
12107 len = 90;
12108 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12109 BAD_CAST "UCS-4")) ||
12110 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12111 BAD_CAST "UCS4")))
12112 len = 180;
12113
12114 if (ctxt->input->buf->rawconsumed < len)
12115 len -= ctxt->input->buf->rawconsumed;
12116
Raul Hudeaba9716a2010-03-15 10:13:29 +010012117 /*
12118 * Change size for reading the initial declaration only
12119 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12120 * will blindly copy extra bytes from memory.
12121 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012122 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012123 remain = size - len;
12124 size = len;
12125 } else {
12126 remain = 0;
12127 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012128 }
William M. Bracka3215c72004-07-31 16:24:01 +000012129 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12130 if (res < 0) {
12131 ctxt->errNo = XML_PARSER_EOF;
12132 ctxt->disableSAX = 1;
12133 return (XML_PARSER_EOF);
12134 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012135 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012136#ifdef DEBUG_PUSH
12137 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12138#endif
12139
Owen Taylor3473f882001-02-23 17:55:21 +000012140 } else if (ctxt->instate != XML_PARSER_EOF) {
12141 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12142 xmlParserInputBufferPtr in = ctxt->input->buf;
12143 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12144 (in->raw != NULL)) {
12145 int nbchars;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012146
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012147 nbchars = xmlCharEncInput(in);
Owen Taylor3473f882001-02-23 17:55:21 +000012148 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012149 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012150 xmlGenericError(xmlGenericErrorContext,
12151 "xmlParseChunk: encoder error\n");
12152 return(XML_ERR_INVALID_ENCODING);
12153 }
12154 }
12155 }
12156 }
Daniel Veillard65686452012-07-19 18:25:01 +080012157 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012158 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012159 } else {
12160 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12161 avail = xmlBufUse(ctxt->input->buf->buffer);
12162 /*
12163 * Depending on the current state it may not be such
12164 * a good idea to try parsing if there is nothing in the chunk
12165 * which would be worth doing a parser state transition and we
12166 * need to wait for more data
12167 */
12168 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12169 (old_avail == 0) || (avail == 0) ||
12170 (xmlParseCheckTransition(ctxt,
12171 (const char *)&ctxt->input->base[old_avail],
12172 avail - old_avail)))
12173 xmlParseTryOrFinish(ctxt, terminate);
12174 }
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012175 if ((ctxt->input != NULL) &&
12176 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12177 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12178 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12179 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12180 ctxt->instate = XML_PARSER_EOF;
12181 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012182 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12183 return(ctxt->errNo);
12184
12185 if (remain != 0) {
12186 chunk += size;
12187 size = remain;
12188 remain = 0;
12189 goto xmldecl_done;
12190 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012191 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12192 (ctxt->input->buf != NULL)) {
12193 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12194 }
Owen Taylor3473f882001-02-23 17:55:21 +000012195 if (terminate) {
12196 /*
12197 * Check for termination
12198 */
Daniel Veillard65686452012-07-19 18:25:01 +080012199 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012200
12201 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012202 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012203 cur_avail = ctxt->input->length -
12204 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012205 else
Daniel Veillard65686452012-07-19 18:25:01 +080012206 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12207 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012208 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012209
Owen Taylor3473f882001-02-23 17:55:21 +000012210 if ((ctxt->instate != XML_PARSER_EOF) &&
12211 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012212 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012213 }
Daniel Veillard65686452012-07-19 18:25:01 +080012214 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012215 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012216 }
Owen Taylor3473f882001-02-23 17:55:21 +000012217 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012218 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012219 ctxt->sax->endDocument(ctxt->userData);
12220 }
12221 ctxt->instate = XML_PARSER_EOF;
12222 }
Daniel Veillard6c91aa32012-10-25 15:33:59 +080012223 if (ctxt->wellFormed == 0)
12224 return((xmlParserErrors) ctxt->errNo);
12225 else
12226 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012227}
12228
12229/************************************************************************
12230 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012231 * I/O front end functions to the parser *
Owen Taylor3473f882001-02-23 17:55:21 +000012232 * *
12233 ************************************************************************/
12234
12235/**
Owen Taylor3473f882001-02-23 17:55:21 +000012236 * xmlCreatePushParserCtxt:
12237 * @sax: a SAX handler
12238 * @user_data: The user data returned on SAX callbacks
12239 * @chunk: a pointer to an array of chars
12240 * @size: number of chars in the array
12241 * @filename: an optional file name or URI
12242 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012243 * Create a parser context for using the XML parser in push mode.
12244 * If @buffer and @size are non-NULL, the data is used to detect
12245 * the encoding. The remaining characters will be parsed so they
12246 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012247 * To allow content encoding detection, @size should be >= 4
12248 * The value of @filename is used for fetching external entities
12249 * and error/warning reports.
12250 *
12251 * Returns the new parser context or NULL
12252 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012253
Owen Taylor3473f882001-02-23 17:55:21 +000012254xmlParserCtxtPtr
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012255xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
Owen Taylor3473f882001-02-23 17:55:21 +000012256 const char *chunk, int size, const char *filename) {
12257 xmlParserCtxtPtr ctxt;
12258 xmlParserInputPtr inputStream;
12259 xmlParserInputBufferPtr buf;
12260 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12261
12262 /*
12263 * plug some encoding conversion routines
12264 */
12265 if ((chunk != NULL) && (size >= 4))
12266 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12267
12268 buf = xmlAllocParserInputBuffer(enc);
12269 if (buf == NULL) return(NULL);
12270
12271 ctxt = xmlNewParserCtxt();
12272 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012273 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012274 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012275 return(NULL);
12276 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012277 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012278 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12279 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012280 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012281 xmlFreeParserInputBuffer(buf);
12282 xmlFreeParserCtxt(ctxt);
12283 return(NULL);
12284 }
Owen Taylor3473f882001-02-23 17:55:21 +000012285 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012286#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012287 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012288#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012289 xmlFree(ctxt->sax);
12290 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12291 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012292 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012293 xmlFreeParserInputBuffer(buf);
12294 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012295 return(NULL);
12296 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012297 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12298 if (sax->initialized == XML_SAX2_MAGIC)
12299 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12300 else
12301 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012302 if (user_data != NULL)
12303 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012304 }
Owen Taylor3473f882001-02-23 17:55:21 +000012305 if (filename == NULL) {
12306 ctxt->directory = NULL;
12307 } else {
12308 ctxt->directory = xmlParserGetDirectory(filename);
12309 }
12310
12311 inputStream = xmlNewInputStream(ctxt);
12312 if (inputStream == NULL) {
12313 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012314 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012315 return(NULL);
12316 }
12317
12318 if (filename == NULL)
12319 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012320 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012321 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012322 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012323 if (inputStream->filename == NULL) {
12324 xmlFreeParserCtxt(ctxt);
12325 xmlFreeParserInputBuffer(buf);
12326 return(NULL);
12327 }
12328 }
Owen Taylor3473f882001-02-23 17:55:21 +000012329 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012330 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012331 inputPush(ctxt, inputStream);
12332
William M. Brack3a1cd212005-02-11 14:35:54 +000012333 /*
12334 * If the caller didn't provide an initial 'chunk' for determining
12335 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12336 * that it can be automatically determined later
12337 */
12338 if ((size == 0) || (chunk == NULL)) {
12339 ctxt->charset = XML_CHAR_ENCODING_NONE;
12340 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012341 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12342 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012343
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012344 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012345
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012346 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012347#ifdef DEBUG_PUSH
12348 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12349#endif
12350 }
12351
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012352 if (enc != XML_CHAR_ENCODING_NONE) {
12353 xmlSwitchEncoding(ctxt, enc);
12354 }
12355
Owen Taylor3473f882001-02-23 17:55:21 +000012356 return(ctxt);
12357}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012358#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012359
12360/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012361 * xmlStopParser:
12362 * @ctxt: an XML parser context
12363 *
12364 * Blocks further parser processing
12365 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012366void
Daniel Veillard39e5c892005-07-03 22:48:50 +000012367xmlStopParser(xmlParserCtxtPtr ctxt) {
12368 if (ctxt == NULL)
12369 return;
12370 ctxt->instate = XML_PARSER_EOF;
12371 ctxt->disableSAX = 1;
12372 if (ctxt->input != NULL) {
12373 ctxt->input->cur = BAD_CAST"";
12374 ctxt->input->base = ctxt->input->cur;
12375 }
12376}
12377
12378/**
Owen Taylor3473f882001-02-23 17:55:21 +000012379 * xmlCreateIOParserCtxt:
12380 * @sax: a SAX handler
12381 * @user_data: The user data returned on SAX callbacks
12382 * @ioread: an I/O read function
12383 * @ioclose: an I/O close function
12384 * @ioctx: an I/O handler
12385 * @enc: the charset encoding if known
12386 *
12387 * Create a parser context for using the XML parser with an existing
12388 * I/O stream
12389 *
12390 * Returns the new parser context or NULL
12391 */
12392xmlParserCtxtPtr
12393xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12394 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12395 void *ioctx, xmlCharEncoding enc) {
12396 xmlParserCtxtPtr ctxt;
12397 xmlParserInputPtr inputStream;
12398 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012399
Daniel Veillard42595322004-11-08 10:52:06 +000012400 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012401
12402 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012403 if (buf == NULL) {
12404 if (ioclose != NULL)
12405 ioclose(ioctx);
12406 return (NULL);
12407 }
Owen Taylor3473f882001-02-23 17:55:21 +000012408
12409 ctxt = xmlNewParserCtxt();
12410 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012411 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012412 return(NULL);
12413 }
12414 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012415#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012416 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012417#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012418 xmlFree(ctxt->sax);
12419 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12420 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012421 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012422 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012423 return(NULL);
12424 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012425 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12426 if (sax->initialized == XML_SAX2_MAGIC)
12427 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12428 else
12429 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012430 if (user_data != NULL)
12431 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012432 }
Owen Taylor3473f882001-02-23 17:55:21 +000012433
12434 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12435 if (inputStream == NULL) {
12436 xmlFreeParserCtxt(ctxt);
12437 return(NULL);
12438 }
12439 inputPush(ctxt, inputStream);
12440
12441 return(ctxt);
12442}
12443
Daniel Veillard4432df22003-09-28 18:58:27 +000012444#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012445/************************************************************************
12446 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012447 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012448 * *
12449 ************************************************************************/
12450
12451/**
12452 * xmlIOParseDTD:
12453 * @sax: the SAX handler block or NULL
12454 * @input: an Input Buffer
12455 * @enc: the charset encoding if known
12456 *
12457 * Load and parse a DTD
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012458 *
Owen Taylor3473f882001-02-23 17:55:21 +000012459 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012460 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012461 */
12462
12463xmlDtdPtr
12464xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12465 xmlCharEncoding enc) {
12466 xmlDtdPtr ret = NULL;
12467 xmlParserCtxtPtr ctxt;
12468 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012469 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012470
12471 if (input == NULL)
12472 return(NULL);
12473
12474 ctxt = xmlNewParserCtxt();
12475 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012476 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012477 return(NULL);
12478 }
12479
12480 /*
12481 * Set-up the SAX context
12482 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012483 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012484 if (ctxt->sax != NULL)
12485 xmlFree(ctxt->sax);
12486 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012487 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012488 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012489 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012490
12491 /*
12492 * generate a parser input from the I/O handler
12493 */
12494
Daniel Veillard43caefb2003-12-07 19:32:22 +000012495 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012496 if (pinput == NULL) {
12497 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012498 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012499 xmlFreeParserCtxt(ctxt);
12500 return(NULL);
12501 }
12502
12503 /*
12504 * plug some encoding conversion routines here.
12505 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012506 if (xmlPushInput(ctxt, pinput) < 0) {
12507 if (sax != NULL) ctxt->sax = NULL;
12508 xmlFreeParserCtxt(ctxt);
12509 return(NULL);
12510 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012511 if (enc != XML_CHAR_ENCODING_NONE) {
12512 xmlSwitchEncoding(ctxt, enc);
12513 }
Owen Taylor3473f882001-02-23 17:55:21 +000012514
12515 pinput->filename = NULL;
12516 pinput->line = 1;
12517 pinput->col = 1;
12518 pinput->base = ctxt->input->cur;
12519 pinput->cur = ctxt->input->cur;
12520 pinput->free = NULL;
12521
12522 /*
12523 * let's parse that entity knowing it's an external subset.
12524 */
12525 ctxt->inSubset = 2;
12526 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012527 if (ctxt->myDoc == NULL) {
12528 xmlErrMemory(ctxt, "New Doc failed");
12529 return(NULL);
12530 }
12531 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012532 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12533 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012534
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012535 if ((enc == XML_CHAR_ENCODING_NONE) &&
12536 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012537 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012538 * Get the 4 first bytes and decode the charset
12539 * if enc != XML_CHAR_ENCODING_NONE
12540 * plug some encoding conversion routines.
12541 */
12542 start[0] = RAW;
12543 start[1] = NXT(1);
12544 start[2] = NXT(2);
12545 start[3] = NXT(3);
12546 enc = xmlDetectCharEncoding(start, 4);
12547 if (enc != XML_CHAR_ENCODING_NONE) {
12548 xmlSwitchEncoding(ctxt, enc);
12549 }
12550 }
12551
Owen Taylor3473f882001-02-23 17:55:21 +000012552 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12553
12554 if (ctxt->myDoc != NULL) {
12555 if (ctxt->wellFormed) {
12556 ret = ctxt->myDoc->extSubset;
12557 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012558 if (ret != NULL) {
12559 xmlNodePtr tmp;
12560
12561 ret->doc = NULL;
12562 tmp = ret->children;
12563 while (tmp != NULL) {
12564 tmp->doc = NULL;
12565 tmp = tmp->next;
12566 }
12567 }
Owen Taylor3473f882001-02-23 17:55:21 +000012568 } else {
12569 ret = NULL;
12570 }
12571 xmlFreeDoc(ctxt->myDoc);
12572 ctxt->myDoc = NULL;
12573 }
12574 if (sax != NULL) ctxt->sax = NULL;
12575 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012576
Owen Taylor3473f882001-02-23 17:55:21 +000012577 return(ret);
12578}
12579
12580/**
12581 * xmlSAXParseDTD:
12582 * @sax: the SAX handler block
12583 * @ExternalID: a NAME* containing the External ID of the DTD
12584 * @SystemID: a NAME* containing the URL to the DTD
12585 *
12586 * Load and parse an external subset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012587 *
Owen Taylor3473f882001-02-23 17:55:21 +000012588 * Returns the resulting xmlDtdPtr or NULL in case of error.
12589 */
12590
12591xmlDtdPtr
12592xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12593 const xmlChar *SystemID) {
12594 xmlDtdPtr ret = NULL;
12595 xmlParserCtxtPtr ctxt;
12596 xmlParserInputPtr input = NULL;
12597 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012598 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012599
12600 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12601
12602 ctxt = xmlNewParserCtxt();
12603 if (ctxt == NULL) {
12604 return(NULL);
12605 }
12606
12607 /*
12608 * Set-up the SAX context
12609 */
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012610 if (sax != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012611 if (ctxt->sax != NULL)
12612 xmlFree(ctxt->sax);
12613 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012614 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012615 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012616
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012617 /*
12618 * Canonicalise the system ID
12619 */
12620 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012621 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012622 xmlFreeParserCtxt(ctxt);
12623 return(NULL);
12624 }
Owen Taylor3473f882001-02-23 17:55:21 +000012625
12626 /*
12627 * Ask the Entity resolver to load the damn thing
12628 */
12629
12630 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012631 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12632 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012633 if (input == NULL) {
12634 if (sax != NULL) ctxt->sax = NULL;
12635 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012636 if (systemIdCanonic != NULL)
12637 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012638 return(NULL);
12639 }
12640
12641 /*
12642 * plug some encoding conversion routines here.
12643 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012644 if (xmlPushInput(ctxt, input) < 0) {
12645 if (sax != NULL) ctxt->sax = NULL;
12646 xmlFreeParserCtxt(ctxt);
12647 if (systemIdCanonic != NULL)
12648 xmlFree(systemIdCanonic);
12649 return(NULL);
12650 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012651 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12652 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12653 xmlSwitchEncoding(ctxt, enc);
12654 }
Owen Taylor3473f882001-02-23 17:55:21 +000012655
12656 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012657 input->filename = (char *) systemIdCanonic;
12658 else
12659 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012660 input->line = 1;
12661 input->col = 1;
12662 input->base = ctxt->input->cur;
12663 input->cur = ctxt->input->cur;
12664 input->free = NULL;
12665
12666 /*
12667 * let's parse that entity knowing it's an external subset.
12668 */
12669 ctxt->inSubset = 2;
12670 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012671 if (ctxt->myDoc == NULL) {
12672 xmlErrMemory(ctxt, "New Doc failed");
12673 if (sax != NULL) ctxt->sax = NULL;
12674 xmlFreeParserCtxt(ctxt);
12675 return(NULL);
12676 }
12677 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012678 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12679 ExternalID, SystemID);
12680 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12681
12682 if (ctxt->myDoc != NULL) {
12683 if (ctxt->wellFormed) {
12684 ret = ctxt->myDoc->extSubset;
12685 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012686 if (ret != NULL) {
12687 xmlNodePtr tmp;
12688
12689 ret->doc = NULL;
12690 tmp = ret->children;
12691 while (tmp != NULL) {
12692 tmp->doc = NULL;
12693 tmp = tmp->next;
12694 }
12695 }
Owen Taylor3473f882001-02-23 17:55:21 +000012696 } else {
12697 ret = NULL;
12698 }
12699 xmlFreeDoc(ctxt->myDoc);
12700 ctxt->myDoc = NULL;
12701 }
12702 if (sax != NULL) ctxt->sax = NULL;
12703 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012704
Owen Taylor3473f882001-02-23 17:55:21 +000012705 return(ret);
12706}
12707
Daniel Veillard4432df22003-09-28 18:58:27 +000012708
Owen Taylor3473f882001-02-23 17:55:21 +000012709/**
12710 * xmlParseDTD:
12711 * @ExternalID: a NAME* containing the External ID of the DTD
12712 * @SystemID: a NAME* containing the URL to the DTD
12713 *
12714 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012715 *
Owen Taylor3473f882001-02-23 17:55:21 +000012716 * Returns the resulting xmlDtdPtr or NULL in case of error.
12717 */
12718
12719xmlDtdPtr
12720xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12721 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12722}
Daniel Veillard4432df22003-09-28 18:58:27 +000012723#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012724
12725/************************************************************************
12726 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012727 * Front ends when parsing an Entity *
Owen Taylor3473f882001-02-23 17:55:21 +000012728 * *
12729 ************************************************************************/
12730
12731/**
Owen Taylor3473f882001-02-23 17:55:21 +000012732 * xmlParseCtxtExternalEntity:
12733 * @ctx: the existing parsing context
12734 * @URL: the URL for the entity to load
12735 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012736 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012737 *
12738 * Parse an external general entity within an existing parsing context
12739 * An external general parsed entity is well-formed if it matches the
12740 * production labeled extParsedEnt.
12741 *
12742 * [78] extParsedEnt ::= TextDecl? content
12743 *
12744 * Returns 0 if the entity is well formed, -1 in case of args problem and
12745 * the parser error code otherwise
12746 */
12747
12748int
12749xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012750 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012751 xmlParserCtxtPtr ctxt;
12752 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012753 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012754 xmlSAXHandlerPtr oldsax = NULL;
12755 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012756 xmlChar start[4];
12757 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012758
Daniel Veillardce682bc2004-11-05 17:22:25 +000012759 if (ctx == NULL) return(-1);
12760
Daniel Veillard0161e632008-08-28 15:36:32 +000012761 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12762 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012763 return(XML_ERR_ENTITY_LOOP);
12764 }
12765
Daniel Veillardcda96922001-08-21 10:56:31 +000012766 if (lst != NULL)
12767 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012768 if ((URL == NULL) && (ID == NULL))
12769 return(-1);
12770 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12771 return(-1);
12772
Rob Richards798743a2009-06-19 13:54:25 -040012773 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012774 if (ctxt == NULL) {
12775 return(-1);
12776 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012777
Owen Taylor3473f882001-02-23 17:55:21 +000012778 oldsax = ctxt->sax;
12779 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012780 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012781 newDoc = xmlNewDoc(BAD_CAST "1.0");
12782 if (newDoc == NULL) {
12783 xmlFreeParserCtxt(ctxt);
12784 return(-1);
12785 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012786 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012787 if (ctx->myDoc->dict) {
12788 newDoc->dict = ctx->myDoc->dict;
12789 xmlDictReference(newDoc->dict);
12790 }
Owen Taylor3473f882001-02-23 17:55:21 +000012791 if (ctx->myDoc != NULL) {
12792 newDoc->intSubset = ctx->myDoc->intSubset;
12793 newDoc->extSubset = ctx->myDoc->extSubset;
12794 }
12795 if (ctx->myDoc->URL != NULL) {
12796 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12797 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012798 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12799 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012800 ctxt->sax = oldsax;
12801 xmlFreeParserCtxt(ctxt);
12802 newDoc->intSubset = NULL;
12803 newDoc->extSubset = NULL;
12804 xmlFreeDoc(newDoc);
12805 return(-1);
12806 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012807 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012808 nodePush(ctxt, newDoc->children);
12809 if (ctx->myDoc == NULL) {
12810 ctxt->myDoc = newDoc;
12811 } else {
12812 ctxt->myDoc = ctx->myDoc;
12813 newDoc->children->doc = ctx->myDoc;
12814 }
12815
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012816 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012817 * Get the 4 first bytes and decode the charset
12818 * if enc != XML_CHAR_ENCODING_NONE
12819 * plug some encoding conversion routines.
12820 */
12821 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012822 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12823 start[0] = RAW;
12824 start[1] = NXT(1);
12825 start[2] = NXT(2);
12826 start[3] = NXT(3);
12827 enc = xmlDetectCharEncoding(start, 4);
12828 if (enc != XML_CHAR_ENCODING_NONE) {
12829 xmlSwitchEncoding(ctxt, enc);
12830 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012831 }
12832
Owen Taylor3473f882001-02-23 17:55:21 +000012833 /*
12834 * Parse a possible text declaration first
12835 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012836 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012837 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012838 /*
12839 * An XML-1.0 document can't reference an entity not XML-1.0
12840 */
12841 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12842 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
Daniel Veillardf8e3db02012-09-11 13:26:36 +080012843 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012844 "Version mismatch between document and entity\n");
12845 }
Owen Taylor3473f882001-02-23 17:55:21 +000012846 }
12847
12848 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012849 * If the user provided its own SAX callbacks then reuse the
12850 * useData callback field, otherwise the expected setup in a
12851 * DOM builder is to have userData == ctxt
12852 */
12853 if (ctx->userData == ctx)
12854 ctxt->userData = ctxt;
12855 else
12856 ctxt->userData = ctx->userData;
12857
12858 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012859 * Doing validity checking on chunk doesn't make sense
12860 */
12861 ctxt->instate = XML_PARSER_CONTENT;
12862 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012863 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012864 ctxt->loadsubset = ctx->loadsubset;
12865 ctxt->depth = ctx->depth + 1;
12866 ctxt->replaceEntities = ctx->replaceEntities;
12867 if (ctxt->validate) {
12868 ctxt->vctxt.error = ctx->vctxt.error;
12869 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012870 } else {
12871 ctxt->vctxt.error = NULL;
12872 ctxt->vctxt.warning = NULL;
12873 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012874 ctxt->vctxt.nodeTab = NULL;
12875 ctxt->vctxt.nodeNr = 0;
12876 ctxt->vctxt.nodeMax = 0;
12877 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012878 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12879 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012880 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12881 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12882 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012883 ctxt->dictNames = ctx->dictNames;
12884 ctxt->attsDefault = ctx->attsDefault;
12885 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012886 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012887
12888 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012889
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012890 ctx->validate = ctxt->validate;
12891 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012892 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012893 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012894 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012895 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012896 }
12897 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012898 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012899 }
12900
12901 if (!ctxt->wellFormed) {
12902 if (ctxt->errNo == 0)
12903 ret = 1;
12904 else
12905 ret = ctxt->errNo;
12906 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012907 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012908 xmlNodePtr cur;
12909
12910 /*
12911 * Return the newly created nodeset after unlinking it from
12912 * they pseudo parent.
12913 */
12914 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012915 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012916 while (cur != NULL) {
12917 cur->parent = NULL;
12918 cur = cur->next;
12919 }
12920 newDoc->children->children = NULL;
12921 }
12922 ret = 0;
12923 }
12924 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012925 ctxt->dict = NULL;
12926 ctxt->attsDefault = NULL;
12927 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012928 xmlFreeParserCtxt(ctxt);
12929 newDoc->intSubset = NULL;
12930 newDoc->extSubset = NULL;
12931 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012932
Owen Taylor3473f882001-02-23 17:55:21 +000012933 return(ret);
12934}
12935
12936/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012937 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012938 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012939 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012940 * @sax: the SAX handler bloc (possibly NULL)
12941 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12942 * @depth: Used for loop detection, use 0
12943 * @URL: the URL for the entity to load
12944 * @ID: the System ID for the entity to load
12945 * @list: the return value for the set of parsed nodes
12946 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012947 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012948 *
12949 * Returns 0 if the entity is well formed, -1 in case of args problem and
12950 * the parser error code otherwise
12951 */
12952
Daniel Veillard7d515752003-09-26 19:12:37 +000012953static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012954xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12955 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012956 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012957 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012958 xmlParserCtxtPtr ctxt;
12959 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012960 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012961 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012962 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012963 xmlChar start[4];
12964 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012965
Daniel Veillard0161e632008-08-28 15:36:32 +000012966 if (((depth > 40) &&
12967 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12968 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012969 return(XML_ERR_ENTITY_LOOP);
12970 }
12971
Owen Taylor3473f882001-02-23 17:55:21 +000012972 if (list != NULL)
12973 *list = NULL;
12974 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012975 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012976 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012977 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012978
12979
Rob Richards9c0aa472009-03-26 18:10:19 +000012980 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012981 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012982 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012983 if (oldctxt != NULL) {
12984 ctxt->_private = oldctxt->_private;
12985 ctxt->loadsubset = oldctxt->loadsubset;
12986 ctxt->validate = oldctxt->validate;
12987 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012988 ctxt->record_info = oldctxt->record_info;
12989 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12990 ctxt->node_seq.length = oldctxt->node_seq.length;
12991 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012992 } else {
12993 /*
12994 * Doing validity checking on chunk without context
12995 * doesn't make sense
12996 */
12997 ctxt->_private = NULL;
12998 ctxt->validate = 0;
12999 ctxt->external = 2;
13000 ctxt->loadsubset = 0;
13001 }
Owen Taylor3473f882001-02-23 17:55:21 +000013002 if (sax != NULL) {
13003 oldsax = ctxt->sax;
13004 ctxt->sax = sax;
13005 if (user_data != NULL)
13006 ctxt->userData = user_data;
13007 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013008 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013009 newDoc = xmlNewDoc(BAD_CAST "1.0");
13010 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013011 ctxt->node_seq.maximum = 0;
13012 ctxt->node_seq.length = 0;
13013 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013014 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013015 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013016 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013017 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013018 newDoc->intSubset = doc->intSubset;
13019 newDoc->extSubset = doc->extSubset;
13020 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013021 xmlDictReference(newDoc->dict);
13022
Owen Taylor3473f882001-02-23 17:55:21 +000013023 if (doc->URL != NULL) {
13024 newDoc->URL = xmlStrdup(doc->URL);
13025 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013026 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13027 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013028 if (sax != NULL)
13029 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013030 ctxt->node_seq.maximum = 0;
13031 ctxt->node_seq.length = 0;
13032 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013033 xmlFreeParserCtxt(ctxt);
13034 newDoc->intSubset = NULL;
13035 newDoc->extSubset = NULL;
13036 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013037 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013038 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013039 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013040 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013041 ctxt->myDoc = doc;
13042 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013043
Daniel Veillard0161e632008-08-28 15:36:32 +000013044 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013045 * Get the 4 first bytes and decode the charset
13046 * if enc != XML_CHAR_ENCODING_NONE
13047 * plug some encoding conversion routines.
13048 */
13049 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013050 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13051 start[0] = RAW;
13052 start[1] = NXT(1);
13053 start[2] = NXT(2);
13054 start[3] = NXT(3);
13055 enc = xmlDetectCharEncoding(start, 4);
13056 if (enc != XML_CHAR_ENCODING_NONE) {
13057 xmlSwitchEncoding(ctxt, enc);
13058 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013059 }
13060
Owen Taylor3473f882001-02-23 17:55:21 +000013061 /*
13062 * Parse a possible text declaration first
13063 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013064 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013065 xmlParseTextDecl(ctxt);
13066 }
13067
Owen Taylor3473f882001-02-23 17:55:21 +000013068 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013069 ctxt->depth = depth;
13070
13071 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013072
Daniel Veillard561b7f82002-03-20 21:55:57 +000013073 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013074 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013075 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013076 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013077 }
13078 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013079 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013080 }
13081
13082 if (!ctxt->wellFormed) {
13083 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013084 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013085 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013086 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013087 } else {
13088 if (list != NULL) {
13089 xmlNodePtr cur;
13090
13091 /*
13092 * Return the newly created nodeset after unlinking it from
13093 * they pseudo parent.
13094 */
13095 cur = newDoc->children->children;
13096 *list = cur;
13097 while (cur != NULL) {
13098 cur->parent = NULL;
13099 cur = cur->next;
13100 }
13101 newDoc->children->children = NULL;
13102 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013103 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013104 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013105
13106 /*
13107 * Record in the parent context the number of entities replacement
13108 * done when parsing that reference.
13109 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013110 if (oldctxt != NULL)
13111 oldctxt->nbentities += ctxt->nbentities;
13112
Daniel Veillard0161e632008-08-28 15:36:32 +000013113 /*
13114 * Also record the size of the entity parsed
13115 */
13116 if (ctxt->input != NULL) {
13117 oldctxt->sizeentities += ctxt->input->consumed;
13118 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13119 }
13120 /*
13121 * And record the last error if any
13122 */
13123 if (ctxt->lastError.code != XML_ERR_OK)
13124 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13125
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013126 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013127 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000013128 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13129 oldctxt->node_seq.length = ctxt->node_seq.length;
13130 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013131 ctxt->node_seq.maximum = 0;
13132 ctxt->node_seq.length = 0;
13133 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013134 xmlFreeParserCtxt(ctxt);
13135 newDoc->intSubset = NULL;
13136 newDoc->extSubset = NULL;
13137 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013138
Owen Taylor3473f882001-02-23 17:55:21 +000013139 return(ret);
13140}
13141
Daniel Veillard81273902003-09-30 00:43:48 +000013142#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013143/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013144 * xmlParseExternalEntity:
13145 * @doc: the document the chunk pertains to
13146 * @sax: the SAX handler bloc (possibly NULL)
13147 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13148 * @depth: Used for loop detection, use 0
13149 * @URL: the URL for the entity to load
13150 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013151 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013152 *
13153 * Parse an external general entity
13154 * An external general parsed entity is well-formed if it matches the
13155 * production labeled extParsedEnt.
13156 *
13157 * [78] extParsedEnt ::= TextDecl? content
13158 *
13159 * Returns 0 if the entity is well formed, -1 in case of args problem and
13160 * the parser error code otherwise
13161 */
13162
13163int
13164xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013165 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013166 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013167 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013168}
13169
13170/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013171 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013172 * @doc: the document the chunk pertains to
13173 * @sax: the SAX handler bloc (possibly NULL)
13174 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13175 * @depth: Used for loop detection, use 0
13176 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013177 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013178 *
13179 * Parse a well-balanced chunk of an XML document
13180 * called by the parser
13181 * The allowed sequence for the Well Balanced Chunk is the one defined by
13182 * the content production in the XML grammar:
13183 *
13184 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13185 *
13186 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13187 * the parser error code otherwise
13188 */
13189
13190int
13191xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013192 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013193 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13194 depth, string, lst, 0 );
13195}
Daniel Veillard81273902003-09-30 00:43:48 +000013196#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013197
13198/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013199 * xmlParseBalancedChunkMemoryInternal:
13200 * @oldctxt: the existing parsing context
13201 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13202 * @user_data: the user data field for the parser context
13203 * @lst: the return value for the set of parsed nodes
13204 *
13205 *
13206 * Parse a well-balanced chunk of an XML document
13207 * called by the parser
13208 * The allowed sequence for the Well Balanced Chunk is the one defined by
13209 * the content production in the XML grammar:
13210 *
13211 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13212 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013213 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13214 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013215 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013216 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013217 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013218 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013219static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013220xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13221 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13222 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013223 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013224 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013225 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013226 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013227 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013228 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013229 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013230#ifdef SAX2
13231 int i;
13232#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013233
Daniel Veillard0161e632008-08-28 15:36:32 +000013234 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13235 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013236 return(XML_ERR_ENTITY_LOOP);
13237 }
13238
13239
13240 if (lst != NULL)
13241 *lst = NULL;
13242 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013243 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013244
13245 size = xmlStrlen(string);
13246
13247 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013248 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013249 if (user_data != NULL)
13250 ctxt->userData = user_data;
13251 else
13252 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013253 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13254 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013255 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13256 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13257 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013258
Daniel Veillard74eaec12009-08-26 15:57:20 +020013259#ifdef SAX2
13260 /* propagate namespaces down the entity */
13261 for (i = 0;i < oldctxt->nsNr;i += 2) {
13262 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13263 }
13264#endif
13265
Daniel Veillard328f48c2002-11-15 15:24:34 +000013266 oldsax = ctxt->sax;
13267 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013268 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013269 ctxt->replaceEntities = oldctxt->replaceEntities;
13270 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013271
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013272 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013273 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013274 newDoc = xmlNewDoc(BAD_CAST "1.0");
13275 if (newDoc == NULL) {
13276 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013277 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013278 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013279 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013280 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013281 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013282 newDoc->dict = ctxt->dict;
13283 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013284 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013285 } else {
13286 ctxt->myDoc = oldctxt->myDoc;
13287 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013288 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013289 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013290 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13291 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013292 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013293 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013294 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013295 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013296 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013297 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013298 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013299 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013300 ctxt->myDoc->children = NULL;
13301 ctxt->myDoc->last = NULL;
13302 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013303 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013304 ctxt->instate = XML_PARSER_CONTENT;
13305 ctxt->depth = oldctxt->depth + 1;
13306
Daniel Veillard328f48c2002-11-15 15:24:34 +000013307 ctxt->validate = 0;
13308 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013309 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13310 /*
13311 * ID/IDREF registration will be done in xmlValidateElement below
13312 */
13313 ctxt->loadsubset |= XML_SKIP_IDS;
13314 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013315 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013316 ctxt->attsDefault = oldctxt->attsDefault;
13317 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013318
Daniel Veillard68e9e742002-11-16 15:35:11 +000013319 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013320 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013321 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013322 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013323 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013324 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013325 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013326 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013327 }
13328
13329 if (!ctxt->wellFormed) {
13330 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013331 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013332 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013333 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013334 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013335 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013336 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013337
William M. Brack7b9154b2003-09-27 19:23:50 +000013338 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013339 xmlNodePtr cur;
13340
13341 /*
13342 * Return the newly created nodeset after unlinking it from
13343 * they pseudo parent.
13344 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013345 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013346 *lst = cur;
13347 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013348#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013349 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13350 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13351 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013352 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13353 oldctxt->myDoc, cur);
13354 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013355#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013356 cur->parent = NULL;
13357 cur = cur->next;
13358 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013359 ctxt->myDoc->children->children = NULL;
13360 }
13361 if (ctxt->myDoc != NULL) {
13362 xmlFreeNode(ctxt->myDoc->children);
13363 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013364 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013365 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013366
13367 /*
13368 * Record in the parent context the number of entities replacement
13369 * done when parsing that reference.
13370 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013371 if (oldctxt != NULL)
13372 oldctxt->nbentities += ctxt->nbentities;
13373
Daniel Veillard0161e632008-08-28 15:36:32 +000013374 /*
13375 * Also record the last error if any
13376 */
13377 if (ctxt->lastError.code != XML_ERR_OK)
13378 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13379
Daniel Veillard328f48c2002-11-15 15:24:34 +000013380 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013381 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013382 ctxt->attsDefault = NULL;
13383 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013384 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013385 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013386 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013387 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013388
Daniel Veillard328f48c2002-11-15 15:24:34 +000013389 return(ret);
13390}
13391
Daniel Veillard29b17482004-08-16 00:39:03 +000013392/**
13393 * xmlParseInNodeContext:
13394 * @node: the context node
13395 * @data: the input string
13396 * @datalen: the input string length in bytes
13397 * @options: a combination of xmlParserOption
13398 * @lst: the return value for the set of parsed nodes
13399 *
13400 * Parse a well-balanced chunk of an XML document
13401 * within the context (DTD, namespaces, etc ...) of the given node.
13402 *
13403 * The allowed sequence for the data is a Well Balanced Chunk defined by
13404 * the content production in the XML grammar:
13405 *
13406 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13407 *
13408 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13409 * error code otherwise
13410 */
13411xmlParserErrors
13412xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13413 int options, xmlNodePtr *lst) {
13414#ifdef SAX2
13415 xmlParserCtxtPtr ctxt;
13416 xmlDocPtr doc = NULL;
13417 xmlNodePtr fake, cur;
13418 int nsnr = 0;
13419
13420 xmlParserErrors ret = XML_ERR_OK;
13421
13422 /*
13423 * check all input parameters, grab the document
13424 */
13425 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13426 return(XML_ERR_INTERNAL_ERROR);
13427 switch (node->type) {
13428 case XML_ELEMENT_NODE:
13429 case XML_ATTRIBUTE_NODE:
13430 case XML_TEXT_NODE:
13431 case XML_CDATA_SECTION_NODE:
13432 case XML_ENTITY_REF_NODE:
13433 case XML_PI_NODE:
13434 case XML_COMMENT_NODE:
13435 case XML_DOCUMENT_NODE:
13436 case XML_HTML_DOCUMENT_NODE:
13437 break;
13438 default:
13439 return(XML_ERR_INTERNAL_ERROR);
13440
13441 }
13442 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13443 (node->type != XML_DOCUMENT_NODE) &&
13444 (node->type != XML_HTML_DOCUMENT_NODE))
13445 node = node->parent;
13446 if (node == NULL)
13447 return(XML_ERR_INTERNAL_ERROR);
13448 if (node->type == XML_ELEMENT_NODE)
13449 doc = node->doc;
13450 else
13451 doc = (xmlDocPtr) node;
13452 if (doc == NULL)
13453 return(XML_ERR_INTERNAL_ERROR);
13454
13455 /*
13456 * allocate a context and set-up everything not related to the
13457 * node position in the tree
13458 */
13459 if (doc->type == XML_DOCUMENT_NODE)
13460 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13461#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013462 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013463 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013464 /*
13465 * When parsing in context, it makes no sense to add implied
13466 * elements like html/body/etc...
13467 */
13468 options |= HTML_PARSE_NOIMPLIED;
13469 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013470#endif
13471 else
13472 return(XML_ERR_INTERNAL_ERROR);
13473
13474 if (ctxt == NULL)
13475 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013476
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013477 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013478 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13479 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13480 * we must wait until the last moment to free the original one.
13481 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013482 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013483 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013484 xmlDictFree(ctxt->dict);
13485 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013486 } else
13487 options |= XML_PARSE_NODICT;
13488
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013489 if (doc->encoding != NULL) {
13490 xmlCharEncodingHandlerPtr hdlr;
13491
13492 if (ctxt->encoding != NULL)
13493 xmlFree((xmlChar *) ctxt->encoding);
13494 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13495
13496 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13497 if (hdlr != NULL) {
13498 xmlSwitchToEncoding(ctxt, hdlr);
13499 } else {
13500 return(XML_ERR_UNSUPPORTED_ENCODING);
13501 }
13502 }
13503
Daniel Veillard37334572008-07-31 08:20:02 +000013504 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013505 xmlDetectSAX2(ctxt);
13506 ctxt->myDoc = doc;
13507
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013508 fake = xmlNewComment(NULL);
13509 if (fake == NULL) {
13510 xmlFreeParserCtxt(ctxt);
13511 return(XML_ERR_NO_MEMORY);
13512 }
13513 xmlAddChild(node, fake);
13514
Daniel Veillard29b17482004-08-16 00:39:03 +000013515 if (node->type == XML_ELEMENT_NODE) {
13516 nodePush(ctxt, node);
13517 /*
13518 * initialize the SAX2 namespaces stack
13519 */
13520 cur = node;
13521 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13522 xmlNsPtr ns = cur->nsDef;
13523 const xmlChar *iprefix, *ihref;
13524
13525 while (ns != NULL) {
13526 if (ctxt->dict) {
13527 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13528 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13529 } else {
13530 iprefix = ns->prefix;
13531 ihref = ns->href;
13532 }
13533
13534 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13535 nsPush(ctxt, iprefix, ihref);
13536 nsnr++;
13537 }
13538 ns = ns->next;
13539 }
13540 cur = cur->parent;
13541 }
13542 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000013543 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013544
13545 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13546 /*
13547 * ID/IDREF registration will be done in xmlValidateElement below
13548 */
13549 ctxt->loadsubset |= XML_SKIP_IDS;
13550 }
13551
Daniel Veillard499cc922006-01-18 17:22:35 +000013552#ifdef LIBXML_HTML_ENABLED
13553 if (doc->type == XML_HTML_DOCUMENT_NODE)
13554 __htmlParseContent(ctxt);
13555 else
13556#endif
13557 xmlParseContent(ctxt);
13558
Daniel Veillard29b17482004-08-16 00:39:03 +000013559 nsPop(ctxt, nsnr);
13560 if ((RAW == '<') && (NXT(1) == '/')) {
13561 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13562 } else if (RAW != 0) {
13563 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13564 }
13565 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13566 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13567 ctxt->wellFormed = 0;
13568 }
13569
13570 if (!ctxt->wellFormed) {
13571 if (ctxt->errNo == 0)
13572 ret = XML_ERR_INTERNAL_ERROR;
13573 else
13574 ret = (xmlParserErrors)ctxt->errNo;
13575 } else {
13576 ret = XML_ERR_OK;
13577 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013578
Daniel Veillard29b17482004-08-16 00:39:03 +000013579 /*
13580 * Return the newly created nodeset after unlinking it from
13581 * the pseudo sibling.
13582 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013583
Daniel Veillard29b17482004-08-16 00:39:03 +000013584 cur = fake->next;
13585 fake->next = NULL;
13586 node->last = fake;
13587
13588 if (cur != NULL) {
13589 cur->prev = NULL;
13590 }
13591
13592 *lst = cur;
13593
13594 while (cur != NULL) {
13595 cur->parent = NULL;
13596 cur = cur->next;
13597 }
13598
13599 xmlUnlinkNode(fake);
13600 xmlFreeNode(fake);
13601
13602
13603 if (ret != XML_ERR_OK) {
13604 xmlFreeNodeList(*lst);
13605 *lst = NULL;
13606 }
William M. Brackc3f81342004-10-03 01:22:44 +000013607
William M. Brackb7b54de2004-10-06 16:38:01 +000013608 if (doc->dict != NULL)
13609 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013610 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013611
Daniel Veillard29b17482004-08-16 00:39:03 +000013612 return(ret);
13613#else /* !SAX2 */
13614 return(XML_ERR_INTERNAL_ERROR);
13615#endif
13616}
13617
Daniel Veillard81273902003-09-30 00:43:48 +000013618#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013619/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013620 * xmlParseBalancedChunkMemoryRecover:
13621 * @doc: the document the chunk pertains to
13622 * @sax: the SAX handler bloc (possibly NULL)
13623 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13624 * @depth: Used for loop detection, use 0
13625 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13626 * @lst: the return value for the set of parsed nodes
13627 * @recover: return nodes even if the data is broken (use 0)
13628 *
13629 *
13630 * Parse a well-balanced chunk of an XML document
13631 * called by the parser
13632 * The allowed sequence for the Well Balanced Chunk is the one defined by
13633 * the content production in the XML grammar:
13634 *
13635 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13636 *
13637 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13638 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013639 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013640 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013641 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13642 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013643 */
13644int
13645xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013646 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013647 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013648 xmlParserCtxtPtr ctxt;
13649 xmlDocPtr newDoc;
13650 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013651 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013652 int size;
13653 int ret = 0;
13654
Daniel Veillard0161e632008-08-28 15:36:32 +000013655 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013656 return(XML_ERR_ENTITY_LOOP);
13657 }
13658
13659
Daniel Veillardcda96922001-08-21 10:56:31 +000013660 if (lst != NULL)
13661 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013662 if (string == NULL)
13663 return(-1);
13664
13665 size = xmlStrlen(string);
13666
13667 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13668 if (ctxt == NULL) return(-1);
13669 ctxt->userData = ctxt;
13670 if (sax != NULL) {
13671 oldsax = ctxt->sax;
13672 ctxt->sax = sax;
13673 if (user_data != NULL)
13674 ctxt->userData = user_data;
13675 }
13676 newDoc = xmlNewDoc(BAD_CAST "1.0");
13677 if (newDoc == NULL) {
13678 xmlFreeParserCtxt(ctxt);
13679 return(-1);
13680 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013681 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013682 if ((doc != NULL) && (doc->dict != NULL)) {
13683 xmlDictFree(ctxt->dict);
13684 ctxt->dict = doc->dict;
13685 xmlDictReference(ctxt->dict);
13686 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13687 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13688 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13689 ctxt->dictNames = 1;
13690 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013691 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013692 }
Owen Taylor3473f882001-02-23 17:55:21 +000013693 if (doc != NULL) {
13694 newDoc->intSubset = doc->intSubset;
13695 newDoc->extSubset = doc->extSubset;
13696 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013697 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13698 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013699 if (sax != NULL)
13700 ctxt->sax = oldsax;
13701 xmlFreeParserCtxt(ctxt);
13702 newDoc->intSubset = NULL;
13703 newDoc->extSubset = NULL;
13704 xmlFreeDoc(newDoc);
13705 return(-1);
13706 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013707 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13708 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013709 if (doc == NULL) {
13710 ctxt->myDoc = newDoc;
13711 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013712 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013713 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013714 /* Ensure that doc has XML spec namespace */
13715 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13716 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013717 }
13718 ctxt->instate = XML_PARSER_CONTENT;
13719 ctxt->depth = depth;
13720
13721 /*
13722 * Doing validity checking on chunk doesn't make sense
13723 */
13724 ctxt->validate = 0;
13725 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013726 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013727
Daniel Veillardb39bc392002-10-26 19:29:51 +000013728 if ( doc != NULL ){
13729 content = doc->children;
13730 doc->children = NULL;
13731 xmlParseContent(ctxt);
13732 doc->children = content;
13733 }
13734 else {
13735 xmlParseContent(ctxt);
13736 }
Owen Taylor3473f882001-02-23 17:55:21 +000013737 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013738 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013739 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013740 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013741 }
13742 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013743 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013744 }
13745
13746 if (!ctxt->wellFormed) {
13747 if (ctxt->errNo == 0)
13748 ret = 1;
13749 else
13750 ret = ctxt->errNo;
13751 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013752 ret = 0;
13753 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013754
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013755 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13756 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013757
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013758 /*
13759 * Return the newly created nodeset after unlinking it from
13760 * they pseudo parent.
13761 */
13762 cur = newDoc->children->children;
13763 *lst = cur;
13764 while (cur != NULL) {
13765 xmlSetTreeDoc(cur, doc);
13766 cur->parent = NULL;
13767 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013768 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013769 newDoc->children->children = NULL;
13770 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013771
13772 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013773 ctxt->sax = oldsax;
13774 xmlFreeParserCtxt(ctxt);
13775 newDoc->intSubset = NULL;
13776 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013777 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013778 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013779
Owen Taylor3473f882001-02-23 17:55:21 +000013780 return(ret);
13781}
13782
13783/**
13784 * xmlSAXParseEntity:
13785 * @sax: the SAX handler block
13786 * @filename: the filename
13787 *
13788 * parse an XML external entity out of context and build a tree.
13789 * It use the given SAX function block to handle the parsing callback.
13790 * If sax is NULL, fallback to the default DOM tree building routines.
13791 *
13792 * [78] extParsedEnt ::= TextDecl? content
13793 *
13794 * This correspond to a "Well Balanced" chunk
13795 *
13796 * Returns the resulting document tree
13797 */
13798
13799xmlDocPtr
13800xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13801 xmlDocPtr ret;
13802 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013803
13804 ctxt = xmlCreateFileParserCtxt(filename);
13805 if (ctxt == NULL) {
13806 return(NULL);
13807 }
13808 if (sax != NULL) {
13809 if (ctxt->sax != NULL)
13810 xmlFree(ctxt->sax);
13811 ctxt->sax = sax;
13812 ctxt->userData = NULL;
13813 }
13814
Owen Taylor3473f882001-02-23 17:55:21 +000013815 xmlParseExtParsedEnt(ctxt);
13816
13817 if (ctxt->wellFormed)
13818 ret = ctxt->myDoc;
13819 else {
13820 ret = NULL;
13821 xmlFreeDoc(ctxt->myDoc);
13822 ctxt->myDoc = NULL;
13823 }
13824 if (sax != NULL)
13825 ctxt->sax = NULL;
13826 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013827
Owen Taylor3473f882001-02-23 17:55:21 +000013828 return(ret);
13829}
13830
13831/**
13832 * xmlParseEntity:
13833 * @filename: the filename
13834 *
13835 * parse an XML external entity out of context and build a tree.
13836 *
13837 * [78] extParsedEnt ::= TextDecl? content
13838 *
13839 * This correspond to a "Well Balanced" chunk
13840 *
13841 * Returns the resulting document tree
13842 */
13843
13844xmlDocPtr
13845xmlParseEntity(const char *filename) {
13846 return(xmlSAXParseEntity(NULL, filename));
13847}
Daniel Veillard81273902003-09-30 00:43:48 +000013848#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013849
13850/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013851 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013852 * @URL: the entity URL
13853 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013854 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013855 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013856 *
13857 * Create a parser context for an external entity
13858 * Automatic support for ZLIB/Compress compressed document is provided
13859 * by default if found at compile-time.
13860 *
13861 * Returns the new parser context or NULL
13862 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013863static xmlParserCtxtPtr
13864xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13865 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013866 xmlParserCtxtPtr ctxt;
13867 xmlParserInputPtr inputStream;
13868 char *directory = NULL;
13869 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013870
Owen Taylor3473f882001-02-23 17:55:21 +000013871 ctxt = xmlNewParserCtxt();
13872 if (ctxt == NULL) {
13873 return(NULL);
13874 }
13875
Daniel Veillard48247b42009-07-10 16:12:46 +020013876 if (pctx != NULL) {
13877 ctxt->options = pctx->options;
13878 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013879 }
13880
Owen Taylor3473f882001-02-23 17:55:21 +000013881 uri = xmlBuildURI(URL, base);
13882
13883 if (uri == NULL) {
13884 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13885 if (inputStream == NULL) {
13886 xmlFreeParserCtxt(ctxt);
13887 return(NULL);
13888 }
13889
13890 inputPush(ctxt, inputStream);
13891
13892 if ((ctxt->directory == NULL) && (directory == NULL))
13893 directory = xmlParserGetDirectory((char *)URL);
13894 if ((ctxt->directory == NULL) && (directory != NULL))
13895 ctxt->directory = directory;
13896 } else {
13897 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13898 if (inputStream == NULL) {
13899 xmlFree(uri);
13900 xmlFreeParserCtxt(ctxt);
13901 return(NULL);
13902 }
13903
13904 inputPush(ctxt, inputStream);
13905
13906 if ((ctxt->directory == NULL) && (directory == NULL))
13907 directory = xmlParserGetDirectory((char *)uri);
13908 if ((ctxt->directory == NULL) && (directory != NULL))
13909 ctxt->directory = directory;
13910 xmlFree(uri);
13911 }
Owen Taylor3473f882001-02-23 17:55:21 +000013912 return(ctxt);
13913}
13914
Rob Richards9c0aa472009-03-26 18:10:19 +000013915/**
13916 * xmlCreateEntityParserCtxt:
13917 * @URL: the entity URL
13918 * @ID: the entity PUBLIC ID
13919 * @base: a possible base for the target URI
13920 *
13921 * Create a parser context for an external entity
13922 * Automatic support for ZLIB/Compress compressed document is provided
13923 * by default if found at compile-time.
13924 *
13925 * Returns the new parser context or NULL
13926 */
13927xmlParserCtxtPtr
13928xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13929 const xmlChar *base) {
13930 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13931
13932}
13933
Owen Taylor3473f882001-02-23 17:55:21 +000013934/************************************************************************
13935 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013936 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013937 * *
13938 ************************************************************************/
13939
13940/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013941 * xmlCreateURLParserCtxt:
13942 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013943 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013944 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013945 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013946 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013947 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013948 *
13949 * Returns the new parser context or NULL
13950 */
13951xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013952xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013953{
13954 xmlParserCtxtPtr ctxt;
13955 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013956 char *directory = NULL;
13957
Owen Taylor3473f882001-02-23 17:55:21 +000013958 ctxt = xmlNewParserCtxt();
13959 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013960 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013961 return(NULL);
13962 }
13963
Daniel Veillarddf292f72005-01-16 19:00:15 +000013964 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013965 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013966 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013967
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013968 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013969 if (inputStream == NULL) {
13970 xmlFreeParserCtxt(ctxt);
13971 return(NULL);
13972 }
13973
Owen Taylor3473f882001-02-23 17:55:21 +000013974 inputPush(ctxt, inputStream);
13975 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013976 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013977 if ((ctxt->directory == NULL) && (directory != NULL))
13978 ctxt->directory = directory;
13979
13980 return(ctxt);
13981}
13982
Daniel Veillard61b93382003-11-03 14:28:31 +000013983/**
13984 * xmlCreateFileParserCtxt:
13985 * @filename: the filename
13986 *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080013987 * Create a parser context for a file content.
Daniel Veillard61b93382003-11-03 14:28:31 +000013988 * Automatic support for ZLIB/Compress compressed document is provided
13989 * by default if found at compile-time.
13990 *
13991 * Returns the new parser context or NULL
13992 */
13993xmlParserCtxtPtr
13994xmlCreateFileParserCtxt(const char *filename)
13995{
13996 return(xmlCreateURLParserCtxt(filename, 0));
13997}
13998
Daniel Veillard81273902003-09-30 00:43:48 +000013999#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014000/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014001 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000014002 * @sax: the SAX handler block
14003 * @filename: the filename
14004 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14005 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014006 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014007 *
14008 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14009 * compressed document is provided by default if found at compile-time.
14010 * It use the given SAX function block to handle the parsing callback.
14011 * If sax is NULL, fallback to the default DOM tree building routines.
14012 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014013 * User data (void *) is stored within the parser context in the
14014 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014015 *
Owen Taylor3473f882001-02-23 17:55:21 +000014016 * Returns the resulting document tree
14017 */
14018
14019xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014020xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14021 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014022 xmlDocPtr ret;
14023 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014024
Daniel Veillard635ef722001-10-29 11:48:19 +000014025 xmlInitParser();
14026
Owen Taylor3473f882001-02-23 17:55:21 +000014027 ctxt = xmlCreateFileParserCtxt(filename);
14028 if (ctxt == NULL) {
14029 return(NULL);
14030 }
14031 if (sax != NULL) {
14032 if (ctxt->sax != NULL)
14033 xmlFree(ctxt->sax);
14034 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014035 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014036 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014037 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014038 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014039 }
Owen Taylor3473f882001-02-23 17:55:21 +000014040
Daniel Veillard37d2d162008-03-14 10:54:00 +000014041 if (ctxt->directory == NULL)
14042 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014043
Daniel Veillarddad3f682002-11-17 16:47:27 +000014044 ctxt->recovery = recovery;
14045
Owen Taylor3473f882001-02-23 17:55:21 +000014046 xmlParseDocument(ctxt);
14047
William M. Brackc07329e2003-09-08 01:57:30 +000014048 if ((ctxt->wellFormed) || recovery) {
14049 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014050 if (ret != NULL) {
14051 if (ctxt->input->buf->compressed > 0)
14052 ret->compression = 9;
14053 else
14054 ret->compression = ctxt->input->buf->compressed;
14055 }
William M. Brackc07329e2003-09-08 01:57:30 +000014056 }
Owen Taylor3473f882001-02-23 17:55:21 +000014057 else {
14058 ret = NULL;
14059 xmlFreeDoc(ctxt->myDoc);
14060 ctxt->myDoc = NULL;
14061 }
14062 if (sax != NULL)
14063 ctxt->sax = NULL;
14064 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014065
Owen Taylor3473f882001-02-23 17:55:21 +000014066 return(ret);
14067}
14068
14069/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014070 * xmlSAXParseFile:
14071 * @sax: the SAX handler block
14072 * @filename: the filename
14073 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14074 * documents
14075 *
14076 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14077 * compressed document is provided by default if found at compile-time.
14078 * It use the given SAX function block to handle the parsing callback.
14079 * If sax is NULL, fallback to the default DOM tree building routines.
14080 *
14081 * Returns the resulting document tree
14082 */
14083
14084xmlDocPtr
14085xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14086 int recovery) {
14087 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14088}
14089
14090/**
Owen Taylor3473f882001-02-23 17:55:21 +000014091 * xmlRecoverDoc:
14092 * @cur: a pointer to an array of xmlChar
14093 *
14094 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014095 * In the case the document is not Well Formed, a attempt to build a
14096 * tree is tried anyway
14097 *
14098 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014099 */
14100
14101xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014102xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014103 return(xmlSAXParseDoc(NULL, cur, 1));
14104}
14105
14106/**
14107 * xmlParseFile:
14108 * @filename: the filename
14109 *
14110 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14111 * compressed document is provided by default if found at compile-time.
14112 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014113 * Returns the resulting document tree if the file was wellformed,
14114 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014115 */
14116
14117xmlDocPtr
14118xmlParseFile(const char *filename) {
14119 return(xmlSAXParseFile(NULL, filename, 0));
14120}
14121
14122/**
14123 * xmlRecoverFile:
14124 * @filename: the filename
14125 *
14126 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14127 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014128 * In the case the document is not Well Formed, it attempts to build
14129 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014130 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014131 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014132 */
14133
14134xmlDocPtr
14135xmlRecoverFile(const char *filename) {
14136 return(xmlSAXParseFile(NULL, filename, 1));
14137}
14138
14139
14140/**
14141 * xmlSetupParserForBuffer:
14142 * @ctxt: an XML parser context
14143 * @buffer: a xmlChar * buffer
14144 * @filename: a file name
14145 *
14146 * Setup the parser context to parse a new buffer; Clears any prior
14147 * contents from the parser context. The buffer parameter must not be
14148 * NULL, but the filename parameter can be
14149 */
14150void
14151xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14152 const char* filename)
14153{
14154 xmlParserInputPtr input;
14155
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014156 if ((ctxt == NULL) || (buffer == NULL))
14157 return;
14158
Owen Taylor3473f882001-02-23 17:55:21 +000014159 input = xmlNewInputStream(ctxt);
14160 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014161 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014162 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014163 return;
14164 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014165
Owen Taylor3473f882001-02-23 17:55:21 +000014166 xmlClearParserCtxt(ctxt);
14167 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014168 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014169 input->base = buffer;
14170 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014171 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014172 inputPush(ctxt, input);
14173}
14174
14175/**
14176 * xmlSAXUserParseFile:
14177 * @sax: a SAX handler
14178 * @user_data: The user data returned on SAX callbacks
14179 * @filename: a file name
14180 *
14181 * parse an XML file and call the given SAX handler routines.
14182 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014183 *
Owen Taylor3473f882001-02-23 17:55:21 +000014184 * Returns 0 in case of success or a error number otherwise
14185 */
14186int
14187xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14188 const char *filename) {
14189 int ret = 0;
14190 xmlParserCtxtPtr ctxt;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014191
Owen Taylor3473f882001-02-23 17:55:21 +000014192 ctxt = xmlCreateFileParserCtxt(filename);
14193 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014194 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014195 xmlFree(ctxt->sax);
14196 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014197 xmlDetectSAX2(ctxt);
14198
Owen Taylor3473f882001-02-23 17:55:21 +000014199 if (user_data != NULL)
14200 ctxt->userData = user_data;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014201
Owen Taylor3473f882001-02-23 17:55:21 +000014202 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014203
Owen Taylor3473f882001-02-23 17:55:21 +000014204 if (ctxt->wellFormed)
14205 ret = 0;
14206 else {
14207 if (ctxt->errNo != 0)
14208 ret = ctxt->errNo;
14209 else
14210 ret = -1;
14211 }
14212 if (sax != NULL)
14213 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014214 if (ctxt->myDoc != NULL) {
14215 xmlFreeDoc(ctxt->myDoc);
14216 ctxt->myDoc = NULL;
14217 }
Owen Taylor3473f882001-02-23 17:55:21 +000014218 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014219
Owen Taylor3473f882001-02-23 17:55:21 +000014220 return ret;
14221}
Daniel Veillard81273902003-09-30 00:43:48 +000014222#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014223
14224/************************************************************************
14225 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014226 * Front ends when parsing from memory *
Owen Taylor3473f882001-02-23 17:55:21 +000014227 * *
14228 ************************************************************************/
14229
14230/**
14231 * xmlCreateMemoryParserCtxt:
14232 * @buffer: a pointer to a char array
14233 * @size: the size of the array
14234 *
14235 * Create a parser context for an XML in-memory document.
14236 *
14237 * Returns the new parser context or NULL
14238 */
14239xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014240xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014241 xmlParserCtxtPtr ctxt;
14242 xmlParserInputPtr input;
14243 xmlParserInputBufferPtr buf;
14244
14245 if (buffer == NULL)
14246 return(NULL);
14247 if (size <= 0)
14248 return(NULL);
14249
14250 ctxt = xmlNewParserCtxt();
14251 if (ctxt == NULL)
14252 return(NULL);
14253
Daniel Veillard53350552003-09-18 13:35:51 +000014254 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014255 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014256 if (buf == NULL) {
14257 xmlFreeParserCtxt(ctxt);
14258 return(NULL);
14259 }
Owen Taylor3473f882001-02-23 17:55:21 +000014260
14261 input = xmlNewInputStream(ctxt);
14262 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014263 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014264 xmlFreeParserCtxt(ctxt);
14265 return(NULL);
14266 }
14267
14268 input->filename = NULL;
14269 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014270 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014271
14272 inputPush(ctxt, input);
14273 return(ctxt);
14274}
14275
Daniel Veillard81273902003-09-30 00:43:48 +000014276#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014277/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014278 * xmlSAXParseMemoryWithData:
14279 * @sax: the SAX handler block
14280 * @buffer: an pointer to a char array
14281 * @size: the size of the array
14282 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14283 * documents
14284 * @data: the userdata
14285 *
14286 * parse an XML in-memory block and use the given SAX function block
14287 * to handle the parsing callback. If sax is NULL, fallback to the default
14288 * DOM tree building routines.
14289 *
14290 * User data (void *) is stored within the parser context in the
14291 * context's _private member, so it is available nearly everywhere in libxml
14292 *
14293 * Returns the resulting document tree
14294 */
14295
14296xmlDocPtr
14297xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14298 int size, int recovery, void *data) {
14299 xmlDocPtr ret;
14300 xmlParserCtxtPtr ctxt;
14301
Daniel Veillardab2a7632009-07-09 08:45:03 +020014302 xmlInitParser();
14303
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014304 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14305 if (ctxt == NULL) return(NULL);
14306 if (sax != NULL) {
14307 if (ctxt->sax != NULL)
14308 xmlFree(ctxt->sax);
14309 ctxt->sax = sax;
14310 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014311 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014312 if (data!=NULL) {
14313 ctxt->_private=data;
14314 }
14315
Daniel Veillardadba5f12003-04-04 16:09:01 +000014316 ctxt->recovery = recovery;
14317
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014318 xmlParseDocument(ctxt);
14319
14320 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14321 else {
14322 ret = NULL;
14323 xmlFreeDoc(ctxt->myDoc);
14324 ctxt->myDoc = NULL;
14325 }
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014326 if (sax != NULL)
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014327 ctxt->sax = NULL;
14328 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014329
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014330 return(ret);
14331}
14332
14333/**
Owen Taylor3473f882001-02-23 17:55:21 +000014334 * xmlSAXParseMemory:
14335 * @sax: the SAX handler block
14336 * @buffer: an pointer to a char array
14337 * @size: the size of the array
14338 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14339 * documents
14340 *
14341 * parse an XML in-memory block and use the given SAX function block
14342 * to handle the parsing callback. If sax is NULL, fallback to the default
14343 * DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014344 *
Owen Taylor3473f882001-02-23 17:55:21 +000014345 * Returns the resulting document tree
14346 */
14347xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014348xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14349 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014350 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014351}
14352
14353/**
14354 * xmlParseMemory:
14355 * @buffer: an pointer to a char array
14356 * @size: the size of the array
14357 *
14358 * parse an XML in-memory block and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014359 *
Owen Taylor3473f882001-02-23 17:55:21 +000014360 * Returns the resulting document tree
14361 */
14362
Daniel Veillard50822cb2001-07-26 20:05:51 +000014363xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014364 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14365}
14366
14367/**
14368 * xmlRecoverMemory:
14369 * @buffer: an pointer to a char array
14370 * @size: the size of the array
14371 *
14372 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014373 * In the case the document is not Well Formed, an attempt to
14374 * build a tree is tried anyway
14375 *
14376 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014377 */
14378
Daniel Veillard50822cb2001-07-26 20:05:51 +000014379xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014380 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14381}
14382
14383/**
14384 * xmlSAXUserParseMemory:
14385 * @sax: a SAX handler
14386 * @user_data: The user data returned on SAX callbacks
14387 * @buffer: an in-memory XML document input
14388 * @size: the length of the XML document in bytes
14389 *
14390 * A better SAX parsing routine.
14391 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014392 *
Owen Taylor3473f882001-02-23 17:55:21 +000014393 * Returns 0 in case of success or a error number otherwise
14394 */
14395int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014396 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014397 int ret = 0;
14398 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014399
14400 xmlInitParser();
14401
Owen Taylor3473f882001-02-23 17:55:21 +000014402 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14403 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014404 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14405 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014406 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014407 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014408
Daniel Veillard30211a02001-04-26 09:33:18 +000014409 if (user_data != NULL)
14410 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014411
Owen Taylor3473f882001-02-23 17:55:21 +000014412 xmlParseDocument(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014413
Owen Taylor3473f882001-02-23 17:55:21 +000014414 if (ctxt->wellFormed)
14415 ret = 0;
14416 else {
14417 if (ctxt->errNo != 0)
14418 ret = ctxt->errNo;
14419 else
14420 ret = -1;
14421 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014422 if (sax != NULL)
14423 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014424 if (ctxt->myDoc != NULL) {
14425 xmlFreeDoc(ctxt->myDoc);
14426 ctxt->myDoc = NULL;
14427 }
Owen Taylor3473f882001-02-23 17:55:21 +000014428 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014429
Owen Taylor3473f882001-02-23 17:55:21 +000014430 return ret;
14431}
Daniel Veillard81273902003-09-30 00:43:48 +000014432#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014433
14434/**
14435 * xmlCreateDocParserCtxt:
14436 * @cur: a pointer to an array of xmlChar
14437 *
14438 * Creates a parser context for an XML in-memory document.
14439 *
14440 * Returns the new parser context or NULL
14441 */
14442xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014443xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014444 int len;
14445
14446 if (cur == NULL)
14447 return(NULL);
14448 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014449 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014450}
14451
Daniel Veillard81273902003-09-30 00:43:48 +000014452#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014453/**
14454 * xmlSAXParseDoc:
14455 * @sax: the SAX handler block
14456 * @cur: a pointer to an array of xmlChar
14457 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14458 * documents
14459 *
14460 * parse an XML in-memory document and build a tree.
14461 * It use the given SAX function block to handle the parsing callback.
14462 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014463 *
Owen Taylor3473f882001-02-23 17:55:21 +000014464 * Returns the resulting document tree
14465 */
14466
14467xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014468xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014469 xmlDocPtr ret;
14470 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014471 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014472
Daniel Veillard38936062004-11-04 17:45:11 +000014473 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014474
14475
14476 ctxt = xmlCreateDocParserCtxt(cur);
14477 if (ctxt == NULL) return(NULL);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014478 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014479 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014480 ctxt->sax = sax;
14481 ctxt->userData = NULL;
14482 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014483 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014484
14485 xmlParseDocument(ctxt);
14486 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14487 else {
14488 ret = NULL;
14489 xmlFreeDoc(ctxt->myDoc);
14490 ctxt->myDoc = NULL;
14491 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014492 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014493 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014494 xmlFreeParserCtxt(ctxt);
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014495
Owen Taylor3473f882001-02-23 17:55:21 +000014496 return(ret);
14497}
14498
14499/**
14500 * xmlParseDoc:
14501 * @cur: a pointer to an array of xmlChar
14502 *
14503 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014504 *
Owen Taylor3473f882001-02-23 17:55:21 +000014505 * Returns the resulting document tree
14506 */
14507
14508xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014509xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014510 return(xmlSAXParseDoc(NULL, cur, 0));
14511}
Daniel Veillard81273902003-09-30 00:43:48 +000014512#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014513
Daniel Veillard81273902003-09-30 00:43:48 +000014514#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014515/************************************************************************
14516 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014517 * Specific function to keep track of entities references *
14518 * and used by the XSLT debugger *
Daniel Veillard8107a222002-01-13 14:10:10 +000014519 * *
14520 ************************************************************************/
14521
14522static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14523
14524/**
14525 * xmlAddEntityReference:
14526 * @ent : A valid entity
14527 * @firstNode : A valid first node for children of entity
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014528 * @lastNode : A valid last node of children entity
Daniel Veillard8107a222002-01-13 14:10:10 +000014529 *
14530 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14531 */
14532static void
14533xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14534 xmlNodePtr lastNode)
14535{
14536 if (xmlEntityRefFunc != NULL) {
14537 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14538 }
14539}
14540
14541
14542/**
14543 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014544 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014545 *
14546 * Set the function to call call back when a xml reference has been made
14547 */
14548void
14549xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14550{
14551 xmlEntityRefFunc = func;
14552}
Daniel Veillard81273902003-09-30 00:43:48 +000014553#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014554
14555/************************************************************************
14556 * *
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014557 * Miscellaneous *
Owen Taylor3473f882001-02-23 17:55:21 +000014558 * *
14559 ************************************************************************/
14560
14561#ifdef LIBXML_XPATH_ENABLED
14562#include <libxml/xpath.h>
14563#endif
14564
Daniel Veillardffa3c742005-07-21 13:24:09 +000014565extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014566static int xmlParserInitialized = 0;
14567
14568/**
14569 * xmlInitParser:
14570 *
14571 * Initialization function for the XML parser.
14572 * This is not reentrant. Call once before processing in case of
14573 * use in multithreaded programs.
14574 */
14575
14576void
14577xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014578 if (xmlParserInitialized != 0)
14579 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014580
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014581#ifdef LIBXML_THREAD_ENABLED
14582 __xmlGlobalInitMutexLock();
14583 if (xmlParserInitialized == 0) {
14584#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014585 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014586 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014587 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14588 (xmlGenericError == NULL))
14589 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014590 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014591 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014592 xmlInitCharEncodingHandlers();
14593 xmlDefaultSAXHandlerInit();
14594 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014595#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014596 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014597#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014598#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014599 htmlInitAutoClose();
14600 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014601#endif
14602#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014603 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014604#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014605 xmlParserInitialized = 1;
14606#ifdef LIBXML_THREAD_ENABLED
14607 }
14608 __xmlGlobalInitMutexUnlock();
14609#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014610}
14611
14612/**
14613 * xmlCleanupParser:
14614 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014615 * This function name is somewhat misleading. It does not clean up
14616 * parser state, it cleans up memory allocated by the library itself.
14617 * It is a cleanup function for the XML library. It tries to reclaim all
14618 * related global memory allocated for the library processing.
14619 * It doesn't deallocate any document related memory. One should
14620 * call xmlCleanupParser() only when the process has finished using
14621 * the library and all XML/HTML documents built with it.
14622 * See also xmlInitParser() which has the opposite function of preparing
14623 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014624 *
14625 * WARNING: if your application is multithreaded or has plugin support
14626 * calling this may crash the application if another thread or
14627 * a plugin is still using libxml2. It's sometimes very hard to
14628 * guess if libxml2 is in use in the application, some libraries
14629 * or plugins may use it without notice. In case of doubt abstain
14630 * from calling this function or do it just before calling exit()
14631 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014632 */
14633
14634void
14635xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014636 if (!xmlParserInitialized)
14637 return;
14638
Owen Taylor3473f882001-02-23 17:55:21 +000014639 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014640#ifdef LIBXML_CATALOG_ENABLED
14641 xmlCatalogCleanup();
14642#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014643 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014644 xmlCleanupInputCallbacks();
14645#ifdef LIBXML_OUTPUT_ENABLED
14646 xmlCleanupOutputCallbacks();
14647#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014648#ifdef LIBXML_SCHEMAS_ENABLED
14649 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014650 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014651#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000014652 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014653 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014654 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014655 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014656 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014657}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014658
14659/************************************************************************
14660 * *
14661 * New set (2.6.0) of simpler and more flexible APIs *
14662 * *
14663 ************************************************************************/
14664
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Release @str with xmlFree() unless it is NULL or is owned by the
 * dictionary found in the variable named "dict" of the current scope.
 * A NULL "dict" means the string cannot be dictionary owned.
 *
 * NOTE: the expansion is a bare "if" statement that already ends with
 * a semicolon; do not use it as the body of an if/else without braces.
 */
#define DICT_FREE(str)							\
	if (((str) != NULL) &&						\
	    ((dict == NULL) ||						\
	     (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))		\
	    xmlFree((char *)(str));
14676
14677/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014678 * xmlCtxtReset:
14679 * @ctxt: an XML parser context
14680 *
14681 * Reset a parser context
14682 */
14683void
14684xmlCtxtReset(xmlParserCtxtPtr ctxt)
14685{
14686 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014687 xmlDictPtr dict;
Daniel Veillardf8e3db02012-09-11 13:26:36 +080014688
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014689 if (ctxt == NULL)
14690 return;
14691
14692 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014693
14694 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14695 xmlFreeInputStream(input);
14696 }
14697 ctxt->inputNr = 0;
14698 ctxt->input = NULL;
14699
14700 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014701 if (ctxt->spaceTab != NULL) {
14702 ctxt->spaceTab[0] = -1;
14703 ctxt->space = &ctxt->spaceTab[0];
14704 } else {
14705 ctxt->space = NULL;
14706 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014707
14708
14709 ctxt->nodeNr = 0;
14710 ctxt->node = NULL;
14711
14712 ctxt->nameNr = 0;
14713 ctxt->name = NULL;
14714
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014715 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014716 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014717 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014718 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014719 DICT_FREE(ctxt->directory);
14720 ctxt->directory = NULL;
14721 DICT_FREE(ctxt->extSubURI);
14722 ctxt->extSubURI = NULL;
14723 DICT_FREE(ctxt->extSubSystem);
14724 ctxt->extSubSystem = NULL;
14725 if (ctxt->myDoc != NULL)
14726 xmlFreeDoc(ctxt->myDoc);
14727 ctxt->myDoc = NULL;
14728
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014729 ctxt->standalone = -1;
14730 ctxt->hasExternalSubset = 0;
14731 ctxt->hasPErefs = 0;
14732 ctxt->html = 0;
14733 ctxt->external = 0;
14734 ctxt->instate = XML_PARSER_START;
14735 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014736
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014737 ctxt->wellFormed = 1;
14738 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014739 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014740 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014741#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014742 ctxt->vctxt.userData = ctxt;
14743 ctxt->vctxt.error = xmlParserValidityError;
14744 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014745#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014746 ctxt->record_info = 0;
14747 ctxt->nbChars = 0;
14748 ctxt->checkIndex = 0;
14749 ctxt->inSubset = 0;
14750 ctxt->errNo = XML_ERR_OK;
14751 ctxt->depth = 0;
14752 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14753 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014754 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014755 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014756 xmlInitNodeInfoSeq(&ctxt->node_seq);
14757
14758 if (ctxt->attsDefault != NULL) {
14759 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14760 ctxt->attsDefault = NULL;
14761 }
14762 if (ctxt->attsSpecial != NULL) {
14763 xmlHashFree(ctxt->attsSpecial, NULL);
14764 ctxt->attsSpecial = NULL;
14765 }
14766
Daniel Veillard4432df22003-09-28 18:58:27 +000014767#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014768 if (ctxt->catalogs != NULL)
14769 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014770#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014771 if (ctxt->lastError.code != XML_ERR_OK)
14772 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014773}
14774
14775/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014776 * xmlCtxtResetPush:
14777 * @ctxt: an XML parser context
14778 * @chunk: a pointer to an array of chars
14779 * @size: number of chars in the array
14780 * @filename: an optional file name or URI
14781 * @encoding: the document encoding, or NULL
14782 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014783 * Reset a push parser context
14784 *
14785 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014786 */
14787int
14788xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14789 int size, const char *filename, const char *encoding)
14790{
14791 xmlParserInputPtr inputStream;
14792 xmlParserInputBufferPtr buf;
14793 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14794
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014795 if (ctxt == NULL)
14796 return(1);
14797
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014798 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14799 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14800
14801 buf = xmlAllocParserInputBuffer(enc);
14802 if (buf == NULL)
14803 return(1);
14804
14805 if (ctxt == NULL) {
14806 xmlFreeParserInputBuffer(buf);
14807 return(1);
14808 }
14809
14810 xmlCtxtReset(ctxt);
14811
14812 if (ctxt->pushTab == NULL) {
14813 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14814 sizeof(xmlChar *));
14815 if (ctxt->pushTab == NULL) {
14816 xmlErrMemory(ctxt, NULL);
14817 xmlFreeParserInputBuffer(buf);
14818 return(1);
14819 }
14820 }
14821
14822 if (filename == NULL) {
14823 ctxt->directory = NULL;
14824 } else {
14825 ctxt->directory = xmlParserGetDirectory(filename);
14826 }
14827
14828 inputStream = xmlNewInputStream(ctxt);
14829 if (inputStream == NULL) {
14830 xmlFreeParserInputBuffer(buf);
14831 return(1);
14832 }
14833
14834 if (filename == NULL)
14835 inputStream->filename = NULL;
14836 else
14837 inputStream->filename = (char *)
14838 xmlCanonicPath((const xmlChar *) filename);
14839 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014840 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014841
14842 inputPush(ctxt, inputStream);
14843
14844 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14845 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014846 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14847 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014848
14849 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14850
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014851 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014852#ifdef DEBUG_PUSH
14853 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14854#endif
14855 }
14856
14857 if (encoding != NULL) {
14858 xmlCharEncodingHandlerPtr hdlr;
14859
Daniel Veillard37334572008-07-31 08:20:02 +000014860 if (ctxt->encoding != NULL)
14861 xmlFree((xmlChar *) ctxt->encoding);
14862 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14863
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014864 hdlr = xmlFindCharEncodingHandler(encoding);
14865 if (hdlr != NULL) {
14866 xmlSwitchToEncoding(ctxt, hdlr);
14867 } else {
14868 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14869 "Unsupported encoding %s\n", BAD_CAST encoding);
14870 }
14871 } else if (enc != XML_CHAR_ENCODING_NONE) {
14872 xmlSwitchEncoding(ctxt, enc);
14873 }
14874
14875 return(0);
14876}
14877
Daniel Veillard37334572008-07-31 08:20:02 +000014878
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014879/**
Daniel Veillard37334572008-07-31 08:20:02 +000014880 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014881 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014882 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014883 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014884 *
14885 * Applies the options to the parser context
14886 *
14887 * Returns 0 in case of success, the set of unknown or unimplemented options
14888 * in case of error.
14889 */
Daniel Veillard37334572008-07-31 08:20:02 +000014890static int
14891xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014892{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014893 if (ctxt == NULL)
14894 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014895 if (encoding != NULL) {
14896 if (ctxt->encoding != NULL)
14897 xmlFree((xmlChar *) ctxt->encoding);
14898 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14899 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014900 if (options & XML_PARSE_RECOVER) {
14901 ctxt->recovery = 1;
14902 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014903 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014904 } else
14905 ctxt->recovery = 0;
14906 if (options & XML_PARSE_DTDLOAD) {
14907 ctxt->loadsubset = XML_DETECT_IDS;
14908 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014909 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014910 } else
14911 ctxt->loadsubset = 0;
14912 if (options & XML_PARSE_DTDATTR) {
14913 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14914 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014915 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014916 }
14917 if (options & XML_PARSE_NOENT) {
14918 ctxt->replaceEntities = 1;
14919 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14920 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014921 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014922 } else
14923 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014924 if (options & XML_PARSE_PEDANTIC) {
14925 ctxt->pedantic = 1;
14926 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014927 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014928 } else
14929 ctxt->pedantic = 0;
14930 if (options & XML_PARSE_NOBLANKS) {
14931 ctxt->keepBlanks = 0;
14932 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14933 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014934 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014935 } else
14936 ctxt->keepBlanks = 1;
14937 if (options & XML_PARSE_DTDVALID) {
14938 ctxt->validate = 1;
14939 if (options & XML_PARSE_NOWARNING)
14940 ctxt->vctxt.warning = NULL;
14941 if (options & XML_PARSE_NOERROR)
14942 ctxt->vctxt.error = NULL;
14943 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014944 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014945 } else
14946 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014947 if (options & XML_PARSE_NOWARNING) {
14948 ctxt->sax->warning = NULL;
14949 options -= XML_PARSE_NOWARNING;
14950 }
14951 if (options & XML_PARSE_NOERROR) {
14952 ctxt->sax->error = NULL;
14953 ctxt->sax->fatalError = NULL;
14954 options -= XML_PARSE_NOERROR;
14955 }
Daniel Veillard81273902003-09-30 00:43:48 +000014956#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014957 if (options & XML_PARSE_SAX1) {
14958 ctxt->sax->startElement = xmlSAX2StartElement;
14959 ctxt->sax->endElement = xmlSAX2EndElement;
14960 ctxt->sax->startElementNs = NULL;
14961 ctxt->sax->endElementNs = NULL;
14962 ctxt->sax->initialized = 1;
14963 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014964 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014965 }
Daniel Veillard81273902003-09-30 00:43:48 +000014966#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014967 if (options & XML_PARSE_NODICT) {
14968 ctxt->dictNames = 0;
14969 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014970 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014971 } else {
14972 ctxt->dictNames = 1;
14973 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014974 if (options & XML_PARSE_NOCDATA) {
14975 ctxt->sax->cdataBlock = NULL;
14976 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014977 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014978 }
14979 if (options & XML_PARSE_NSCLEAN) {
14980 ctxt->options |= XML_PARSE_NSCLEAN;
14981 options -= XML_PARSE_NSCLEAN;
14982 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014983 if (options & XML_PARSE_NONET) {
14984 ctxt->options |= XML_PARSE_NONET;
14985 options -= XML_PARSE_NONET;
14986 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014987 if (options & XML_PARSE_COMPACT) {
14988 ctxt->options |= XML_PARSE_COMPACT;
14989 options -= XML_PARSE_COMPACT;
14990 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014991 if (options & XML_PARSE_OLD10) {
14992 ctxt->options |= XML_PARSE_OLD10;
14993 options -= XML_PARSE_OLD10;
14994 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014995 if (options & XML_PARSE_NOBASEFIX) {
14996 ctxt->options |= XML_PARSE_NOBASEFIX;
14997 options -= XML_PARSE_NOBASEFIX;
14998 }
14999 if (options & XML_PARSE_HUGE) {
15000 ctxt->options |= XML_PARSE_HUGE;
15001 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080015002 if (ctxt->dict != NULL)
15003 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015004 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015005 if (options & XML_PARSE_OLDSAX) {
15006 ctxt->options |= XML_PARSE_OLDSAX;
15007 options -= XML_PARSE_OLDSAX;
15008 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015009 if (options & XML_PARSE_IGNORE_ENC) {
15010 ctxt->options |= XML_PARSE_IGNORE_ENC;
15011 options -= XML_PARSE_IGNORE_ENC;
15012 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015013 if (options & XML_PARSE_BIG_LINES) {
15014 ctxt->options |= XML_PARSE_BIG_LINES;
15015 options -= XML_PARSE_BIG_LINES;
15016 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015017 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015018 return (options);
15019}
15020
15021/**
Daniel Veillard37334572008-07-31 08:20:02 +000015022 * xmlCtxtUseOptions:
15023 * @ctxt: an XML parser context
15024 * @options: a combination of xmlParserOption
15025 *
15026 * Applies the options to the parser context
15027 *
15028 * Returns 0 in case of success, the set of unknown or unimplemented options
15029 * in case of error.
15030 */
15031int
15032xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15033{
15034 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15035}
15036
15037/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015038 * xmlDoRead:
15039 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015040 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015041 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015042 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015043 * @reuse: keep the context for reuse
15044 *
15045 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015046 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015047 * Returns the resulting document tree or NULL
15048 */
15049static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015050xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15051 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015052{
15053 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015054
15055 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015056 if (encoding != NULL) {
15057 xmlCharEncodingHandlerPtr hdlr;
15058
15059 hdlr = xmlFindCharEncodingHandler(encoding);
15060 if (hdlr != NULL)
15061 xmlSwitchToEncoding(ctxt, hdlr);
15062 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015063 if ((URL != NULL) && (ctxt->input != NULL) &&
15064 (ctxt->input->filename == NULL))
15065 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015066 xmlParseDocument(ctxt);
15067 if ((ctxt->wellFormed) || ctxt->recovery)
15068 ret = ctxt->myDoc;
15069 else {
15070 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015071 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015072 xmlFreeDoc(ctxt->myDoc);
15073 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015074 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015075 ctxt->myDoc = NULL;
15076 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015077 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015078 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015079
15080 return (ret);
15081}
15082
15083/**
15084 * xmlReadDoc:
15085 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015086 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015087 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015088 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015089 *
15090 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015091 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015092 * Returns the resulting document tree
15093 */
15094xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015095xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015096{
15097 xmlParserCtxtPtr ctxt;
15098
15099 if (cur == NULL)
15100 return (NULL);
15101
15102 ctxt = xmlCreateDocParserCtxt(cur);
15103 if (ctxt == NULL)
15104 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015105 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015106}
15107
15108/**
15109 * xmlReadFile:
15110 * @filename: a file or URL
15111 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015112 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015113 *
15114 * parse an XML file from the filesystem or the network.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015115 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015116 * Returns the resulting document tree
15117 */
15118xmlDocPtr
15119xmlReadFile(const char *filename, const char *encoding, int options)
15120{
15121 xmlParserCtxtPtr ctxt;
15122
Daniel Veillard61b93382003-11-03 14:28:31 +000015123 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015124 if (ctxt == NULL)
15125 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015126 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015127}
15128
15129/**
15130 * xmlReadMemory:
15131 * @buffer: a pointer to a char array
15132 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015133 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015134 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015135 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015136 *
15137 * parse an XML in-memory document and build a tree.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015138 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015139 * Returns the resulting document tree
15140 */
15141xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015142xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015143{
15144 xmlParserCtxtPtr ctxt;
15145
15146 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15147 if (ctxt == NULL)
15148 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015149 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015150}
15151
15152/**
15153 * xmlReadFd:
15154 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015155 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015156 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015157 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015158 *
15159 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015160 * NOTE that the file descriptor will not be closed when the
15161 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015162 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015163 * Returns the resulting document tree
15164 */
15165xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015166xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015167{
15168 xmlParserCtxtPtr ctxt;
15169 xmlParserInputBufferPtr input;
15170 xmlParserInputPtr stream;
15171
15172 if (fd < 0)
15173 return (NULL);
15174
15175 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15176 if (input == NULL)
15177 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015178 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015179 ctxt = xmlNewParserCtxt();
15180 if (ctxt == NULL) {
15181 xmlFreeParserInputBuffer(input);
15182 return (NULL);
15183 }
15184 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15185 if (stream == NULL) {
15186 xmlFreeParserInputBuffer(input);
15187 xmlFreeParserCtxt(ctxt);
15188 return (NULL);
15189 }
15190 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015191 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015192}
15193
15194/**
15195 * xmlReadIO:
15196 * @ioread: an I/O read function
15197 * @ioclose: an I/O close function
15198 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015199 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015200 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015201 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015202 *
15203 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015204 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015205 * Returns the resulting document tree
15206 */
15207xmlDocPtr
15208xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015209 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015210{
15211 xmlParserCtxtPtr ctxt;
15212 xmlParserInputBufferPtr input;
15213 xmlParserInputPtr stream;
15214
15215 if (ioread == NULL)
15216 return (NULL);
15217
15218 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15219 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015220 if (input == NULL) {
15221 if (ioclose != NULL)
15222 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015223 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015224 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015225 ctxt = xmlNewParserCtxt();
15226 if (ctxt == NULL) {
15227 xmlFreeParserInputBuffer(input);
15228 return (NULL);
15229 }
15230 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15231 if (stream == NULL) {
15232 xmlFreeParserInputBuffer(input);
15233 xmlFreeParserCtxt(ctxt);
15234 return (NULL);
15235 }
15236 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015237 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015238}
15239
15240/**
15241 * xmlCtxtReadDoc:
15242 * @ctxt: an XML parser context
15243 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015244 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015245 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015246 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015247 *
15248 * parse an XML in-memory document and build a tree.
15249 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015250 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015251 * Returns the resulting document tree
15252 */
15253xmlDocPtr
15254xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015255 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015256{
15257 xmlParserInputPtr stream;
15258
15259 if (cur == NULL)
15260 return (NULL);
15261 if (ctxt == NULL)
15262 return (NULL);
15263
15264 xmlCtxtReset(ctxt);
15265
15266 stream = xmlNewStringInputStream(ctxt, cur);
15267 if (stream == NULL) {
15268 return (NULL);
15269 }
15270 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015271 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015272}
15273
15274/**
15275 * xmlCtxtReadFile:
15276 * @ctxt: an XML parser context
15277 * @filename: a file or URL
15278 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015279 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015280 *
15281 * parse an XML file from the filesystem or the network.
15282 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015283 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015284 * Returns the resulting document tree
15285 */
15286xmlDocPtr
15287xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15288 const char *encoding, int options)
15289{
15290 xmlParserInputPtr stream;
15291
15292 if (filename == NULL)
15293 return (NULL);
15294 if (ctxt == NULL)
15295 return (NULL);
15296
15297 xmlCtxtReset(ctxt);
15298
Daniel Veillard29614c72004-11-26 10:47:26 +000015299 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015300 if (stream == NULL) {
15301 return (NULL);
15302 }
15303 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015304 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015305}
15306
15307/**
15308 * xmlCtxtReadMemory:
15309 * @ctxt: an XML parser context
15310 * @buffer: a pointer to a char array
15311 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015312 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015313 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015314 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015315 *
15316 * parse an XML in-memory document and build a tree.
15317 * This reuses the existing @ctxt parser context
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015318 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015319 * Returns the resulting document tree
15320 */
15321xmlDocPtr
15322xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015323 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015324{
15325 xmlParserInputBufferPtr input;
15326 xmlParserInputPtr stream;
15327
15328 if (ctxt == NULL)
15329 return (NULL);
15330 if (buffer == NULL)
15331 return (NULL);
15332
15333 xmlCtxtReset(ctxt);
15334
15335 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15336 if (input == NULL) {
15337 return(NULL);
15338 }
15339
15340 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15341 if (stream == NULL) {
15342 xmlFreeParserInputBuffer(input);
15343 return(NULL);
15344 }
15345
15346 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015347 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015348}
15349
15350/**
15351 * xmlCtxtReadFd:
15352 * @ctxt: an XML parser context
15353 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015354 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015355 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015356 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015357 *
15358 * parse an XML from a file descriptor and build a tree.
15359 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015360 * NOTE that the file descriptor will not be closed when the
15361 * reader is closed or reset.
Daniel Veillardf8e3db02012-09-11 13:26:36 +080015362 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015363 * Returns the resulting document tree
15364 */
15365xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015366xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15367 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015368{
15369 xmlParserInputBufferPtr input;
15370 xmlParserInputPtr stream;
15371
15372 if (fd < 0)
15373 return (NULL);
15374 if (ctxt == NULL)
15375 return (NULL);
15376
15377 xmlCtxtReset(ctxt);
15378
15379
15380 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15381 if (input == NULL)
15382 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015383 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015384 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15385 if (stream == NULL) {
15386 xmlFreeParserInputBuffer(input);
15387 return (NULL);
15388 }
15389 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015390 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015391}
15392
15393/**
15394 * xmlCtxtReadIO:
15395 * @ctxt: an XML parser context
15396 * @ioread: an I/O read function
15397 * @ioclose: an I/O close function
15398 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015399 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015400 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015401 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015402 *
15403 * parse an XML document from I/O functions and source and build a tree.
15404 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015405 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015406 * Returns the resulting document tree
15407 */
15408xmlDocPtr
15409xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15410 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015411 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015412 const char *encoding, int options)
15413{
15414 xmlParserInputBufferPtr input;
15415 xmlParserInputPtr stream;
15416
15417 if (ioread == NULL)
15418 return (NULL);
15419 if (ctxt == NULL)
15420 return (NULL);
15421
15422 xmlCtxtReset(ctxt);
15423
15424 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15425 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015426 if (input == NULL) {
15427 if (ioclose != NULL)
15428 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015429 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015430 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015431 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15432 if (stream == NULL) {
15433 xmlFreeParserInputBuffer(input);
15434 return (NULL);
15435 }
15436 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015437 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015438}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015439
15440#define bottom_parser
15441#include "elfgcchack.h"