blob: 9a57b0199b3c05c65ff3eb3b338317f4f517fe02 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
/************************************************************************
 *									*
 *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
 *									*
 ************************************************************************/

/*
 * Entity replacement sizes below XML_PARSER_BIG_ENTITY bytes are never
 * flagged by the non-linear expansion check (see xmlParserEntityCheck).
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacements over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
113
114/*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard0161e632008-08-28 15:36:32 +0000125 xmlEntityPtr ent)
126{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
133 if (size != 0) {
134 /*
135 * Do the check based on the replacement size of the entity
136 */
137 if (size < XML_PARSER_BIG_ENTITY)
138 return(0);
139
140 /*
141 * A limit on the amount of text data reasonably used
142 */
143 if (ctxt->input != NULL) {
144 consumed = ctxt->input->consumed +
145 (ctxt->input->cur - ctxt->input->base);
146 }
147 consumed += ctxt->sizeentities;
148
149 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
150 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
151 return (0);
152 } else if (ent != NULL) {
153 /*
154 * use the number of parsed entities in the replacement
155 */
156 size = ent->checked;
157
158 /*
159 * The amount of data parsed counting entities size only once
160 */
161 if (ctxt->input != NULL) {
162 consumed = ctxt->input->consumed +
163 (ctxt->input->cur - ctxt->input->base);
164 }
165 consumed += ctxt->sizeentities;
166
167 /*
168 * Check the density of entities for the amount of data
169 * knowing an entity reference will take at least 3 bytes
170 */
171 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
172 return (0);
173 } else {
174 /*
175 * strange we got no data for checking just return
176 */
177 return (0);
178 }
179
180 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
181 return (1);
182}
183
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000193
Daniel Veillard0fb18932003-09-07 09:14:37 +0000194
Daniel Veillard0161e632008-08-28 15:36:32 +0000195
#define SAX2 1
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * When calling GROW that's the minimal amount of data
 * the parser expects to have received. It is not a hard
 * limit but an optimization when reading strings like Names.
 * It is not strictly needed as long as the input's available characters
 * are followed by 0, which should be provided by the I/O level.
 */
#define XML_PARSER_CHUNK_SIZE 100
211
/*
 * List of XML prefixed PI targets allowed by W3C specs.
 * The table is NULL-terminated; both the strings and the table itself
 * are constant since it is only ever read.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
221
Daniel Veillarda07050d2003-10-19 14:46:32 +0000222
Owen Taylor3473f882001-02-23 17:55:21 +0000223/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200224static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
225 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000226
Daniel Veillard7d515752003-09-26 19:12:37 +0000227static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000228xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
229 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000230 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000231 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000232
Daniel Veillard37334572008-07-31 08:20:02 +0000233static int
234xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
235 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000236#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000237static void
238xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
239 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000240#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000241
Daniel Veillard7d515752003-09-26 19:12:37 +0000242static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000243xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
244 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000245
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000246static int
247xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
248
Daniel Veillarde57ec792003-09-10 10:50:59 +0000249/************************************************************************
250 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000251 * Some factorized error routines *
252 * *
253 ************************************************************************/
254
255/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000256 * xmlErrAttributeDup:
257 * @ctxt: an XML parser context
258 * @prefix: the attribute prefix
259 * @localname: the attribute localname
260 *
261 * Handle a redefinition of attribute error
262 */
263static void
264xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
265 const xmlChar * localname)
266{
Daniel Veillard157fee02003-10-31 10:36:03 +0000267 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
268 (ctxt->instate == XML_PARSER_EOF))
269 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000270 if (ctxt != NULL)
271 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200272
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000273 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000274 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200275 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000276 (const char *) localname, NULL, NULL, 0, 0,
277 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000278 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000279 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200280 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000281 (const char *) prefix, (const char *) localname,
282 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
283 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000284 if (ctxt != NULL) {
285 ctxt->wellFormed = 0;
286 if (ctxt->recovery == 0)
287 ctxt->disableSAX = 1;
288 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000289}
290
291/**
292 * xmlFatalErr:
293 * @ctxt: an XML parser context
294 * @error: the error number
295 * @extra: extra information string
296 *
297 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
298 */
299static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000300xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000301{
302 const char *errmsg;
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800303 char errstr[129] = "";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000304
Daniel Veillard157fee02003-10-31 10:36:03 +0000305 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
306 (ctxt->instate == XML_PARSER_EOF))
307 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000308 switch (error) {
309 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800310 errmsg = "CharRef: invalid hexadecimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800313 errmsg = "CharRef: invalid decimal value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_INVALID_CHARREF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800316 errmsg = "CharRef: invalid value";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "internal error";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_PEREF_AT_EOF:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800322 errmsg = "PEReference at end of document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800325 errmsg = "PEReference in prolog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800328 errmsg = "PEReference in epilog";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_PEREF_NO_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800331 errmsg = "PEReference: no name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800334 errmsg = "PEReference: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ENTITY_LOOP:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800337 errmsg = "Detected an entity reference loop";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800340 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800343 errmsg = "PEReferences forbidden in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800346 errmsg = "EntityValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800349 errmsg = "AttValue: \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800352 errmsg = "Unescaped '<' not allowed in attributes values";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800355 errmsg = "SystemLiteral \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800358 errmsg = "Unfinished System or Public ID \" or ' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800361 errmsg = "Sequence ']]>' not allowed in content";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 case XML_ERR_URI_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800364 errmsg = "SYSTEM or PUBLIC, the URI is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 case XML_ERR_PUBID_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800367 errmsg = "PUBLIC, the Public Identifier is missing";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000368 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000369 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800370 errmsg = "Comment must not contain '--' (double-hyphen)";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000371 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000372 case XML_ERR_PI_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800373 errmsg = "xmlParsePI : no target name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000374 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800376 errmsg = "Invalid PI name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000377 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800379 errmsg = "NOTATION: Name expected here";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000380 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000381 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800382 errmsg = "'>' required to close NOTATION declaration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000383 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000384 case XML_ERR_VALUE_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800385 errmsg = "Entity value required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000386 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000387 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 errmsg = "Fragment not allowed";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800391 errmsg = "'(' required to start ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000392 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000393 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800394 errmsg = "NmToken expected in ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000396 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800397 errmsg = "')' required to finish ATTLIST enumeration";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000399 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800400 errmsg = "MixedContentDecl : '|' or ')*' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000401 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000402 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800403 errmsg = "MixedContentDecl : '#PCDATA' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000405 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800406 errmsg = "ContentDecl : Name or '(' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000407 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000408 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800409 errmsg = "ContentDecl : ',' '|' or ')' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000410 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000411 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000412 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800413 "PEReference: forbidden within markup decl in internal subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000415 case XML_ERR_GT_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800416 errmsg = "expected '>'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000418 case XML_ERR_CONDSEC_INVALID:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800419 errmsg = "XML conditional section '[' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000421 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800422 errmsg = "Content error in the external subset";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 break;
424 case XML_ERR_CONDSEC_INVALID_KEYWORD:
425 errmsg =
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800426 "conditional section INCLUDE or IGNORE keyword expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800429 errmsg = "XML conditional section not closed";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800432 errmsg = "Text declaration '<?xml' required";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800435 errmsg = "parsing XML declaration: '?>' expected";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800438 errmsg = "external parsed entities cannot be standalone";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800441 errmsg = "EntityRef: expecting ';'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800444 errmsg = "DOCTYPE improperly terminated";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800447 errmsg = "EndTag: '</' not found";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800450 errmsg = "expected '='";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800453 errmsg = "String not closed expecting \" or '";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000454 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000455 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800456 errmsg = "String not started expecting ' or \"";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458 case XML_ERR_ENCODING_NAME:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800459 errmsg = "Invalid XML encoding name";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000460 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000461 case XML_ERR_STANDALONE_VALUE:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800462 errmsg = "standalone accepts only 'yes' or 'no'";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000464 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800465 errmsg = "Document is empty";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000466 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000467 case XML_ERR_DOCUMENT_END:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800468 errmsg = "Extra content at the end of the document";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000470 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800471 errmsg = "chunk is not well balanced";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000473 case XML_ERR_EXTRA_CONTENT:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800474 errmsg = "extra content at the end of well balanced chunk";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000475 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000476 case XML_ERR_VERSION_MISSING:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800477 errmsg = "Malformed declaration expecting version";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000478 break;
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800479 case XML_ERR_NAME_TOO_LONG:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800480 errmsg = "Name too long use XML_PARSE_HUGE option";
Daniel Veillard52d8ade2012-07-30 10:08:45 +0800481 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000482#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000483 case:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800484 errmsg = "";
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000485 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000486#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487 default:
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800488 errmsg = "Unregistered error message";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000489 }
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800490 if (info == NULL)
491 snprintf(errstr, 128, "%s\n", errmsg);
492 else
493 snprintf(errstr, 128, "%s: %%s\n", errmsg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000494 if (ctxt != NULL)
495 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000496 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillarde7bf8922012-07-30 20:09:25 +0800497 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, &errstr[0],
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000498 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000499 if (ctxt != NULL) {
500 ctxt->wellFormed = 0;
501 if (ctxt->recovery == 0)
502 ctxt->disableSAX = 1;
503 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000504}
505
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000506/**
507 * xmlFatalErrMsg:
508 * @ctxt: an XML parser context
509 * @error: the error number
510 * @msg: the error message
511 *
512 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
513 */
514static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000515xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
516 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000517{
Daniel Veillard157fee02003-10-31 10:36:03 +0000518 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
519 (ctxt->instate == XML_PARSER_EOF))
520 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000521 if (ctxt != NULL)
522 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000523 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200524 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000525 if (ctxt != NULL) {
526 ctxt->wellFormed = 0;
527 if (ctxt->recovery == 0)
528 ctxt->disableSAX = 1;
529 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000530}
531
532/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000533 * xmlWarningMsg:
534 * @ctxt: an XML parser context
535 * @error: the error number
536 * @msg: the error message
537 * @str1: extra data
538 * @str2: extra data
539 *
540 * Handle a warning.
541 */
542static void
543xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
544 const char *msg, const xmlChar *str1, const xmlChar *str2)
545{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000546 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000547
Daniel Veillard157fee02003-10-31 10:36:03 +0000548 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
549 (ctxt->instate == XML_PARSER_EOF))
550 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000551 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
552 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000553 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200554 if (ctxt != NULL) {
555 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000556 (ctxt->sax) ? ctxt->sax->warning : NULL,
557 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000558 ctxt, NULL, XML_FROM_PARSER, error,
559 XML_ERR_WARNING, NULL, 0,
560 (const char *) str1, (const char *) str2, NULL, 0, 0,
561 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200562 } else {
563 __xmlRaiseError(schannel, NULL, NULL,
564 ctxt, NULL, XML_FROM_PARSER, error,
565 XML_ERR_WARNING, NULL, 0,
566 (const char *) str1, (const char *) str2, NULL, 0, 0,
567 msg, (const char *) str1, (const char *) str2);
568 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000569}
570
571/**
572 * xmlValidityError:
573 * @ctxt: an XML parser context
574 * @error: the error number
575 * @msg: the error message
576 * @str1: extra data
577 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000578 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000579 */
580static void
581xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000582 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000583{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000584 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000585
586 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
587 (ctxt->instate == XML_PARSER_EOF))
588 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000589 if (ctxt != NULL) {
590 ctxt->errNo = error;
591 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
592 schannel = ctxt->sax->serror;
593 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200594 if (ctxt != NULL) {
595 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000596 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000597 ctxt, NULL, XML_FROM_DTD, error,
598 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000599 (const char *) str2, NULL, 0, 0,
600 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000601 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200602 } else {
603 __xmlRaiseError(schannel, NULL, NULL,
604 ctxt, NULL, XML_FROM_DTD, error,
605 XML_ERR_ERROR, NULL, 0, (const char *) str1,
606 (const char *) str2, NULL, 0, 0,
607 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000608 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000609}
610
611/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000612 * xmlFatalErrMsgInt:
613 * @ctxt: an XML parser context
614 * @error: the error number
615 * @msg: the error message
616 * @val: an integer value
617 *
618 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
619 */
620static void
621xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000622 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000623{
Daniel Veillard157fee02003-10-31 10:36:03 +0000624 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
625 (ctxt->instate == XML_PARSER_EOF))
626 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000627 if (ctxt != NULL)
628 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000629 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000630 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
631 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000632 if (ctxt != NULL) {
633 ctxt->wellFormed = 0;
634 if (ctxt->recovery == 0)
635 ctxt->disableSAX = 1;
636 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000637}
638
639/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000640 * xmlFatalErrMsgStrIntStr:
641 * @ctxt: an XML parser context
642 * @error: the error number
643 * @msg: the error message
644 * @str1: an string info
645 * @val: an integer value
646 * @str2: an string info
647 *
648 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
649 */
650static void
651xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
652 const char *msg, const xmlChar *str1, int val,
653 const xmlChar *str2)
654{
Daniel Veillard157fee02003-10-31 10:36:03 +0000655 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
656 (ctxt->instate == XML_PARSER_EOF))
657 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000658 if (ctxt != NULL)
659 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000660 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000661 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
662 NULL, 0, (const char *) str1, (const char *) str2,
663 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000664 if (ctxt != NULL) {
665 ctxt->wellFormed = 0;
666 if (ctxt->recovery == 0)
667 ctxt->disableSAX = 1;
668 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000669}
670
671/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000672 * xmlFatalErrMsgStr:
673 * @ctxt: an XML parser context
674 * @error: the error number
675 * @msg: the error message
676 * @val: a string value
677 *
678 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
679 */
680static void
681xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000682 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000683{
Daniel Veillard157fee02003-10-31 10:36:03 +0000684 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
685 (ctxt->instate == XML_PARSER_EOF))
686 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000687 if (ctxt != NULL)
688 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000689 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000690 XML_FROM_PARSER, error, XML_ERR_FATAL,
691 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
692 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000693 if (ctxt != NULL) {
694 ctxt->wellFormed = 0;
695 if (ctxt->recovery == 0)
696 ctxt->disableSAX = 1;
697 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000698}
699
700/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000701 * xmlErrMsgStr:
702 * @ctxt: an XML parser context
703 * @error: the error number
704 * @msg: the error message
705 * @val: a string value
706 *
707 * Handle a non fatal parser error
708 */
709static void
710xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
711 const char *msg, const xmlChar * val)
712{
Daniel Veillard157fee02003-10-31 10:36:03 +0000713 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
714 (ctxt->instate == XML_PARSER_EOF))
715 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000716 if (ctxt != NULL)
717 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000718 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000719 XML_FROM_PARSER, error, XML_ERR_ERROR,
720 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
721 val);
722}
723
724/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000725 * xmlNsErr:
726 * @ctxt: an XML parser context
727 * @error: the error number
728 * @msg: the message
729 * @info1: extra information string
730 * @info2: extra information string
731 *
732 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
733 */
734static void
735xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
736 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000737 const xmlChar * info1, const xmlChar * info2,
738 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000739{
Daniel Veillard157fee02003-10-31 10:36:03 +0000740 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
741 (ctxt->instate == XML_PARSER_EOF))
742 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000743 if (ctxt != NULL)
744 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000745 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000746 XML_ERR_ERROR, NULL, 0, (const char *) info1,
747 (const char *) info2, (const char *) info3, 0, 0, msg,
748 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000749 if (ctxt != NULL)
750 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000751}
752
Daniel Veillard37334572008-07-31 08:20:02 +0000753/**
754 * xmlNsWarn
755 * @ctxt: an XML parser context
756 * @error: the error number
757 * @msg: the message
758 * @info1: extra information string
759 * @info2: extra information string
760 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800761 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000762 */
763static void
764xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
765 const char *msg,
766 const xmlChar * info1, const xmlChar * info2,
767 const xmlChar * info3)
768{
769 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
770 (ctxt->instate == XML_PARSER_EOF))
771 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000772 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
773 XML_ERR_WARNING, NULL, 0, (const char *) info1,
774 (const char *) info2, (const char *) info3, 0, 0, msg,
775 info1, info2, info3);
776}
777
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000778/************************************************************************
779 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000780 * Library wide options *
781 * *
782 ************************************************************************/
783
784/**
785 * xmlHasFeature:
786 * @feature: the feature to be examined
787 *
788 * Examines if the library has been compiled with a given feature.
789 *
790 * Returns a non-zero value if the feature exist, otherwise zero.
791 * Returns zero (0) if the feature does not exist or an unknown
792 * unknown feature is requested, non-zero otherwise.
793 */
794int
795xmlHasFeature(xmlFeature feature)
796{
797 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef LIBXML_THREAD_ENABLED
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_TREE_ENABLED
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000810 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000811#ifdef LIBXML_OUTPUT_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000816 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000817#ifdef LIBXML_PUSH_ENABLED
818 return(1);
819#else
820 return(0);
821#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000822 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000823#ifdef LIBXML_READER_ENABLED
824 return(1);
825#else
826 return(0);
827#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000828 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000829#ifdef LIBXML_PATTERN_ENABLED
830 return(1);
831#else
832 return(0);
833#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000834 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000835#ifdef LIBXML_WRITER_ENABLED
836 return(1);
837#else
838 return(0);
839#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000840 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000841#ifdef LIBXML_SAX1_ENABLED
842 return(1);
843#else
844 return(0);
845#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000846 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000847#ifdef LIBXML_FTP_ENABLED
848 return(1);
849#else
850 return(0);
851#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000852 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000853#ifdef LIBXML_HTTP_ENABLED
854 return(1);
855#else
856 return(0);
857#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000858 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000859#ifdef LIBXML_VALID_ENABLED
860 return(1);
861#else
862 return(0);
863#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000864 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000865#ifdef LIBXML_HTML_ENABLED
866 return(1);
867#else
868 return(0);
869#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000870 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000871#ifdef LIBXML_LEGACY_ENABLED
872 return(1);
873#else
874 return(0);
875#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000876 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000877#ifdef LIBXML_C14N_ENABLED
878 return(1);
879#else
880 return(0);
881#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000882 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000883#ifdef LIBXML_CATALOG_ENABLED
884 return(1);
885#else
886 return(0);
887#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000888 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000889#ifdef LIBXML_XPATH_ENABLED
890 return(1);
891#else
892 return(0);
893#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000894 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000895#ifdef LIBXML_XPTR_ENABLED
896 return(1);
897#else
898 return(0);
899#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000900 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000901#ifdef LIBXML_XINCLUDE_ENABLED
902 return(1);
903#else
904 return(0);
905#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000906 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000907#ifdef LIBXML_ICONV_ENABLED
908 return(1);
909#else
910 return(0);
911#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000912 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000913#ifdef LIBXML_ISO8859X_ENABLED
914 return(1);
915#else
916 return(0);
917#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000918 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000919#ifdef LIBXML_UNICODE_ENABLED
920 return(1);
921#else
922 return(0);
923#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000924 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000925#ifdef LIBXML_REGEXP_ENABLED
926 return(1);
927#else
928 return(0);
929#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000930 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000931#ifdef LIBXML_AUTOMATA_ENABLED
932 return(1);
933#else
934 return(0);
935#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000936 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000937#ifdef LIBXML_EXPR_ENABLED
938 return(1);
939#else
940 return(0);
941#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000942 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000943#ifdef LIBXML_SCHEMAS_ENABLED
944 return(1);
945#else
946 return(0);
947#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000948 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000949#ifdef LIBXML_SCHEMATRON_ENABLED
950 return(1);
951#else
952 return(0);
953#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000954 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000955#ifdef LIBXML_MODULES_ENABLED
956 return(1);
957#else
958 return(0);
959#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000960 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000961#ifdef LIBXML_DEBUG_ENABLED
962 return(1);
963#else
964 return(0);
965#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000966 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000967#ifdef DEBUG_MEMORY_LOCATION
968 return(1);
969#else
970 return(0);
971#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000972 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000973#ifdef LIBXML_DEBUG_RUNTIME
974 return(1);
975#else
976 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000977#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000978 case XML_WITH_ZLIB:
979#ifdef LIBXML_ZLIB_ENABLED
980 return(1);
981#else
982 return(0);
983#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +0200984 case XML_WITH_LZMA:
985#ifdef LIBXML_LZMA_ENABLED
986 return(1);
987#else
988 return(0);
989#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100990 case XML_WITH_ICU:
991#ifdef LIBXML_ICU_ENABLED
992 return(1);
993#else
994 return(0);
995#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000996 default:
997 break;
998 }
999 return(0);
1000}
1001
1002/************************************************************************
1003 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001004 * SAX2 defaulted attributes handling *
1005 * *
1006 ************************************************************************/
1007
1008/**
1009 * xmlDetectSAX2:
1010 * @ctxt: an XML parser context
1011 *
1012 * Do the SAX2 detection and specific intialization
1013 */
1014static void
1015xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
1016 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +00001017#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +00001018 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1019 ((ctxt->sax->startElementNs != NULL) ||
1020 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001021#else
1022 ctxt->sax2 = 1;
1023#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001024
1025 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1026 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1027 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +00001028 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1029 (ctxt->str_xml_ns == NULL)) {
1030 xmlErrMemory(ctxt, NULL);
1031 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001032}
1033
Daniel Veillarde57ec792003-09-10 10:50:59 +00001034typedef struct _xmlDefAttrs xmlDefAttrs;
1035typedef xmlDefAttrs *xmlDefAttrsPtr;
1036struct _xmlDefAttrs {
1037 int nbAttrs; /* number of defaulted attributes on that element */
1038 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001039 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001040};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001041
1042/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001043 * xmlAttrNormalizeSpace:
1044 * @src: the source string
1045 * @dst: the target string
1046 *
1047 * Normalize the space in non CDATA attribute values:
1048 * If the attribute type is not CDATA, then the XML processor MUST further
1049 * process the normalized attribute value by discarding any leading and
1050 * trailing space (#x20) characters, and by replacing sequences of space
1051 * (#x20) characters by a single space (#x20) character.
1052 * Note that the size of dst need to be at least src, and if one doesn't need
1053 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1054 * passing src as dst is just fine.
1055 *
1056 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1057 * is needed.
1058 */
1059static xmlChar *
1060xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1061{
1062 if ((src == NULL) || (dst == NULL))
1063 return(NULL);
1064
1065 while (*src == 0x20) src++;
1066 while (*src != 0) {
1067 if (*src == 0x20) {
1068 while (*src == 0x20) src++;
1069 if (*src != 0)
1070 *dst++ = 0x20;
1071 } else {
1072 *dst++ = *src++;
1073 }
1074 }
1075 *dst = 0;
1076 if (dst == src)
1077 return(NULL);
1078 return(dst);
1079}
1080
1081/**
1082 * xmlAttrNormalizeSpace2:
1083 * @src: the source string
1084 *
1085 * Normalize the space in non CDATA attribute values, a slightly more complex
1086 * front end to avoid allocation problems when running on attribute values
1087 * coming from the input.
1088 *
1089 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1090 * is needed.
1091 */
1092static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001093xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001094{
1095 int i;
1096 int remove_head = 0;
1097 int need_realloc = 0;
1098 const xmlChar *cur;
1099
1100 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1101 return(NULL);
1102 i = *len;
1103 if (i <= 0)
1104 return(NULL);
1105
1106 cur = src;
1107 while (*cur == 0x20) {
1108 cur++;
1109 remove_head++;
1110 }
1111 while (*cur != 0) {
1112 if (*cur == 0x20) {
1113 cur++;
1114 if ((*cur == 0x20) || (*cur == 0)) {
1115 need_realloc = 1;
1116 break;
1117 }
1118 } else
1119 cur++;
1120 }
1121 if (need_realloc) {
1122 xmlChar *ret;
1123
1124 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1125 if (ret == NULL) {
1126 xmlErrMemory(ctxt, NULL);
1127 return(NULL);
1128 }
1129 xmlAttrNormalizeSpace(ret, ret);
1130 *len = (int) strlen((const char *)ret);
1131 return(ret);
1132 } else if (remove_head) {
1133 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001134 memmove(src, src + remove_head, 1 + *len);
1135 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001136 }
1137 return(NULL);
1138}
1139
1140/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001141 * xmlAddDefAttrs:
1142 * @ctxt: an XML parser context
1143 * @fullname: the element fullname
1144 * @fullattr: the attribute fullname
1145 * @value: the attribute value
1146 *
1147 * Add a defaulted attribute for an element
1148 */
1149static void
1150xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1151 const xmlChar *fullname,
1152 const xmlChar *fullattr,
1153 const xmlChar *value) {
1154 xmlDefAttrsPtr defaults;
1155 int len;
1156 const xmlChar *name;
1157 const xmlChar *prefix;
1158
Daniel Veillard6a31b832008-03-26 14:06:44 +00001159 /*
1160 * Allows to detect attribute redefinitions
1161 */
1162 if (ctxt->attsSpecial != NULL) {
1163 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1164 return;
1165 }
1166
Daniel Veillarde57ec792003-09-10 10:50:59 +00001167 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001168 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001169 if (ctxt->attsDefault == NULL)
1170 goto mem_error;
1171 }
1172
1173 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001174 * split the element name into prefix:localname , the string found
1175 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001176 */
1177 name = xmlSplitQName3(fullname, &len);
1178 if (name == NULL) {
1179 name = xmlDictLookup(ctxt->dict, fullname, -1);
1180 prefix = NULL;
1181 } else {
1182 name = xmlDictLookup(ctxt->dict, name, -1);
1183 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1184 }
1185
1186 /*
1187 * make sure there is some storage
1188 */
1189 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1190 if (defaults == NULL) {
1191 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001192 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001193 if (defaults == NULL)
1194 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001195 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001196 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001197 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1198 defaults, NULL) < 0) {
1199 xmlFree(defaults);
1200 goto mem_error;
1201 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001202 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001203 xmlDefAttrsPtr temp;
1204
1205 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001206 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001207 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001208 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001209 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001210 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001211 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1212 defaults, NULL) < 0) {
1213 xmlFree(defaults);
1214 goto mem_error;
1215 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001216 }
1217
1218 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001219 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001220 * are within the DTD and hen not associated to namespace names.
1221 */
1222 name = xmlSplitQName3(fullattr, &len);
1223 if (name == NULL) {
1224 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1225 prefix = NULL;
1226 } else {
1227 name = xmlDictLookup(ctxt->dict, name, -1);
1228 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1229 }
1230
Daniel Veillardae0765b2008-07-31 19:54:59 +00001231 defaults->values[5 * defaults->nbAttrs] = name;
1232 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001233 /* intern the string and precompute the end */
1234 len = xmlStrlen(value);
1235 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001236 defaults->values[5 * defaults->nbAttrs + 2] = value;
1237 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1238 if (ctxt->external)
1239 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1240 else
1241 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001242 defaults->nbAttrs++;
1243
1244 return;
1245
1246mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001247 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001248 return;
1249}
1250
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001251/**
1252 * xmlAddSpecialAttr:
1253 * @ctxt: an XML parser context
1254 * @fullname: the element fullname
1255 * @fullattr: the attribute fullname
1256 * @type: the attribute type
1257 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001258 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001259 */
1260static void
1261xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1262 const xmlChar *fullname,
1263 const xmlChar *fullattr,
1264 int type)
1265{
1266 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001267 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001268 if (ctxt->attsSpecial == NULL)
1269 goto mem_error;
1270 }
1271
Daniel Veillardac4118d2008-01-11 05:27:32 +00001272 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1273 return;
1274
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001275 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1276 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001277 return;
1278
1279mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001280 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001281 return;
1282}
1283
Daniel Veillard4432df22003-09-28 18:58:27 +00001284/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001285 * xmlCleanSpecialAttrCallback:
1286 *
1287 * Removes CDATA attributes from the special attribute table
1288 */
1289static void
1290xmlCleanSpecialAttrCallback(void *payload, void *data,
1291 const xmlChar *fullname, const xmlChar *fullattr,
1292 const xmlChar *unused ATTRIBUTE_UNUSED) {
1293 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1294
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001295 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001296 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1297 }
1298}
1299
1300/**
1301 * xmlCleanSpecialAttr:
1302 * @ctxt: an XML parser context
1303 *
1304 * Trim the list of attributes defined to remove all those of type
1305 * CDATA as they are not special. This call should be done when finishing
1306 * to parse the DTD and before starting to parse the document root.
1307 */
1308static void
1309xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1310{
1311 if (ctxt->attsSpecial == NULL)
1312 return;
1313
1314 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1315
1316 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1317 xmlHashFree(ctxt->attsSpecial, NULL);
1318 ctxt->attsSpecial = NULL;
1319 }
1320 return;
1321}
1322
1323/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001324 * xmlCheckLanguageID:
1325 * @lang: pointer to the string value
1326 *
1327 * Checks that the value conforms to the LanguageID production:
1328 *
1329 * NOTE: this is somewhat deprecated, those productions were removed from
1330 * the XML Second edition.
1331 *
1332 * [33] LanguageID ::= Langcode ('-' Subcode)*
1333 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1334 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1335 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1336 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1337 * [38] Subcode ::= ([a-z] | [A-Z])+
1338 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001339 * The current REC reference the sucessors of RFC 1766, currently 5646
1340 *
1341 * http://www.rfc-editor.org/rfc/rfc5646.txt
1342 * langtag = language
1343 * ["-" script]
1344 * ["-" region]
1345 * *("-" variant)
1346 * *("-" extension)
1347 * ["-" privateuse]
1348 * language = 2*3ALPHA ; shortest ISO 639 code
1349 * ["-" extlang] ; sometimes followed by
1350 * ; extended language subtags
1351 * / 4ALPHA ; or reserved for future use
1352 * / 5*8ALPHA ; or registered language subtag
1353 *
1354 * extlang = 3ALPHA ; selected ISO 639 codes
1355 * *2("-" 3ALPHA) ; permanently reserved
1356 *
1357 * script = 4ALPHA ; ISO 15924 code
1358 *
1359 * region = 2ALPHA ; ISO 3166-1 code
1360 * / 3DIGIT ; UN M.49 code
1361 *
1362 * variant = 5*8alphanum ; registered variants
1363 * / (DIGIT 3alphanum)
1364 *
1365 * extension = singleton 1*("-" (2*8alphanum))
1366 *
1367 * ; Single alphanumerics
1368 * ; "x" reserved for private use
1369 * singleton = DIGIT ; 0 - 9
1370 * / %x41-57 ; A - W
1371 * / %x59-5A ; Y - Z
1372 * / %x61-77 ; a - w
1373 * / %x79-7A ; y - z
1374 *
1375 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1376 * The parser below doesn't try to cope with extension or privateuse
1377 * that could be added but that's not interoperable anyway
1378 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001379 * Returns 1 if correct 0 otherwise
1380 **/
1381int
1382xmlCheckLanguageID(const xmlChar * lang)
1383{
Daniel Veillard60587d62010-11-04 15:16:27 +01001384 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001385
1386 if (cur == NULL)
1387 return (0);
1388 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001389 ((cur[0] == 'I') && (cur[1] == '-')) ||
1390 ((cur[0] == 'x') && (cur[1] == '-')) ||
1391 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001392 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001393 * Still allow IANA code and user code which were coming
1394 * from the previous version of the XML-1.0 specification
1395 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001396 */
1397 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001398 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001399 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1400 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001401 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001402 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001403 nxt = cur;
1404 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1405 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1406 nxt++;
1407 if (nxt - cur >= 4) {
1408 /*
1409 * Reserved
1410 */
1411 if ((nxt - cur > 8) || (nxt[0] != 0))
1412 return(0);
1413 return(1);
1414 }
1415 if (nxt - cur < 2)
1416 return(0);
1417 /* we got an ISO 639 code */
1418 if (nxt[0] == 0)
1419 return(1);
1420 if (nxt[0] != '-')
1421 return(0);
1422
1423 nxt++;
1424 cur = nxt;
1425 /* now we can have extlang or script or region or variant */
1426 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1427 goto region_m49;
1428
1429 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1430 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1431 nxt++;
1432 if (nxt - cur == 4)
1433 goto script;
1434 if (nxt - cur == 2)
1435 goto region;
1436 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1437 goto variant;
1438 if (nxt - cur != 3)
1439 return(0);
1440 /* we parsed an extlang */
1441 if (nxt[0] == 0)
1442 return(1);
1443 if (nxt[0] != '-')
1444 return(0);
1445
1446 nxt++;
1447 cur = nxt;
1448 /* now we can have script or region or variant */
1449 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1450 goto region_m49;
1451
1452 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1453 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1454 nxt++;
1455 if (nxt - cur == 2)
1456 goto region;
1457 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1458 goto variant;
1459 if (nxt - cur != 4)
1460 return(0);
1461 /* we parsed a script */
1462script:
1463 if (nxt[0] == 0)
1464 return(1);
1465 if (nxt[0] != '-')
1466 return(0);
1467
1468 nxt++;
1469 cur = nxt;
1470 /* now we can have region or variant */
1471 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1472 goto region_m49;
1473
1474 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1475 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1476 nxt++;
1477
1478 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1479 goto variant;
1480 if (nxt - cur != 2)
1481 return(0);
1482 /* we parsed a region */
1483region:
1484 if (nxt[0] == 0)
1485 return(1);
1486 if (nxt[0] != '-')
1487 return(0);
1488
1489 nxt++;
1490 cur = nxt;
1491 /* now we can just have a variant */
1492 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1493 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1494 nxt++;
1495
1496 if ((nxt - cur < 5) || (nxt - cur > 8))
1497 return(0);
1498
1499 /* we parsed a variant */
1500variant:
1501 if (nxt[0] == 0)
1502 return(1);
1503 if (nxt[0] != '-')
1504 return(0);
1505 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001506 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001507
1508region_m49:
1509 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1510 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1511 nxt += 3;
1512 goto region;
1513 }
1514 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001515}
1516
Owen Taylor3473f882001-02-23 17:55:21 +00001517/************************************************************************
1518 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001519 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001520 * *
1521 ************************************************************************/
1522
Daniel Veillard8ed10722009-08-20 19:17:36 +02001523static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1524 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001525
Daniel Veillard0fb18932003-09-07 09:14:37 +00001526#ifdef SAX2
1527/**
1528 * nsPush:
1529 * @ctxt: an XML parser context
1530 * @prefix: the namespace prefix or NULL
1531 * @URL: the namespace name
1532 *
1533 * Pushes a new parser namespace on top of the ns stack
1534 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001535 * Returns -1 in case of error, -2 if the namespace should be discarded
1536 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001537 */
1538static int
1539nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1540{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001541 if (ctxt->options & XML_PARSE_NSCLEAN) {
1542 int i;
1543 for (i = 0;i < ctxt->nsNr;i += 2) {
1544 if (ctxt->nsTab[i] == prefix) {
1545 /* in scope */
1546 if (ctxt->nsTab[i + 1] == URL)
1547 return(-2);
1548 /* out of scope keep it */
1549 break;
1550 }
1551 }
1552 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001553 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1554 ctxt->nsMax = 10;
1555 ctxt->nsNr = 0;
1556 ctxt->nsTab = (const xmlChar **)
1557 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1558 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001559 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001560 ctxt->nsMax = 0;
1561 return (-1);
1562 }
1563 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001564 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001565 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001566 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1567 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1568 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001569 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001570 ctxt->nsMax /= 2;
1571 return (-1);
1572 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001573 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001574 }
1575 ctxt->nsTab[ctxt->nsNr++] = prefix;
1576 ctxt->nsTab[ctxt->nsNr++] = URL;
1577 return (ctxt->nsNr);
1578}
1579/**
1580 * nsPop:
1581 * @ctxt: an XML parser context
1582 * @nr: the number to pop
1583 *
1584 * Pops the top @nr parser prefix/namespace from the ns stack
1585 *
1586 * Returns the number of namespaces removed
1587 */
1588static int
1589nsPop(xmlParserCtxtPtr ctxt, int nr)
1590{
1591 int i;
1592
1593 if (ctxt->nsTab == NULL) return(0);
1594 if (ctxt->nsNr < nr) {
1595 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1596 nr = ctxt->nsNr;
1597 }
1598 if (ctxt->nsNr <= 0)
1599 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001600
Daniel Veillard0fb18932003-09-07 09:14:37 +00001601 for (i = 0;i < nr;i++) {
1602 ctxt->nsNr--;
1603 ctxt->nsTab[ctxt->nsNr] = NULL;
1604 }
1605 return(nr);
1606}
1607#endif
1608
1609static int
1610xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1611 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001612 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001613 int maxatts;
1614
1615 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001616 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001617 atts = (const xmlChar **)
1618 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001619 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001620 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001621 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1622 if (attallocs == NULL) goto mem_error;
1623 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001624 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001625 } else if (nr + 5 > ctxt->maxatts) {
1626 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001627 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1628 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001629 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001630 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001631 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1632 (maxatts / 5) * sizeof(int));
1633 if (attallocs == NULL) goto mem_error;
1634 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001635 ctxt->maxatts = maxatts;
1636 }
1637 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001638mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001639 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001640 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001641}
1642
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001643/**
1644 * inputPush:
1645 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001646 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001647 *
1648 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001649 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001650 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001651 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001652int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001653inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1654{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001655 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001656 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001657 if (ctxt->inputNr >= ctxt->inputMax) {
1658 ctxt->inputMax *= 2;
1659 ctxt->inputTab =
1660 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1661 ctxt->inputMax *
1662 sizeof(ctxt->inputTab[0]));
1663 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001664 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001665 xmlFreeInputStream(value);
1666 ctxt->inputMax /= 2;
1667 value = NULL;
1668 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001669 }
1670 }
1671 ctxt->inputTab[ctxt->inputNr] = value;
1672 ctxt->input = value;
1673 return (ctxt->inputNr++);
1674}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001675/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001676 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001677 * @ctxt: an XML parser context
1678 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001679 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001680 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001681 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001682 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001683xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001684inputPop(xmlParserCtxtPtr ctxt)
1685{
1686 xmlParserInputPtr ret;
1687
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001688 if (ctxt == NULL)
1689 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001690 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001691 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692 ctxt->inputNr--;
1693 if (ctxt->inputNr > 0)
1694 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1695 else
1696 ctxt->input = NULL;
1697 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001698 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001699 return (ret);
1700}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001701/**
1702 * nodePush:
1703 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001704 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001705 *
1706 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001707 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001708 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001709 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001710int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001711nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1712{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001713 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001714 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001715 xmlNodePtr *tmp;
1716
1717 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1718 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001719 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001720 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001721 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001722 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001723 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001724 ctxt->nodeTab = tmp;
1725 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001726 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001727 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1728 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001729 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001730 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001731 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001732 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001733 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001734 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735 ctxt->nodeTab[ctxt->nodeNr] = value;
1736 ctxt->node = value;
1737 return (ctxt->nodeNr++);
1738}
Daniel Veillard8915c152008-08-26 13:05:34 +00001739
Daniel Veillard1c732d22002-11-30 11:22:59 +00001740/**
1741 * nodePop:
1742 * @ctxt: an XML parser context
1743 *
1744 * Pops the top element node from the node stack
1745 *
1746 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001747 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001748xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001749nodePop(xmlParserCtxtPtr ctxt)
1750{
1751 xmlNodePtr ret;
1752
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001753 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001754 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001755 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001756 ctxt->nodeNr--;
1757 if (ctxt->nodeNr > 0)
1758 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1759 else
1760 ctxt->node = NULL;
1761 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001762 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001763 return (ret);
1764}
Daniel Veillarda2351322004-06-27 12:08:10 +00001765
1766#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001767/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001768 * nameNsPush:
1769 * @ctxt: an XML parser context
1770 * @value: the element name
1771 * @prefix: the element prefix
1772 * @URI: the element namespace name
1773 *
1774 * Pushes a new element name/prefix/URL on top of the name stack
1775 *
1776 * Returns -1 in case of error, the index in the stack otherwise
1777 */
1778static int
1779nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1780 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1781{
1782 if (ctxt->nameNr >= ctxt->nameMax) {
1783 const xmlChar * *tmp;
1784 void **tmp2;
1785 ctxt->nameMax *= 2;
1786 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1787 ctxt->nameMax *
1788 sizeof(ctxt->nameTab[0]));
1789 if (tmp == NULL) {
1790 ctxt->nameMax /= 2;
1791 goto mem_error;
1792 }
1793 ctxt->nameTab = tmp;
1794 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1795 ctxt->nameMax * 3 *
1796 sizeof(ctxt->pushTab[0]));
1797 if (tmp2 == NULL) {
1798 ctxt->nameMax /= 2;
1799 goto mem_error;
1800 }
1801 ctxt->pushTab = tmp2;
1802 }
1803 ctxt->nameTab[ctxt->nameNr] = value;
1804 ctxt->name = value;
1805 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1806 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001807 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001808 return (ctxt->nameNr++);
1809mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001810 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001811 return (-1);
1812}
1813/**
1814 * nameNsPop:
1815 * @ctxt: an XML parser context
1816 *
1817 * Pops the top element/prefix/URI name from the name stack
1818 *
1819 * Returns the name just removed
1820 */
1821static const xmlChar *
1822nameNsPop(xmlParserCtxtPtr ctxt)
1823{
1824 const xmlChar *ret;
1825
1826 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001827 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001828 ctxt->nameNr--;
1829 if (ctxt->nameNr > 0)
1830 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1831 else
1832 ctxt->name = NULL;
1833 ret = ctxt->nameTab[ctxt->nameNr];
1834 ctxt->nameTab[ctxt->nameNr] = NULL;
1835 return (ret);
1836}
Daniel Veillarda2351322004-06-27 12:08:10 +00001837#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001838
1839/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001840 * namePush:
1841 * @ctxt: an XML parser context
1842 * @value: the element name
1843 *
1844 * Pushes a new element name on top of the name stack
1845 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001846 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001847 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001848int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001849namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001850{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001851 if (ctxt == NULL) return (-1);
1852
Daniel Veillard1c732d22002-11-30 11:22:59 +00001853 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001854 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001855 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001856 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001857 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001858 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001859 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001860 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001861 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001862 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001863 }
1864 ctxt->nameTab[ctxt->nameNr] = value;
1865 ctxt->name = value;
1866 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001867mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001868 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001869 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001870}
1871/**
1872 * namePop:
1873 * @ctxt: an XML parser context
1874 *
1875 * Pops the top element name from the name stack
1876 *
1877 * Returns the name just removed
1878 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001879const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001880namePop(xmlParserCtxtPtr ctxt)
1881{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001882 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001883
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001884 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1885 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001886 ctxt->nameNr--;
1887 if (ctxt->nameNr > 0)
1888 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1889 else
1890 ctxt->name = NULL;
1891 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001892 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001893 return (ret);
1894}
Owen Taylor3473f882001-02-23 17:55:21 +00001895
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001896static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001897 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001898 int *tmp;
1899
Owen Taylor3473f882001-02-23 17:55:21 +00001900 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001901 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1902 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1903 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001904 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001905 ctxt->spaceMax /=2;
1906 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001907 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001908 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001909 }
1910 ctxt->spaceTab[ctxt->spaceNr] = val;
1911 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1912 return(ctxt->spaceNr++);
1913}
1914
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001915static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001916 int ret;
1917 if (ctxt->spaceNr <= 0) return(0);
1918 ctxt->spaceNr--;
1919 if (ctxt->spaceNr > 0)
1920 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1921 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001922 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001923 ret = ctxt->spaceTab[ctxt->spaceNr];
1924 ctxt->spaceTab[ctxt->spaceNr] = -1;
1925 return(ret);
1926}
1927
1928/*
1929 * Macros for accessing the content. Those should be used only by the parser,
1930 * and not exported.
1931 *
1932 * Dirty macros, i.e. one often need to make assumption on the context to
1933 * use them
1934 *
1935 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1936 * To be used with extreme caution since operations consuming
1937 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
 *           This should be used internally by the parser only to
 *           compare against ASCII values, otherwise it would break
 *           when running with UTF-8 encoding.
1942 * RAW same as CUR but in the input buffer, bypass any token
1943 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used
 *           only to compare against ASCII-based substrings.
1946 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001947 * strings without newlines within the parser.
1948 * NEXT1(l) Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
1949 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001950 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1951 *
1952 * NEXT Skip to the next character, this does the proper decoding
1953 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001954 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001955 * CUR_CHAR(l) returns the current unicode character (int), set l
1956 * to the number of xmlChars used for the encoding [0-5].
1957 * CUR_SCHAR same but operate on a string instead of the context
1958 * COPY_BUF copy the current unicode char to the target buffer, increment
1959 * the index
1960 * GROW, SHRINK handling of input buffers
1961 */
1962
/*
 * Raw accessors on the current input of the parser context.  They all
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes found at s are
 * exactly c1 ... cn.  Bytes are compared as unsigned char, from left to
 * right, stopping at the first mismatch.  Every macro argument is
 * parenthesized so that arbitrary expressions can be passed safely, and
 * the cast keeps the pointed-to data const.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1985
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * character count and the column.  Afterwards a '%' at the new position
 * triggers parameter-entity handling, and the input is popped when it
 * is exhausted and cannot be grown.  val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1993
/*
 * SKIPL(val): like SKIP but walks the val xmlChars one at a time so that
 * newlines are accounted for in the line/column bookkeeping.
 * The argument is parenthesized so that any expression can be passed;
 * note that it is re-evaluated on every loop iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
2008
Daniel Veillarda880b122003-04-21 21:36:41 +00002009#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00002010 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
2011 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002012 xmlSHRINK (ctxt);
2013
2014static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
2015 xmlParserInputShrink(ctxt->input);
2016 if ((*ctxt->input->cur == 0) &&
2017 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2018 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002019 }
Owen Taylor3473f882001-02-23 17:55:21 +00002020
Daniel Veillarda880b122003-04-21 21:36:41 +00002021#define GROW if ((ctxt->progressive == 0) && \
2022 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002023 xmlGROW (ctxt);
2024
2025static void xmlGROW (xmlParserCtxtPtr ctxt) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002026 if ((((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
2027 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
2028 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2029 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
Daniel Veillard0df83ca2012-07-30 15:41:10 +08002030 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08002031 }
Daniel Veillard46de64e2002-05-29 08:21:33 +00002032 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002033 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002034 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2035 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002036}
Owen Taylor3473f882001-02-23 17:55:21 +00002037
/* Skip blanks / advance one character through the full decoding routines. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, growing the input when the new
 * current character is 0.  The expansion is a brace block, not a
 * do/while(0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * updating line/column, then handle a parameter-entity reference if the
 * new current character is '%'.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Fetch the current character; l receives its length in xmlChars. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a length l of 1 takes the single-byte shortcut.
 * The expansion is a bare if/else statement, so it must not be used as
 * the unbraced body of an enclosing if that has its own else.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002064
2065/**
2066 * xmlSkipBlankChars:
2067 * @ctxt: the XML parser context
2068 *
2069 * skip all blanks character found at that point in the input streams.
2070 * It pops up finished entities in the process if allowable at that point.
2071 *
2072 * Returns the number of space chars skipped
2073 */
2074
2075int
2076xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002077 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002078
2079 /*
2080 * It's Okay to use CUR/NEXT here since all the blanks are on
2081 * the ASCII range.
2082 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002083 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2084 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002085 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002086 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002087 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002088 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002089 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002090 if (*cur == '\n') {
2091 ctxt->input->line++; ctxt->input->col = 1;
2092 }
2093 cur++;
2094 res++;
2095 if (*cur == 0) {
2096 ctxt->input->cur = cur;
2097 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2098 cur = ctxt->input->cur;
2099 }
2100 }
2101 ctxt->input->cur = cur;
2102 } else {
2103 int cur;
2104 do {
2105 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002106 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002107 NEXT;
2108 cur = CUR;
2109 res++;
2110 }
2111 while ((cur == 0) && (ctxt->inputNr > 1) &&
2112 (ctxt->instate != XML_PARSER_COMMENT)) {
2113 xmlPopInput(ctxt);
2114 cur = CUR;
2115 }
2116 /*
2117 * Need to handle support of entities branching here
2118 */
2119 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2120 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2121 }
Owen Taylor3473f882001-02-23 17:55:21 +00002122 return(res);
2123}
2124
2125/************************************************************************
2126 * *
2127 * Commodity functions to handle entities *
2128 * *
2129 ************************************************************************/
2130
2131/**
2132 * xmlPopInput:
2133 * @ctxt: an XML parser context
2134 *
2135 * xmlPopInput: the current input pointed by ctxt->input came to an end
2136 * pop it and return the next char.
2137 *
2138 * Returns the current xmlChar in the parser context
2139 */
2140xmlChar
2141xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002142 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002143 if (xmlParserDebugEntities)
2144 xmlGenericError(xmlGenericErrorContext,
2145 "Popping input %d\n", ctxt->inputNr);
2146 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002147 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002148 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2149 return(xmlPopInput(ctxt));
2150 return(CUR);
2151}
2152
2153/**
2154 * xmlPushInput:
2155 * @ctxt: an XML parser context
2156 * @input: an XML parser input fragment (entity, XML fragment ...).
2157 *
2158 * xmlPushInput: switch to a new input stream which is stacked on top
2159 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002160 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002161 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002162int
Owen Taylor3473f882001-02-23 17:55:21 +00002163xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002164 int ret;
2165 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002166
2167 if (xmlParserDebugEntities) {
2168 if ((ctxt->input != NULL) && (ctxt->input->filename))
2169 xmlGenericError(xmlGenericErrorContext,
2170 "%s(%d): ", ctxt->input->filename,
2171 ctxt->input->line);
2172 xmlGenericError(xmlGenericErrorContext,
2173 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2174 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002175 ret = inputPush(ctxt, input);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002176 if (ctxt->instate == XML_PARSER_EOF)
2177 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002178 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002179 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002180}
2181
2182/**
2183 * xmlParseCharRef:
2184 * @ctxt: an XML parser context
2185 *
2186 * parse Reference declarations
2187 *
2188 * [66] CharRef ::= '&#' [0-9]+ ';' |
2189 * '&#x' [0-9a-fA-F]+ ';'
2190 *
2191 * [ WFC: Legal Character ]
2192 * Characters referred to using character references must match the
2193 * production for Char.
2194 *
2195 * Returns the value parsed (as an int), 0 in case of error
2196 */
2197int
2198xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002199 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002200 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002201 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002202
Owen Taylor3473f882001-02-23 17:55:21 +00002203 /*
2204 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2205 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002206 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002207 (NXT(2) == 'x')) {
2208 SKIP(3);
2209 GROW;
2210 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002211 if (count++ > 20) {
2212 count = 0;
2213 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002214 if (ctxt->instate == XML_PARSER_EOF)
2215 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002216 }
2217 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002218 val = val * 16 + (CUR - '0');
2219 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2220 val = val * 16 + (CUR - 'a') + 10;
2221 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2222 val = val * 16 + (CUR - 'A') + 10;
2223 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002224 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002225 val = 0;
2226 break;
2227 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002228 if (val > 0x10FFFF)
2229 outofrange = val;
2230
Owen Taylor3473f882001-02-23 17:55:21 +00002231 NEXT;
2232 count++;
2233 }
2234 if (RAW == ';') {
2235 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002236 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002237 ctxt->nbChars ++;
2238 ctxt->input->cur++;
2239 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002240 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002241 SKIP(2);
2242 GROW;
2243 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002244 if (count++ > 20) {
2245 count = 0;
2246 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002247 if (ctxt->instate == XML_PARSER_EOF)
2248 return(0);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002249 }
2250 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002251 val = val * 10 + (CUR - '0');
2252 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002253 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002254 val = 0;
2255 break;
2256 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002257 if (val > 0x10FFFF)
2258 outofrange = val;
2259
Owen Taylor3473f882001-02-23 17:55:21 +00002260 NEXT;
2261 count++;
2262 }
2263 if (RAW == ';') {
2264 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002265 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002266 ctxt->nbChars ++;
2267 ctxt->input->cur++;
2268 }
2269 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002270 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002271 }
2272
2273 /*
2274 * [ WFC: Legal Character ]
2275 * Characters referred to using character references must match the
2276 * production for Char.
2277 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002278 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002279 return(val);
2280 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002281 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2282 "xmlParseCharRef: invalid xmlChar value %d\n",
2283 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002284 }
2285 return(0);
2286}
2287
2288/**
2289 * xmlParseStringCharRef:
2290 * @ctxt: an XML parser context
2291 * @str: a pointer to an index in the string
2292 *
2293 * parse Reference declarations, variant parsing from a string rather
2294 * than an an input flow.
2295 *
2296 * [66] CharRef ::= '&#' [0-9]+ ';' |
2297 * '&#x' [0-9a-fA-F]+ ';'
2298 *
2299 * [ WFC: Legal Character ]
2300 * Characters referred to using character references must match the
2301 * production for Char.
2302 *
2303 * Returns the value parsed (as an int), 0 in case of error, str will be
2304 * updated to the current value of the index
2305 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002306static int
Owen Taylor3473f882001-02-23 17:55:21 +00002307xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2308 const xmlChar *ptr;
2309 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002310 unsigned int val = 0;
2311 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002312
2313 if ((str == NULL) || (*str == NULL)) return(0);
2314 ptr = *str;
2315 cur = *ptr;
2316 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2317 ptr += 3;
2318 cur = *ptr;
2319 while (cur != ';') { /* Non input consuming loop */
2320 if ((cur >= '0') && (cur <= '9'))
2321 val = val * 16 + (cur - '0');
2322 else if ((cur >= 'a') && (cur <= 'f'))
2323 val = val * 16 + (cur - 'a') + 10;
2324 else if ((cur >= 'A') && (cur <= 'F'))
2325 val = val * 16 + (cur - 'A') + 10;
2326 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002327 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002328 val = 0;
2329 break;
2330 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002331 if (val > 0x10FFFF)
2332 outofrange = val;
2333
Owen Taylor3473f882001-02-23 17:55:21 +00002334 ptr++;
2335 cur = *ptr;
2336 }
2337 if (cur == ';')
2338 ptr++;
2339 } else if ((cur == '&') && (ptr[1] == '#')){
2340 ptr += 2;
2341 cur = *ptr;
2342 while (cur != ';') { /* Non input consuming loops */
2343 if ((cur >= '0') && (cur <= '9'))
2344 val = val * 10 + (cur - '0');
2345 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002346 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002347 val = 0;
2348 break;
2349 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002350 if (val > 0x10FFFF)
2351 outofrange = val;
2352
Owen Taylor3473f882001-02-23 17:55:21 +00002353 ptr++;
2354 cur = *ptr;
2355 }
2356 if (cur == ';')
2357 ptr++;
2358 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002359 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002360 return(0);
2361 }
2362 *str = ptr;
2363
2364 /*
2365 * [ WFC: Legal Character ]
2366 * Characters referred to using character references must match the
2367 * production for Char.
2368 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002369 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002370 return(val);
2371 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002372 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2373 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2374 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002375 }
2376 return(0);
2377}
2378
2379/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002380 * xmlNewBlanksWrapperInputStream:
2381 * @ctxt: an XML parser context
2382 * @entity: an Entity pointer
2383 *
2384 * Create a new input stream for wrapping
2385 * blanks around a PEReference
2386 *
2387 * Returns the new input stream or NULL
2388 */
2389
2390static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2391
Daniel Veillardf4862f02002-09-10 11:13:43 +00002392static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002393xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2394 xmlParserInputPtr input;
2395 xmlChar *buffer;
2396 size_t length;
2397 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002398 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2399 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002400 return(NULL);
2401 }
2402 if (xmlParserDebugEntities)
2403 xmlGenericError(xmlGenericErrorContext,
2404 "new blanks wrapper for entity: %s\n", entity->name);
2405 input = xmlNewInputStream(ctxt);
2406 if (input == NULL) {
2407 return(NULL);
2408 }
2409 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002410 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002411 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002412 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002413 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002414 return(NULL);
2415 }
2416 buffer [0] = ' ';
2417 buffer [1] = '%';
2418 buffer [length-3] = ';';
2419 buffer [length-2] = ' ';
2420 buffer [length-1] = 0;
2421 memcpy(buffer + 2, entity->name, length - 5);
2422 input->free = deallocblankswrapper;
2423 input->base = buffer;
2424 input->cur = buffer;
2425 input->length = length;
2426 input->end = &buffer[length];
2427 return(input);
2428}
2429
2430/**
Owen Taylor3473f882001-02-23 17:55:21 +00002431 * xmlParserHandlePEReference:
2432 * @ctxt: the parser context
2433 *
2434 * [69] PEReference ::= '%' Name ';'
2435 *
2436 * [ WFC: No Recursion ]
2437 * A parsed entity must not contain a recursive
2438 * reference to itself, either directly or indirectly.
2439 *
2440 * [ WFC: Entity Declared ]
2441 * In a document without any DTD, a document with only an internal DTD
2442 * subset which contains no parameter entity references, or a document
2443 * with "standalone='yes'", ... ... The declaration of a parameter
2444 * entity must precede any reference to it...
2445 *
2446 * [ VC: Entity Declared ]
2447 * In a document with an external subset or external parameter entities
2448 * with "standalone='no'", ... ... The declaration of a parameter entity
2449 * must precede any reference to it...
2450 *
2451 * [ WFC: In DTD ]
2452 * Parameter-entity references may only appear in the DTD.
2453 * NOTE: misleading but this is handled.
2454 *
2455 * A PEReference may have been detected in the current input stream
2456 * the handling is done accordingly to
2457 * http://www.w3.org/TR/REC-xml#entproc
2458 * i.e.
2459 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002460 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002461 */
2462void
2463xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002464 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002465 xmlEntityPtr entity = NULL;
2466 xmlParserInputPtr input;
2467
Owen Taylor3473f882001-02-23 17:55:21 +00002468 if (RAW != '%') return;
2469 switch(ctxt->instate) {
2470 case XML_PARSER_CDATA_SECTION:
2471 return;
2472 case XML_PARSER_COMMENT:
2473 return;
2474 case XML_PARSER_START_TAG:
2475 return;
2476 case XML_PARSER_END_TAG:
2477 return;
2478 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002479 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002480 return;
2481 case XML_PARSER_PROLOG:
2482 case XML_PARSER_START:
2483 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002484 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002485 return;
2486 case XML_PARSER_ENTITY_DECL:
2487 case XML_PARSER_CONTENT:
2488 case XML_PARSER_ATTRIBUTE_VALUE:
2489 case XML_PARSER_PI:
2490 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002491 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002492 /* we just ignore it there */
2493 return;
2494 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002495 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002496 return;
2497 case XML_PARSER_ENTITY_VALUE:
2498 /*
2499 * NOTE: in the case of entity values, we don't do the
2500 * substitution here since we need the literal
2501 * entity value to be able to save the internal
2502 * subset of the document.
2503 * This will be handled by xmlStringDecodeEntities
2504 */
2505 return;
2506 case XML_PARSER_DTD:
2507 /*
2508 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2509 * In the internal DTD subset, parameter-entity references
2510 * can occur only where markup declarations can occur, not
2511 * within markup declarations.
2512 * In that case this is handled in xmlParseMarkupDecl
2513 */
2514 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2515 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002516 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002517 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002518 break;
2519 case XML_PARSER_IGNORE:
2520 return;
2521 }
2522
2523 NEXT;
2524 name = xmlParseName(ctxt);
2525 if (xmlParserDebugEntities)
2526 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002527 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002528 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002529 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002530 } else {
2531 if (RAW == ';') {
2532 NEXT;
2533 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2534 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2535 if (entity == NULL) {
2536
2537 /*
2538 * [ WFC: Entity Declared ]
2539 * In a document without any DTD, a document with only an
2540 * internal DTD subset which contains no parameter entity
2541 * references, or a document with "standalone='yes'", ...
2542 * ... The declaration of a parameter entity must precede
2543 * any reference to it...
2544 */
2545 if ((ctxt->standalone == 1) ||
2546 ((ctxt->hasExternalSubset == 0) &&
2547 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002548 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002549 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002550 } else {
2551 /*
2552 * [ VC: Entity Declared ]
2553 * In a document with an external subset or external
2554 * parameter entities with "standalone='no'", ...
2555 * ... The declaration of a parameter entity must precede
2556 * any reference to it...
2557 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002558 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2559 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2560 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002561 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002562 } else
2563 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2564 "PEReference: %%%s; not found\n",
2565 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002566 ctxt->valid = 0;
2567 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002568 } else if (ctxt->input->free != deallocblankswrapper) {
2569 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002570 if (xmlPushInput(ctxt, input) < 0)
2571 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002572 } else {
2573 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2574 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002575 xmlChar start[4];
2576 xmlCharEncoding enc;
2577
Owen Taylor3473f882001-02-23 17:55:21 +00002578 /*
2579 * handle the extra spaces added before and after
2580 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002581 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002582 */
2583 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002584 if (xmlPushInput(ctxt, input) < 0)
2585 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002586
2587 /*
2588 * Get the 4 first bytes and decode the charset
2589 * if enc != XML_CHAR_ENCODING_NONE
2590 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002591 * Note that, since we may have some non-UTF8
2592 * encoding (like UTF16, bug 135229), the 'length'
2593 * is not known, but we can calculate based upon
2594 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002595 */
2596 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08002597 if (ctxt->instate == XML_PARSER_EOF)
2598 return;
William M. Bracka0c48ad2004-04-16 15:58:29 +00002599 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002600 start[0] = RAW;
2601 start[1] = NXT(1);
2602 start[2] = NXT(2);
2603 start[3] = NXT(3);
2604 enc = xmlDetectCharEncoding(start, 4);
2605 if (enc != XML_CHAR_ENCODING_NONE) {
2606 xmlSwitchEncoding(ctxt, enc);
2607 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002608 }
2609
Owen Taylor3473f882001-02-23 17:55:21 +00002610 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002611 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2612 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002613 xmlParseTextDecl(ctxt);
2614 }
Owen Taylor3473f882001-02-23 17:55:21 +00002615 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002616 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2617 "PEReference: %s is not a parameter entity\n",
2618 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002619 }
2620 }
2621 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002622 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002623 }
Owen Taylor3473f882001-02-23 17:55:21 +00002624 }
2625}
2626
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus n; both the buffer
 * pointer and its size variable are only updated on success.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 *            jumped to when the size computation wraps around or when
 *            xmlRealloc fails (the original buffer is then untouched).
 * n is parenthesized in the expansion so that any expression can be
 * passed safely. The argument is evaluated exactly once.
 * NOTE: the expansion is a bare brace block rather than
 * do { } while (0), kept that way so that existing call sites are
 * unaffected.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2641
2642/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002643 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002644 * @ctxt: the parser context
2645 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002646 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002647 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2648 * @end: an end marker xmlChar, 0 if none
2649 * @end2: an end marker xmlChar, 0 if none
2650 * @end3: an end marker xmlChar, 0 if none
2651 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002652 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002653 *
2654 * [67] Reference ::= EntityRef | CharRef
2655 *
2656 * [69] PEReference ::= '%' Name ';'
2657 *
2658 * Returns A newly allocated string with the substitution done. The caller
2659 * must deallocate it !
2660 */
2661xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002662xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2663 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002664 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002665 size_t buffer_size = 0;
2666 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002667
2668 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002669 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002670 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002671 xmlEntityPtr ent;
2672 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002673
Daniel Veillarda82b1822004-11-08 16:24:57 +00002674 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002675 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002676 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002677
Daniel Veillard0161e632008-08-28 15:36:32 +00002678 if (((ctxt->depth > 40) &&
2679 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2680 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002681 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002682 return(NULL);
2683 }
2684
2685 /*
2686 * allocate a translation buffer.
2687 */
2688 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002689 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002690 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002691
2692 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002693 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002694 * we are operating on already parsed values.
2695 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002696 if (str < last)
2697 c = CUR_SCHAR(str, l);
2698 else
2699 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002700 while ((c != 0) && (c != end) && /* non input consuming loop */
2701 (c != end2) && (c != end3)) {
2702
2703 if (c == 0) break;
2704 if ((c == '&') && (str[1] == '#')) {
2705 int val = xmlParseStringCharRef(ctxt, &str);
2706 if (val != 0) {
2707 COPY_BUF(0,buffer,nbchars,val);
2708 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002709 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002710 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002711 }
Owen Taylor3473f882001-02-23 17:55:21 +00002712 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2713 if (xmlParserDebugEntities)
2714 xmlGenericError(xmlGenericErrorContext,
2715 "String decoding Entity Reference: %.30s\n",
2716 str);
2717 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002718 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2719 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002720 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002721 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002722 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002723 if ((ent != NULL) &&
2724 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2725 if (ent->content != NULL) {
2726 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002727 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002728 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002729 }
Owen Taylor3473f882001-02-23 17:55:21 +00002730 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002731 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2732 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002733 }
2734 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002735 ctxt->depth++;
2736 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2737 0, 0, 0);
2738 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002739
Owen Taylor3473f882001-02-23 17:55:21 +00002740 if (rep != NULL) {
2741 current = rep;
2742 while (*current != 0) { /* non input consuming loop */
2743 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002744 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002745 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2746 goto int_error;
2747 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002748 }
2749 }
2750 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002751 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002752 }
2753 } else if (ent != NULL) {
2754 int i = xmlStrlen(ent->name);
2755 const xmlChar *cur = ent->name;
2756
2757 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002758 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002759 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002760 }
2761 for (;i > 0;i--)
2762 buffer[nbchars++] = *cur++;
2763 buffer[nbchars++] = ';';
2764 }
2765 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2766 if (xmlParserDebugEntities)
2767 xmlGenericError(xmlGenericErrorContext,
2768 "String decoding PE Reference: %.30s\n", str);
2769 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002770 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2771 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002772 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002773 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002774 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002775 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002776 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002777 }
Owen Taylor3473f882001-02-23 17:55:21 +00002778 ctxt->depth++;
2779 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2780 0, 0, 0);
2781 ctxt->depth--;
2782 if (rep != NULL) {
2783 current = rep;
2784 while (*current != 0) { /* non input consuming loop */
2785 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002786 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002787 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2788 goto int_error;
2789 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002790 }
2791 }
2792 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002793 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002794 }
2795 }
2796 } else {
2797 COPY_BUF(l,buffer,nbchars,c);
2798 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002799 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2800 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002801 }
2802 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002803 if (str < last)
2804 c = CUR_SCHAR(str, l);
2805 else
2806 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002807 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002808 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002809 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002810
2811mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002812 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002813int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002814 if (rep != NULL)
2815 xmlFree(rep);
2816 if (buffer != NULL)
2817 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002818 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002819}
2820
Daniel Veillarde57ec792003-09-10 10:50:59 +00002821/**
2822 * xmlStringDecodeEntities:
2823 * @ctxt: the parser context
2824 * @str: the input string
2825 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2826 * @end: an end marker xmlChar, 0 if none
2827 * @end2: an end marker xmlChar, 0 if none
2828 * @end3: an end marker xmlChar, 0 if none
2829 *
2830 * Takes a entity string content and process to do the adequate substitutions.
2831 *
2832 * [67] Reference ::= EntityRef | CharRef
2833 *
2834 * [69] PEReference ::= '%' Name ';'
2835 *
2836 * Returns A newly allocated string with the substitution done. The caller
2837 * must deallocate it !
2838 */
2839xmlChar *
2840xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2841 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002842 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002843 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2844 end, end2, end3));
2845}
Owen Taylor3473f882001-02-23 17:55:21 +00002846
2847/************************************************************************
2848 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002849 * Commodity functions, cleanup needed ? *
2850 * *
2851 ************************************************************************/
2852
2853/**
2854 * areBlanks:
2855 * @ctxt: an XML parser context
2856 * @str: a xmlChar *
2857 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002858 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002859 *
2860 * Is this a sequence of blank chars that one can ignore ?
2861 *
2862 * Returns 1 if ignorable 0 otherwise.
2863 */
2864
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002865static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2866 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002867 int i, ret;
2868 xmlNodePtr lastChild;
2869
Daniel Veillard05c13a22001-09-09 08:38:09 +00002870 /*
2871 * Don't spend time trying to differentiate them, the same callback is
2872 * used !
2873 */
2874 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002875 return(0);
2876
Owen Taylor3473f882001-02-23 17:55:21 +00002877 /*
2878 * Check for xml:space value.
2879 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002880 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2881 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002882 return(0);
2883
2884 /*
2885 * Check that the string is made of blanks
2886 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002887 if (blank_chars == 0) {
2888 for (i = 0;i < len;i++)
2889 if (!(IS_BLANK_CH(str[i]))) return(0);
2890 }
Owen Taylor3473f882001-02-23 17:55:21 +00002891
2892 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002893 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002894 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002895 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002896 if (ctxt->myDoc != NULL) {
2897 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2898 if (ret == 0) return(1);
2899 if (ret == 1) return(0);
2900 }
2901
2902 /*
2903 * Otherwise, heuristic :-\
2904 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002905 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002906 if ((ctxt->node->children == NULL) &&
2907 (RAW == '<') && (NXT(1) == '/')) return(0);
2908
2909 lastChild = xmlGetLastChild(ctxt->node);
2910 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002911 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2912 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002913 } else if (xmlNodeIsText(lastChild))
2914 return(0);
2915 else if ((ctxt->node->children != NULL) &&
2916 (xmlNodeIsText(ctxt->node->children)))
2917 return(0);
2918 return(1);
2919}
2920
Owen Taylor3473f882001-02-23 17:55:21 +00002921/************************************************************************
2922 * *
2923 * Extra stuff for namespace support *
2924 * Relates to http://www.w3.org/TR/WD-xml-names *
2925 * *
2926 ************************************************************************/
2927
2928/**
2929 * xmlSplitQName:
2930 * @ctxt: an XML parser context
2931 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002932 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002933 *
2934 * parse an UTF8 encoded XML qualified name string
2935 *
2936 * [NS 5] QName ::= (Prefix ':')? LocalPart
2937 *
2938 * [NS 6] Prefix ::= NCName
2939 *
2940 * [NS 7] LocalPart ::= NCName
2941 *
2942 * Returns the local part, and prefix is updated
2943 * to get the Prefix if any.
2944 */
2945
2946xmlChar *
2947xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2948 xmlChar buf[XML_MAX_NAMELEN + 5];
2949 xmlChar *buffer = NULL;
2950 int len = 0;
2951 int max = XML_MAX_NAMELEN;
2952 xmlChar *ret = NULL;
2953 const xmlChar *cur = name;
2954 int c;
2955
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002956 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002957 *prefix = NULL;
2958
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002959 if (cur == NULL) return(NULL);
2960
Owen Taylor3473f882001-02-23 17:55:21 +00002961#ifndef XML_XML_NAMESPACE
2962 /* xml: prefix is not really a namespace */
2963 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2964 (cur[2] == 'l') && (cur[3] == ':'))
2965 return(xmlStrdup(name));
2966#endif
2967
Daniel Veillard597bc482003-07-24 16:08:28 +00002968 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002969 if (cur[0] == ':')
2970 return(xmlStrdup(name));
2971
2972 c = *cur++;
2973 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2974 buf[len++] = c;
2975 c = *cur++;
2976 }
2977 if (len >= max) {
2978 /*
2979 * Okay someone managed to make a huge name, so he's ready to pay
2980 * for the processing speed.
2981 */
2982 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002983
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002984 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002985 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002986 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002987 return(NULL);
2988 }
2989 memcpy(buffer, buf, len);
2990 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2991 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002992 xmlChar *tmp;
2993
Owen Taylor3473f882001-02-23 17:55:21 +00002994 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002995 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002996 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002997 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002998 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002999 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003000 return(NULL);
3001 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003002 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003003 }
3004 buffer[len++] = c;
3005 c = *cur++;
3006 }
3007 buffer[len] = 0;
3008 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00003009
Daniel Veillard597bc482003-07-24 16:08:28 +00003010 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00003011 if (buffer != NULL)
3012 xmlFree(buffer);
3013 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00003014 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00003015 }
Daniel Veillard597bc482003-07-24 16:08:28 +00003016
Owen Taylor3473f882001-02-23 17:55:21 +00003017 if (buffer == NULL)
3018 ret = xmlStrndup(buf, len);
3019 else {
3020 ret = buffer;
3021 buffer = NULL;
3022 max = XML_MAX_NAMELEN;
3023 }
3024
3025
3026 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00003027 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00003028 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00003029 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00003030 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00003031 }
Owen Taylor3473f882001-02-23 17:55:21 +00003032 len = 0;
3033
Daniel Veillardbb284f42002-10-16 18:02:47 +00003034 /*
3035 * Check that the first character is proper to start
3036 * a new name
3037 */
3038 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3039 ((c >= 0x41) && (c <= 0x5A)) ||
3040 (c == '_') || (c == ':'))) {
3041 int l;
3042 int first = CUR_SCHAR(cur, l);
3043
3044 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003045 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003046 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003047 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003048 }
3049 }
3050 cur++;
3051
Owen Taylor3473f882001-02-23 17:55:21 +00003052 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3053 buf[len++] = c;
3054 c = *cur++;
3055 }
3056 if (len >= max) {
3057 /*
3058 * Okay someone managed to make a huge name, so he's ready to pay
3059 * for the processing speed.
3060 */
3061 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003062
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003063 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003064 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003065 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003066 return(NULL);
3067 }
3068 memcpy(buffer, buf, len);
3069 while (c != 0) { /* tested bigname2.xml */
3070 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003071 xmlChar *tmp;
3072
Owen Taylor3473f882001-02-23 17:55:21 +00003073 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003074 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003075 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003076 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003077 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003078 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003079 return(NULL);
3080 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003081 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003082 }
3083 buffer[len++] = c;
3084 c = *cur++;
3085 }
3086 buffer[len] = 0;
3087 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003088
Owen Taylor3473f882001-02-23 17:55:21 +00003089 if (buffer == NULL)
3090 ret = xmlStrndup(buf, len);
3091 else {
3092 ret = buffer;
3093 }
3094 }
3095
3096 return(ret);
3097}
3098
3099/************************************************************************
3100 * *
3101 * The parser itself *
3102 * Relates to http://www.w3.org/TR/REC-xml *
3103 * *
3104 ************************************************************************/
3105
Daniel Veillard34e3f642008-07-29 09:02:27 +00003106/************************************************************************
3107 * *
3108 * Routines to parse Name, NCName and NmToken *
3109 * *
3110 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, compiled in only when DEBUG is defined. Each one is
 * incremented on entry to the name parsing routine it is named after.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3119
Daniel Veillard34e3f642008-07-29 09:02:27 +00003120/*
3121 * The two following functions are related to the change of accepted
3122 * characters for Name and NmToken in the Revision 5 of XML-1.0
3123 * They correspond to the modified production [4] and the new production [4a]
3124 * changes in that revision. Also note that the macros used for the
3125 * productions Letter, Digit, CombiningChar and Extender are not needed
3126 * anymore.
3127 * We still keep compatibility to pre-revision5 parsing semantic if the
3128 * new XML_PARSE_OLD10 option is given to the parser.
3129 */
3130static int
3131xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3132 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3133 /*
3134 * Use the new checks of production [4] [4a] amd [5] of the
3135 * Update 5 of XML-1.0
3136 */
3137 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3138 (((c >= 'a') && (c <= 'z')) ||
3139 ((c >= 'A') && (c <= 'Z')) ||
3140 (c == '_') || (c == ':') ||
3141 ((c >= 0xC0) && (c <= 0xD6)) ||
3142 ((c >= 0xD8) && (c <= 0xF6)) ||
3143 ((c >= 0xF8) && (c <= 0x2FF)) ||
3144 ((c >= 0x370) && (c <= 0x37D)) ||
3145 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3146 ((c >= 0x200C) && (c <= 0x200D)) ||
3147 ((c >= 0x2070) && (c <= 0x218F)) ||
3148 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3149 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3150 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3151 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3152 ((c >= 0x10000) && (c <= 0xEFFFF))))
3153 return(1);
3154 } else {
3155 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3156 return(1);
3157 }
3158 return(0);
3159}
3160
3161static int
3162xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3163 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3164 /*
3165 * Use the new checks of production [4] [4a] amd [5] of the
3166 * Update 5 of XML-1.0
3167 */
3168 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3169 (((c >= 'a') && (c <= 'z')) ||
3170 ((c >= 'A') && (c <= 'Z')) ||
3171 ((c >= '0') && (c <= '9')) || /* !start */
3172 (c == '_') || (c == ':') ||
3173 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3174 ((c >= 0xC0) && (c <= 0xD6)) ||
3175 ((c >= 0xD8) && (c <= 0xF6)) ||
3176 ((c >= 0xF8) && (c <= 0x2FF)) ||
3177 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3178 ((c >= 0x370) && (c <= 0x37D)) ||
3179 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3180 ((c >= 0x200C) && (c <= 0x200D)) ||
3181 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3182 ((c >= 0x2070) && (c <= 0x218F)) ||
3183 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3184 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3185 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3186 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3187 ((c >= 0x10000) && (c <= 0xEFFFF))))
3188 return(1);
3189 } else {
3190 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3191 (c == '.') || (c == '-') ||
3192 (c == '_') || (c == ':') ||
3193 (IS_COMBINING(c)) ||
3194 (IS_EXTENDER(c)))
3195 return(1);
3196 }
3197 return(0);
3198}
3199
/*
 * Forward declaration only: the definition is not in this part of the file.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
                                          int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003202
Daniel Veillard34e3f642008-07-29 09:02:27 +00003203static const xmlChar *
3204xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3205 int len = 0, l;
3206 int c;
3207 int count = 0;
3208
Daniel Veillardc6561462009-03-25 10:22:31 +00003209#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003210 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003211#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003212
3213 /*
3214 * Handler for more complex cases
3215 */
3216 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003217 if (ctxt->instate == XML_PARSER_EOF)
3218 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003219 c = CUR_CHAR(l);
3220 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3221 /*
3222 * Use the new checks of production [4] [4a] amd [5] of the
3223 * Update 5 of XML-1.0
3224 */
3225 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3226 (!(((c >= 'a') && (c <= 'z')) ||
3227 ((c >= 'A') && (c <= 'Z')) ||
3228 (c == '_') || (c == ':') ||
3229 ((c >= 0xC0) && (c <= 0xD6)) ||
3230 ((c >= 0xD8) && (c <= 0xF6)) ||
3231 ((c >= 0xF8) && (c <= 0x2FF)) ||
3232 ((c >= 0x370) && (c <= 0x37D)) ||
3233 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3234 ((c >= 0x200C) && (c <= 0x200D)) ||
3235 ((c >= 0x2070) && (c <= 0x218F)) ||
3236 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3237 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3238 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3239 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3240 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3241 return(NULL);
3242 }
3243 len += l;
3244 NEXTL(l);
3245 c = CUR_CHAR(l);
3246 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3247 (((c >= 'a') && (c <= 'z')) ||
3248 ((c >= 'A') && (c <= 'Z')) ||
3249 ((c >= '0') && (c <= '9')) || /* !start */
3250 (c == '_') || (c == ':') ||
3251 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3252 ((c >= 0xC0) && (c <= 0xD6)) ||
3253 ((c >= 0xD8) && (c <= 0xF6)) ||
3254 ((c >= 0xF8) && (c <= 0x2FF)) ||
3255 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3256 ((c >= 0x370) && (c <= 0x37D)) ||
3257 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3258 ((c >= 0x200C) && (c <= 0x200D)) ||
3259 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3260 ((c >= 0x2070) && (c <= 0x218F)) ||
3261 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3262 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3263 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3264 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3265 ((c >= 0x10000) && (c <= 0xEFFFF))
3266 )) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003267 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003268 count = 0;
3269 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003270 if (ctxt->instate == XML_PARSER_EOF)
3271 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003272 }
3273 len += l;
3274 NEXTL(l);
3275 c = CUR_CHAR(l);
3276 }
3277 } else {
3278 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3279 (!IS_LETTER(c) && (c != '_') &&
3280 (c != ':'))) {
3281 return(NULL);
3282 }
3283 len += l;
3284 NEXTL(l);
3285 c = CUR_CHAR(l);
3286
3287 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3288 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3289 (c == '.') || (c == '-') ||
3290 (c == '_') || (c == ':') ||
3291 (IS_COMBINING(c)) ||
3292 (IS_EXTENDER(c)))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003293 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003294 count = 0;
3295 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003296 if (ctxt->instate == XML_PARSER_EOF)
3297 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003298 }
3299 len += l;
3300 NEXTL(l);
3301 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003302 if (c == 0) {
3303 count = 0;
3304 GROW;
3305 if (ctxt->instate == XML_PARSER_EOF)
3306 return(NULL);
3307 c = CUR_CHAR(l);
3308 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003309 }
3310 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003311 if ((len > XML_MAX_NAME_LENGTH) &&
3312 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3313 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3314 return(NULL);
3315 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003316 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3317 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3318 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3319}
3320
Owen Taylor3473f882001-02-23 17:55:21 +00003321/**
3322 * xmlParseName:
3323 * @ctxt: an XML parser context
3324 *
3325 * parse an XML name.
3326 *
3327 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3328 * CombiningChar | Extender
3329 *
3330 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3331 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003332 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003333 *
3334 * Returns the Name parsed or NULL
3335 */
3336
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003337const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003338xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003339 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003340 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003341 int count = 0;
3342
3343 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003344
Daniel Veillardc6561462009-03-25 10:22:31 +00003345#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003346 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003347#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003348
Daniel Veillard48b2f892001-02-25 16:11:03 +00003349 /*
3350 * Accelerator for simple ASCII names
3351 */
3352 in = ctxt->input->cur;
3353 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3354 ((*in >= 0x41) && (*in <= 0x5A)) ||
3355 (*in == '_') || (*in == ':')) {
3356 in++;
3357 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3358 ((*in >= 0x41) && (*in <= 0x5A)) ||
3359 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003360 (*in == '_') || (*in == '-') ||
3361 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003362 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003363 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003364 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003365 if ((count > XML_MAX_NAME_LENGTH) &&
3366 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3367 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3368 return(NULL);
3369 }
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003370 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003371 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003372 ctxt->nbChars += count;
3373 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003374 if (ret == NULL)
3375 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003376 return(ret);
3377 }
3378 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003379 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003380 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003381}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003382
Daniel Veillard34e3f642008-07-29 09:02:27 +00003383static const xmlChar *
3384xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3385 int len = 0, l;
3386 int c;
3387 int count = 0;
3388
Daniel Veillardc6561462009-03-25 10:22:31 +00003389#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003390 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003391#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003392
3393 /*
3394 * Handler for more complex cases
3395 */
3396 GROW;
3397 c = CUR_CHAR(l);
3398 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3399 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3400 return(NULL);
3401 }
3402
3403 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3404 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003405 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003406 if ((len > XML_MAX_NAME_LENGTH) &&
3407 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3408 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3409 return(NULL);
3410 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003411 count = 0;
3412 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003413 if (ctxt->instate == XML_PARSER_EOF)
3414 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003415 }
3416 len += l;
3417 NEXTL(l);
3418 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003419 if (c == 0) {
3420 count = 0;
3421 GROW;
3422 if (ctxt->instate == XML_PARSER_EOF)
3423 return(NULL);
3424 c = CUR_CHAR(l);
3425 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003426 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003427 if ((len > XML_MAX_NAME_LENGTH) &&
3428 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3429 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3430 return(NULL);
3431 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003432 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3433}
3434
3435/**
3436 * xmlParseNCName:
3437 * @ctxt: an XML parser context
3438 * @len: lenght of the string parsed
3439 *
3440 * parse an XML name.
3441 *
3442 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3443 * CombiningChar | Extender
3444 *
3445 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3446 *
3447 * Returns the Name parsed or NULL
3448 */
3449
3450static const xmlChar *
3451xmlParseNCName(xmlParserCtxtPtr ctxt) {
3452 const xmlChar *in;
3453 const xmlChar *ret;
3454 int count = 0;
3455
Daniel Veillardc6561462009-03-25 10:22:31 +00003456#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003457 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003458#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003459
3460 /*
3461 * Accelerator for simple ASCII names
3462 */
3463 in = ctxt->input->cur;
3464 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3465 ((*in >= 0x41) && (*in <= 0x5A)) ||
3466 (*in == '_')) {
3467 in++;
3468 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3469 ((*in >= 0x41) && (*in <= 0x5A)) ||
3470 ((*in >= 0x30) && (*in <= 0x39)) ||
3471 (*in == '_') || (*in == '-') ||
3472 (*in == '.'))
3473 in++;
3474 if ((*in > 0) && (*in < 0x80)) {
3475 count = in - ctxt->input->cur;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003476 if ((count > XML_MAX_NAME_LENGTH) &&
3477 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3478 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3479 return(NULL);
3480 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003481 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3482 ctxt->input->cur = in;
3483 ctxt->nbChars += count;
3484 ctxt->input->col += count;
3485 if (ret == NULL) {
3486 xmlErrMemory(ctxt, NULL);
3487 }
3488 return(ret);
3489 }
3490 }
3491 return(xmlParseNCNameComplex(ctxt));
3492}
3493
Daniel Veillard46de64e2002-05-29 08:21:33 +00003494/**
3495 * xmlParseNameAndCompare:
3496 * @ctxt: an XML parser context
3497 *
3498 * parse an XML name and compares for match
3499 * (specialized for endtag parsing)
3500 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003501 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3502 * and the name for mismatch
3503 */
3504
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003505static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003506xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003507 register const xmlChar *cmp = other;
3508 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003509 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003510
3511 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003512 if (ctxt->instate == XML_PARSER_EOF)
3513 return(NULL);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003514
Daniel Veillard46de64e2002-05-29 08:21:33 +00003515 in = ctxt->input->cur;
3516 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003517 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003518 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003519 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003520 }
William M. Brack76e95df2003-10-18 16:20:14 +00003521 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003522 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003523 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003524 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003525 }
3526 /* failure (or end of input buffer), check with full function */
3527 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003528 /* strings coming from the dictionnary direct compare possible */
3529 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003530 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003531 }
3532 return ret;
3533}
3534
Owen Taylor3473f882001-02-23 17:55:21 +00003535/**
3536 * xmlParseStringName:
3537 * @ctxt: an XML parser context
3538 * @str: a pointer to the string pointer (IN/OUT)
3539 *
3540 * parse an XML name.
3541 *
3542 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3543 * CombiningChar | Extender
3544 *
3545 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3546 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003547 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003548 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003549 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003550 * is updated to the current location in the string.
3551 */
3552
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003553static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003554xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3555 xmlChar buf[XML_MAX_NAMELEN + 5];
3556 const xmlChar *cur = *str;
3557 int len = 0, l;
3558 int c;
3559
Daniel Veillardc6561462009-03-25 10:22:31 +00003560#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003561 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003562#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003563
Owen Taylor3473f882001-02-23 17:55:21 +00003564 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003565 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003566 return(NULL);
3567 }
3568
Daniel Veillard34e3f642008-07-29 09:02:27 +00003569 COPY_BUF(l,buf,len,c);
3570 cur += l;
3571 c = CUR_SCHAR(cur, l);
3572 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003573 COPY_BUF(l,buf,len,c);
3574 cur += l;
3575 c = CUR_SCHAR(cur, l);
3576 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3577 /*
3578 * Okay someone managed to make a huge name, so he's ready to pay
3579 * for the processing speed.
3580 */
3581 xmlChar *buffer;
3582 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003583
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003584 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003585 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003586 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003587 return(NULL);
3588 }
3589 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003590 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003591 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003592 xmlChar *tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003593
3594 if ((len > XML_MAX_NAME_LENGTH) &&
3595 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3596 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3597 xmlFree(buffer);
3598 return(NULL);
3599 }
Owen Taylor3473f882001-02-23 17:55:21 +00003600 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003601 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003602 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003603 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003604 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003605 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003606 return(NULL);
3607 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003608 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003609 }
3610 COPY_BUF(l,buffer,len,c);
3611 cur += l;
3612 c = CUR_SCHAR(cur, l);
3613 }
3614 buffer[len] = 0;
3615 *str = cur;
3616 return(buffer);
3617 }
3618 }
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003619 if ((len > XML_MAX_NAME_LENGTH) &&
3620 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3621 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3622 return(NULL);
3623 }
Owen Taylor3473f882001-02-23 17:55:21 +00003624 *str = cur;
3625 return(xmlStrndup(buf, len));
3626}
3627
3628/**
3629 * xmlParseNmtoken:
3630 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003631 *
Owen Taylor3473f882001-02-23 17:55:21 +00003632 * parse an XML Nmtoken.
3633 *
3634 * [7] Nmtoken ::= (NameChar)+
3635 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003636 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003637 *
3638 * Returns the Nmtoken parsed or NULL
3639 */
3640
3641xmlChar *
3642xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3643 xmlChar buf[XML_MAX_NAMELEN + 5];
3644 int len = 0, l;
3645 int c;
3646 int count = 0;
3647
Daniel Veillardc6561462009-03-25 10:22:31 +00003648#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003649 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003650#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003651
Owen Taylor3473f882001-02-23 17:55:21 +00003652 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003653 if (ctxt->instate == XML_PARSER_EOF)
3654 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003655 c = CUR_CHAR(l);
3656
Daniel Veillard34e3f642008-07-29 09:02:27 +00003657 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003658 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003659 count = 0;
3660 GROW;
3661 }
3662 COPY_BUF(l,buf,len,c);
3663 NEXTL(l);
3664 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08003665 if (c == 0) {
3666 count = 0;
3667 GROW;
3668 if (ctxt->instate == XML_PARSER_EOF)
3669 return(NULL);
3670 c = CUR_CHAR(l);
3671 }
Owen Taylor3473f882001-02-23 17:55:21 +00003672 if (len >= XML_MAX_NAMELEN) {
3673 /*
3674 * Okay someone managed to make a huge token, so he's ready to pay
3675 * for the processing speed.
3676 */
3677 xmlChar *buffer;
3678 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003679
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003680 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003681 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003682 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003683 return(NULL);
3684 }
3685 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003686 while (xmlIsNameChar(ctxt, c)) {
Daniel Veillard1f972e92012-08-15 10:16:37 +08003687 if (count++ > XML_PARSER_CHUNK_SIZE) {
Owen Taylor3473f882001-02-23 17:55:21 +00003688 count = 0;
3689 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003690 if (ctxt->instate == XML_PARSER_EOF) {
3691 xmlFree(buffer);
3692 return(NULL);
3693 }
Owen Taylor3473f882001-02-23 17:55:21 +00003694 }
3695 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003696 xmlChar *tmp;
3697
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003698 if ((max > XML_MAX_NAME_LENGTH) &&
3699 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3700 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3701 xmlFree(buffer);
3702 return(NULL);
3703 }
Owen Taylor3473f882001-02-23 17:55:21 +00003704 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003705 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003706 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003707 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003708 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003709 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003710 return(NULL);
3711 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003712 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003713 }
3714 COPY_BUF(l,buffer,len,c);
3715 NEXTL(l);
3716 c = CUR_CHAR(l);
3717 }
3718 buffer[len] = 0;
3719 return(buffer);
3720 }
3721 }
3722 if (len == 0)
3723 return(NULL);
Daniel Veillard52d8ade2012-07-30 10:08:45 +08003724 if ((len > XML_MAX_NAME_LENGTH) &&
3725 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3726 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3727 return(NULL);
3728 }
Owen Taylor3473f882001-02-23 17:55:21 +00003729 return(xmlStrndup(buf, len));
3730}
3731
3732/**
3733 * xmlParseEntityValue:
3734 * @ctxt: an XML parser context
3735 * @orig: if non-NULL store a copy of the original entity value
3736 *
3737 * parse a value for ENTITY declarations
3738 *
3739 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3740 * "'" ([^%&'] | PEReference | Reference)* "'"
3741 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003742 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003743 */
3744
3745xmlChar *
3746xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3747 xmlChar *buf = NULL;
3748 int len = 0;
3749 int size = XML_PARSER_BUFFER_SIZE;
3750 int c, l;
3751 xmlChar stop;
3752 xmlChar *ret = NULL;
3753 const xmlChar *cur = NULL;
3754 xmlParserInputPtr input;
3755
3756 if (RAW == '"') stop = '"';
3757 else if (RAW == '\'') stop = '\'';
3758 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003759 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003760 return(NULL);
3761 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003762 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003763 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003764 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003765 return(NULL);
3766 }
3767
3768 /*
3769 * The content of the entity definition is copied in a buffer.
3770 */
3771
3772 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3773 input = ctxt->input;
3774 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003775 if (ctxt->instate == XML_PARSER_EOF) {
3776 xmlFree(buf);
3777 return(NULL);
3778 }
Owen Taylor3473f882001-02-23 17:55:21 +00003779 NEXT;
3780 c = CUR_CHAR(l);
3781 /*
3782 * NOTE: 4.4.5 Included in Literal
3783 * When a parameter entity reference appears in a literal entity
3784 * value, ... a single or double quote character in the replacement
3785 * text is always treated as a normal data character and will not
3786 * terminate the literal.
3787 * In practice it means we stop the loop only when back at parsing
3788 * the initial entity and the quote is found
3789 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003790 while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3791 (ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003792 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003793 xmlChar *tmp;
3794
Owen Taylor3473f882001-02-23 17:55:21 +00003795 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003796 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3797 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003798 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003799 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003800 return(NULL);
3801 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003802 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003803 }
3804 COPY_BUF(l,buf,len,c);
3805 NEXTL(l);
3806 /*
3807 * Pop-up of finished entities.
3808 */
3809 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3810 xmlPopInput(ctxt);
3811
3812 GROW;
3813 c = CUR_CHAR(l);
3814 if (c == 0) {
3815 GROW;
3816 c = CUR_CHAR(l);
3817 }
3818 }
3819 buf[len] = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003820 if (ctxt->instate == XML_PARSER_EOF) {
3821 xmlFree(buf);
3822 return(NULL);
3823 }
Owen Taylor3473f882001-02-23 17:55:21 +00003824
3825 /*
3826 * Raise problem w.r.t. '&' and '%' being used in non-entities
3827 * reference constructs. Note Charref will be handled in
3828 * xmlStringDecodeEntities()
3829 */
3830 cur = buf;
3831 while (*cur != 0) { /* non input consuming */
3832 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3833 xmlChar *name;
3834 xmlChar tmp = *cur;
3835
3836 cur++;
3837 name = xmlParseStringName(ctxt, &cur);
3838 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003839 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003840 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003841 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003842 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003843 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3844 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003845 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003846 }
3847 if (name != NULL)
3848 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003849 if (*cur == 0)
3850 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003851 }
3852 cur++;
3853 }
3854
3855 /*
3856 * Then PEReference entities are substituted.
3857 */
3858 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003859 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003860 xmlFree(buf);
3861 } else {
3862 NEXT;
3863 /*
3864 * NOTE: 4.4.7 Bypassed
3865 * When a general entity reference appears in the EntityValue in
3866 * an entity declaration, it is bypassed and left as is.
3867 * so XML_SUBSTITUTE_REF is not set here.
3868 */
3869 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3870 0, 0, 0);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003871 if (orig != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +00003872 *orig = buf;
3873 else
3874 xmlFree(buf);
3875 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003876
Owen Taylor3473f882001-02-23 17:55:21 +00003877 return(ret);
3878}
3879
3880/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003881 * xmlParseAttValueComplex:
3882 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003883 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003884 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003885 *
3886 * parse a value for an attribute, this is the fallback function
3887 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003888 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003889 *
3890 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3891 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003892static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003893xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003894 xmlChar limit = 0;
3895 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003896 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003897 size_t len = 0;
3898 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003899 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003900 xmlChar *current = NULL;
3901 xmlEntityPtr ent;
3902
Owen Taylor3473f882001-02-23 17:55:21 +00003903 if (NXT(0) == '"') {
3904 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3905 limit = '"';
3906 NEXT;
3907 } else if (NXT(0) == '\'') {
3908 limit = '\'';
3909 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3910 NEXT;
3911 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003912 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003913 return(NULL);
3914 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003915
Owen Taylor3473f882001-02-23 17:55:21 +00003916 /*
3917 * allocate a translation buffer.
3918 */
3919 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003920 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003921 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003922
3923 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003924 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003925 */
3926 c = CUR_CHAR(l);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08003927 while (((NXT(0) != limit) && /* checked */
3928 (IS_CHAR(c)) && (c != '<')) &&
3929 (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillarde17db992012-07-19 11:25:16 +08003930 /*
3931 * Impose a reasonable limit on attribute size, unless XML_PARSE_HUGE
3932 * special option is given
3933 */
3934 if ((len > XML_MAX_TEXT_LENGTH) &&
3935 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3936 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3937 "AttValue lenght too long\n");
3938 goto mem_error;
3939 }
Owen Taylor3473f882001-02-23 17:55:21 +00003940 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003941 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003942 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003943 if (NXT(1) == '#') {
3944 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003945
Owen Taylor3473f882001-02-23 17:55:21 +00003946 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003947 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003948 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003949 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003950 }
3951 buf[len++] = '&';
3952 } else {
3953 /*
3954 * The reparsing will be done in xmlStringGetNodeList()
3955 * called by the attribute() function in SAX.c
3956 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003957 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003958 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003959 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003960 buf[len++] = '&';
3961 buf[len++] = '#';
3962 buf[len++] = '3';
3963 buf[len++] = '8';
3964 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003965 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003966 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003967 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003968 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003969 }
Owen Taylor3473f882001-02-23 17:55:21 +00003970 len += xmlCopyChar(0, &buf[len], val);
3971 }
3972 } else {
3973 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003974 ctxt->nbentities++;
3975 if (ent != NULL)
3976 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003977 if ((ent != NULL) &&
3978 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003979 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003980 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003981 }
3982 if ((ctxt->replaceEntities == 0) &&
3983 (ent->content[0] == '&')) {
3984 buf[len++] = '&';
3985 buf[len++] = '#';
3986 buf[len++] = '3';
3987 buf[len++] = '8';
3988 buf[len++] = ';';
3989 } else {
3990 buf[len++] = ent->content[0];
3991 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003992 } else if ((ent != NULL) &&
3993 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003994 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3995 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003996 XML_SUBSTITUTE_REF,
3997 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003998 if (rep != NULL) {
3999 current = rep;
4000 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02004001 if ((*current == 0xD) || (*current == 0xA) ||
4002 (*current == 0x9)) {
4003 buf[len++] = 0x20;
4004 current++;
4005 } else
4006 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004007 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004008 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004009 }
4010 }
4011 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004012 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004013 }
4014 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08004015 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004016 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00004017 }
Owen Taylor3473f882001-02-23 17:55:21 +00004018 if (ent->content != NULL)
4019 buf[len++] = ent->content[0];
4020 }
4021 } else if (ent != NULL) {
4022 int i = xmlStrlen(ent->name);
4023 const xmlChar *cur = ent->name;
4024
4025 /*
4026 * This may look absurd but is needed to detect
4027 * entities problems
4028 */
4029 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4030 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004031 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00004032 XML_SUBSTITUTE_REF, 0, 0, 0);
4033 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00004034 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004035 rep = NULL;
4036 }
Owen Taylor3473f882001-02-23 17:55:21 +00004037 }
4038
4039 /*
4040 * Just output the reference
4041 */
4042 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08004043 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004044 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004045 }
4046 for (;i > 0;i--)
4047 buf[len++] = *cur++;
4048 buf[len++] = ';';
4049 }
4050 }
4051 } else {
4052 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004053 if ((len != 0) || (!normalize)) {
4054 if ((!normalize) || (!in_space)) {
4055 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004056 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004057 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004058 }
4059 }
4060 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004061 }
4062 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004063 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004064 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08004065 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00004066 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00004067 }
4068 }
4069 NEXTL(l);
4070 }
4071 GROW;
4072 c = CUR_CHAR(l);
4073 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004074 if (ctxt->instate == XML_PARSER_EOF)
4075 goto error;
4076
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004077 if ((in_space) && (normalize)) {
4078 while (buf[len - 1] == 0x20) len--;
4079 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00004080 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004081 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004082 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004083 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00004084 if ((c != 0) && (!IS_CHAR(c))) {
4085 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4086 "invalid character in attribute value\n");
4087 } else {
4088 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4089 "AttValue: ' expected\n");
4090 }
Owen Taylor3473f882001-02-23 17:55:21 +00004091 } else
4092 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08004093
4094 /*
4095 * There we potentially risk an overflow, don't allow attribute value of
4096 * lenght more than INT_MAX it is a very reasonnable assumption !
4097 */
4098 if (len >= INT_MAX) {
4099 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4100 "AttValue lenght too long\n");
4101 goto mem_error;
4102 }
4103
4104 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00004105 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004106
4107mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004108 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004109error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00004110 if (buf != NULL)
4111 xmlFree(buf);
4112 if (rep != NULL)
4113 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004114 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004115}
4116
4117/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00004118 * xmlParseAttValue:
4119 * @ctxt: an XML parser context
4120 *
4121 * parse a value for an attribute
4122 * Note: the parser won't do substitution of entities here, this
4123 * will be handled later in xmlStringGetNodeList
4124 *
4125 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4126 * "'" ([^<&'] | Reference)* "'"
4127 *
4128 * 3.3.3 Attribute-Value Normalization:
4129 * Before the value of an attribute is passed to the application or
4130 * checked for validity, the XML processor must normalize it as follows:
4131 * - a character reference is processed by appending the referenced
4132 * character to the attribute value
4133 * - an entity reference is processed by recursively processing the
4134 * replacement text of the entity
4135 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4136 * appending #x20 to the normalized value, except that only a single
4137 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4138 * parsed entity or the literal entity value of an internal parsed entity
4139 * - other characters are processed by appending them to the normalized value
4140 * If the declared value is not CDATA, then the XML processor must further
4141 * process the normalized attribute value by discarding any leading and
4142 * trailing space (#x20) characters, and by replacing sequences of space
4143 * (#x20) characters by a single space (#x20) character.
4144 * All attributes for which no declaration has been read should be treated
4145 * by a non-validating parser as if declared CDATA.
4146 *
4147 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4148 */
4149
4150
4151xmlChar *
4152xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004153 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004154 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004155}
4156
4157/**
Owen Taylor3473f882001-02-23 17:55:21 +00004158 * xmlParseSystemLiteral:
4159 * @ctxt: an XML parser context
4160 *
4161 * parse an XML Literal
4162 *
4163 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4164 *
4165 * Returns the SystemLiteral parsed or NULL
4166 */
4167
4168xmlChar *
4169xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4170 xmlChar *buf = NULL;
4171 int len = 0;
4172 int size = XML_PARSER_BUFFER_SIZE;
4173 int cur, l;
4174 xmlChar stop;
4175 int state = ctxt->instate;
4176 int count = 0;
4177
4178 SHRINK;
4179 if (RAW == '"') {
4180 NEXT;
4181 stop = '"';
4182 } else if (RAW == '\'') {
4183 NEXT;
4184 stop = '\'';
4185 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004186 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004187 return(NULL);
4188 }
4189
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004190 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004191 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004192 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004193 return(NULL);
4194 }
4195 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4196 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004197 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004198 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004199 xmlChar *tmp;
4200
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004201 if ((size > XML_MAX_NAME_LENGTH) &&
4202 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4203 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4204 xmlFree(buf);
4205 ctxt->instate = (xmlParserInputState) state;
4206 return(NULL);
4207 }
Owen Taylor3473f882001-02-23 17:55:21 +00004208 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004209 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4210 if (tmp == NULL) {
4211 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004212 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004213 ctxt->instate = (xmlParserInputState) state;
4214 return(NULL);
4215 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004216 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004217 }
4218 count++;
4219 if (count > 50) {
4220 GROW;
4221 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004222 if (ctxt->instate == XML_PARSER_EOF) {
4223 xmlFree(buf);
4224 return(NULL);
4225 }
Owen Taylor3473f882001-02-23 17:55:21 +00004226 }
4227 COPY_BUF(l,buf,len,cur);
4228 NEXTL(l);
4229 cur = CUR_CHAR(l);
4230 if (cur == 0) {
4231 GROW;
4232 SHRINK;
4233 cur = CUR_CHAR(l);
4234 }
4235 }
4236 buf[len] = 0;
4237 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004238 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004239 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004240 } else {
4241 NEXT;
4242 }
4243 return(buf);
4244}
4245
4246/**
4247 * xmlParsePubidLiteral:
4248 * @ctxt: an XML parser context
4249 *
4250 * parse an XML public literal
4251 *
4252 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4253 *
4254 * Returns the PubidLiteral parsed or NULL.
4255 */
4256
4257xmlChar *
4258xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4259 xmlChar *buf = NULL;
4260 int len = 0;
4261 int size = XML_PARSER_BUFFER_SIZE;
4262 xmlChar cur;
4263 xmlChar stop;
4264 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004265 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004266
4267 SHRINK;
4268 if (RAW == '"') {
4269 NEXT;
4270 stop = '"';
4271 } else if (RAW == '\'') {
4272 NEXT;
4273 stop = '\'';
4274 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004275 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004276 return(NULL);
4277 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004278 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004279 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004280 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004281 return(NULL);
4282 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004283 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004284 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004285 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004286 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004287 xmlChar *tmp;
4288
Daniel Veillard52d8ade2012-07-30 10:08:45 +08004289 if ((size > XML_MAX_NAME_LENGTH) &&
4290 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4291 xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4292 xmlFree(buf);
4293 return(NULL);
4294 }
Owen Taylor3473f882001-02-23 17:55:21 +00004295 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004296 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4297 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004298 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004299 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004300 return(NULL);
4301 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004302 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004303 }
4304 buf[len++] = cur;
4305 count++;
4306 if (count > 50) {
4307 GROW;
4308 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004309 if (ctxt->instate == XML_PARSER_EOF) {
4310 xmlFree(buf);
4311 return(NULL);
4312 }
Owen Taylor3473f882001-02-23 17:55:21 +00004313 }
4314 NEXT;
4315 cur = CUR;
4316 if (cur == 0) {
4317 GROW;
4318 SHRINK;
4319 cur = CUR;
4320 }
4321 }
4322 buf[len] = 0;
4323 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004324 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004325 } else {
4326 NEXT;
4327 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004328 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004329 return(buf);
4330}
4331
Daniel Veillard8ed10722009-08-20 19:17:36 +02004332static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004333
/*
 * Lookup table used by the inner loop of the char data parsing: a
 * non-zero entry means the byte can be skipped over directly.
 * Entry i holds i itself for 0x09 and for 0x20..0x7F except '&', '<'
 * and ']'; every other entry is zero.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* no CR/LF */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no & */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no < */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ] */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80..0xFF (non-ascii): left out, implicitly initialised to 0x00 */
};
Owen Taylor3473f882001-02-23 17:55:21 +00004372/**
4373 * xmlParseCharData:
4374 * @ctxt: an XML parser context
4375 * @cdata: int indicating whether we are within a CDATA section
4376 *
4377 * parse a CharData section.
4378 * if we are within a CDATA section ']]>' marks an end of section.
4379 *
4380 * The right angle bracket (>) may be represented using the string "&gt;",
4381 * and must, for compatibility, be escaped using "&gt;" or a character
4382 * reference when it appears in the string "]]>" in content, when that
4383 * string is not marking the end of a CDATA section.
4384 *
4385 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4386 */
4387
4388void
4389xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004390 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004391 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004392 int line = ctxt->input->line;
4393 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004394 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004395
4396 SHRINK;
4397 GROW;
4398 /*
4399 * Accelerated common case where input don't need to be
4400 * modified before passing it to the handler.
4401 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004402 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004403 in = ctxt->input->cur;
4404 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004405get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004406 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004407 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004408 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004409 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004410 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004411 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004412 goto get_more_space;
4413 }
4414 if (*in == '<') {
4415 nbchar = in - ctxt->input->cur;
4416 if (nbchar > 0) {
4417 const xmlChar *tmp = ctxt->input->cur;
4418 ctxt->input->cur = in;
4419
Daniel Veillard34099b42004-11-04 17:34:35 +00004420 if ((ctxt->sax != NULL) &&
4421 (ctxt->sax->ignorableWhitespace !=
4422 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004423 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004424 if (ctxt->sax->ignorableWhitespace != NULL)
4425 ctxt->sax->ignorableWhitespace(ctxt->userData,
4426 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004427 } else {
4428 if (ctxt->sax->characters != NULL)
4429 ctxt->sax->characters(ctxt->userData,
4430 tmp, nbchar);
4431 if (*ctxt->space == -1)
4432 *ctxt->space = -2;
4433 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004434 } else if ((ctxt->sax != NULL) &&
4435 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004436 ctxt->sax->characters(ctxt->userData,
4437 tmp, nbchar);
4438 }
4439 }
4440 return;
4441 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004442
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004443get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004444 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004445 while (test_char_data[*in]) {
4446 in++;
4447 ccol++;
4448 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004449 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004450 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004451 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004452 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004453 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004454 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004455 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004456 }
4457 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004458 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004459 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004460 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004461 return;
4462 }
4463 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004464 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004465 goto get_more;
4466 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004467 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004468 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004469 if ((ctxt->sax != NULL) &&
4470 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004471 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004472 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004473 const xmlChar *tmp = ctxt->input->cur;
4474 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004475
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004476 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004477 if (ctxt->sax->ignorableWhitespace != NULL)
4478 ctxt->sax->ignorableWhitespace(ctxt->userData,
4479 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004480 } else {
4481 if (ctxt->sax->characters != NULL)
4482 ctxt->sax->characters(ctxt->userData,
4483 tmp, nbchar);
4484 if (*ctxt->space == -1)
4485 *ctxt->space = -2;
4486 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004487 line = ctxt->input->line;
4488 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004489 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004490 if (ctxt->sax->characters != NULL)
4491 ctxt->sax->characters(ctxt->userData,
4492 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004493 line = ctxt->input->line;
4494 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004495 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004496 /* something really bad happened in the SAX callback */
4497 if (ctxt->instate != XML_PARSER_CONTENT)
4498 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004499 }
4500 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004501 if (*in == 0xD) {
4502 in++;
4503 if (*in == 0xA) {
4504 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004505 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004506 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004507 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004508 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004509 in--;
4510 }
4511 if (*in == '<') {
4512 return;
4513 }
4514 if (*in == '&') {
4515 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004516 }
4517 SHRINK;
4518 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004519 if (ctxt->instate == XML_PARSER_EOF)
4520 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004521 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004522 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004523 nbchar = 0;
4524 }
Daniel Veillard50582112001-03-26 22:52:16 +00004525 ctxt->input->line = line;
4526 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004527 xmlParseCharDataComplex(ctxt, cdata);
4528}
4529
Daniel Veillard01c13b52002-12-10 15:19:08 +00004530/**
4531 * xmlParseCharDataComplex:
4532 * @ctxt: an XML parser context
4533 * @cdata: int indicating whether we are within a CDATA section
4534 *
4535 * parse a CharData section.this is the fallback function
4536 * of xmlParseCharData() when the parsing requires handling
4537 * of non-ASCII characters.
4538 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004539static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004540xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004541 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4542 int nbchar = 0;
4543 int cur, l;
4544 int count = 0;
4545
4546 SHRINK;
4547 GROW;
4548 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004549 while ((cur != '<') && /* checked */
4550 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004551 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004552 if ((cur == ']') && (NXT(1) == ']') &&
4553 (NXT(2) == '>')) {
4554 if (cdata) break;
4555 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004556 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004557 }
4558 }
4559 COPY_BUF(l,buf,nbchar,cur);
4560 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004561 buf[nbchar] = 0;
4562
Owen Taylor3473f882001-02-23 17:55:21 +00004563 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004564 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004565 */
4566 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004567 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004568 if (ctxt->sax->ignorableWhitespace != NULL)
4569 ctxt->sax->ignorableWhitespace(ctxt->userData,
4570 buf, nbchar);
4571 } else {
4572 if (ctxt->sax->characters != NULL)
4573 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004574 if ((ctxt->sax->characters !=
4575 ctxt->sax->ignorableWhitespace) &&
4576 (*ctxt->space == -1))
4577 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004578 }
4579 }
4580 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004581 /* something really bad happened in the SAX callback */
4582 if (ctxt->instate != XML_PARSER_CONTENT)
4583 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004584 }
4585 count++;
4586 if (count > 50) {
4587 GROW;
4588 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004589 if (ctxt->instate == XML_PARSER_EOF)
4590 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004591 }
4592 NEXTL(l);
4593 cur = CUR_CHAR(l);
4594 }
4595 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004596 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004597 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004598 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004599 */
4600 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004601 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004602 if (ctxt->sax->ignorableWhitespace != NULL)
4603 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4604 } else {
4605 if (ctxt->sax->characters != NULL)
4606 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004607 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4608 (*ctxt->space == -1))
4609 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004610 }
4611 }
4612 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004613 if ((cur != 0) && (!IS_CHAR(cur))) {
4614 /* Generate the error and skip the offending character */
4615 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4616 "PCDATA invalid Char value %d\n",
4617 cur);
4618 NEXTL(l);
4619 }
Owen Taylor3473f882001-02-23 17:55:21 +00004620}
4621
4622/**
4623 * xmlParseExternalID:
4624 * @ctxt: an XML parser context
4625 * @publicID: a xmlChar** receiving PubidLiteral
4626 * @strict: indicate whether we should restrict parsing to only
4627 * production [75], see NOTE below
4628 *
4629 * Parse an External ID or a Public ID
4630 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004631 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004632 * 'PUBLIC' S PubidLiteral S SystemLiteral
4633 *
4634 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4635 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4636 *
4637 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4638 *
4639 * Returns the function returns SystemLiteral and in the second
4640 * case publicID receives PubidLiteral, is strict is off
4641 * it is possible to return NULL and have publicID set.
4642 */
4643
4644xmlChar *
4645xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4646 xmlChar *URI = NULL;
4647
4648 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004649
4650 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004651 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004652 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004653 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004654 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4655 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004656 }
4657 SKIP_BLANKS;
4658 URI = xmlParseSystemLiteral(ctxt);
4659 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004660 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004661 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004662 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004663 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004664 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004665 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004666 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004667 }
4668 SKIP_BLANKS;
4669 *publicID = xmlParsePubidLiteral(ctxt);
4670 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004671 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004672 }
4673 if (strict) {
4674 /*
4675 * We don't handle [83] so "S SystemLiteral" is required.
4676 */
William M. Brack76e95df2003-10-18 16:20:14 +00004677 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004678 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004679 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004680 }
4681 } else {
4682 /*
4683 * We handle [83] so we return immediately, if
4684 * "S SystemLiteral" is not detected. From a purely parsing
4685 * point of view that's a nice mess.
4686 */
4687 const xmlChar *ptr;
4688 GROW;
4689
4690 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004691 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004692
William M. Brack76e95df2003-10-18 16:20:14 +00004693 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004694 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4695 }
4696 SKIP_BLANKS;
4697 URI = xmlParseSystemLiteral(ctxt);
4698 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004699 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004700 }
4701 }
4702 return(URI);
4703}
4704
4705/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004706 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004707 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004708 * @buf: the already parsed part of the buffer
4709 * @len: number of bytes filles in the buffer
4710 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004711 *
4712 * Skip an XML (SGML) comment <!-- .... -->
4713 * The spec says that "For compatibility, the string "--" (double-hyphen)
4714 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004715 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004716 *
4717 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4718 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004719static void
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004720xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4721 size_t len, size_t size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004722 int q, ql;
4723 int r, rl;
4724 int cur, l;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004725 size_t count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004726 int inputid;
4727
4728 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004729
Owen Taylor3473f882001-02-23 17:55:21 +00004730 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004731 len = 0;
4732 size = XML_PARSER_BUFFER_SIZE;
4733 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4734 if (buf == NULL) {
4735 xmlErrMemory(ctxt, NULL);
4736 return;
4737 }
Owen Taylor3473f882001-02-23 17:55:21 +00004738 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004739 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004740 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004741 if (q == 0)
4742 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004743 if (!IS_CHAR(q)) {
4744 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4745 "xmlParseComment: invalid xmlChar value %d\n",
4746 q);
4747 xmlFree (buf);
4748 return;
4749 }
Owen Taylor3473f882001-02-23 17:55:21 +00004750 NEXTL(ql);
4751 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004752 if (r == 0)
4753 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004754 if (!IS_CHAR(r)) {
4755 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4756 "xmlParseComment: invalid xmlChar value %d\n",
4757 q);
4758 xmlFree (buf);
4759 return;
4760 }
Owen Taylor3473f882001-02-23 17:55:21 +00004761 NEXTL(rl);
4762 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004763 if (cur == 0)
4764 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004765 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004766 ((cur != '>') ||
4767 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004768 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004769 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004770 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004771 if ((len > XML_MAX_TEXT_LENGTH) &&
4772 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4773 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4774 "Comment too big found", NULL);
4775 xmlFree (buf);
4776 return;
4777 }
Owen Taylor3473f882001-02-23 17:55:21 +00004778 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004779 xmlChar *new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004780 size_t new_size;
4781
4782 new_size = size * 2;
4783 new_buf = (xmlChar *) xmlRealloc(buf, new_size);
William M. Bracka3215c72004-07-31 16:24:01 +00004784 if (new_buf == NULL) {
4785 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004786 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004787 return;
4788 }
William M. Bracka3215c72004-07-31 16:24:01 +00004789 buf = new_buf;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004790 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00004791 }
4792 COPY_BUF(ql,buf,len,q);
4793 q = r;
4794 ql = rl;
4795 r = cur;
4796 rl = l;
4797
4798 count++;
4799 if (count > 50) {
4800 GROW;
4801 count = 0;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004802 if (ctxt->instate == XML_PARSER_EOF) {
4803 xmlFree(buf);
4804 return;
4805 }
Owen Taylor3473f882001-02-23 17:55:21 +00004806 }
4807 NEXTL(l);
4808 cur = CUR_CHAR(l);
4809 if (cur == 0) {
4810 SHRINK;
4811 GROW;
4812 cur = CUR_CHAR(l);
4813 }
4814 }
4815 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004816 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004817 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004818 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004819 } else if (!IS_CHAR(cur)) {
4820 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4821 "xmlParseComment: invalid xmlChar value %d\n",
4822 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004823 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004824 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004825 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4826 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004827 }
4828 NEXT;
4829 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4830 (!ctxt->disableSAX))
4831 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004832 }
Daniel Veillardda629342007-08-01 07:49:06 +00004833 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004834 return;
4835not_terminated:
4836 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4837 "Comment not terminated\n", NULL);
4838 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004839 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004840}
Daniel Veillardda629342007-08-01 07:49:06 +00004841
Daniel Veillard4c778d82005-01-23 17:37:44 +00004842/**
4843 * xmlParseComment:
4844 * @ctxt: an XML parser context
4845 *
4846 * Skip an XML (SGML) comment <!-- .... -->
4847 * The spec says that "For compatibility, the string "--" (double-hyphen)
4848 * must not occur within comments. "
4849 *
4850 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4851 */
4852void
4853xmlParseComment(xmlParserCtxtPtr ctxt) {
4854 xmlChar *buf = NULL;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004855 size_t size = XML_PARSER_BUFFER_SIZE;
4856 size_t len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004857 xmlParserInputState state;
4858 const xmlChar *in;
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004859 size_t nbchar = 0;
4860 int ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004861 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004862
4863 /*
4864 * Check that there is a comment right here.
4865 */
4866 if ((RAW != '<') || (NXT(1) != '!') ||
4867 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004868 state = ctxt->instate;
4869 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004870 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004871 SKIP(4);
4872 SHRINK;
4873 GROW;
4874
4875 /*
4876 * Accelerated common case where input don't need to be
4877 * modified before passing it to the handler.
4878 */
4879 in = ctxt->input->cur;
4880 do {
4881 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004882 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004883 ctxt->input->line++; ctxt->input->col = 1;
4884 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004885 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004886 }
4887get_more:
4888 ccol = ctxt->input->col;
4889 while (((*in > '-') && (*in <= 0x7F)) ||
4890 ((*in >= 0x20) && (*in < '-')) ||
4891 (*in == 0x09)) {
4892 in++;
4893 ccol++;
4894 }
4895 ctxt->input->col = ccol;
4896 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004897 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004898 ctxt->input->line++; ctxt->input->col = 1;
4899 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004900 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004901 goto get_more;
4902 }
4903 nbchar = in - ctxt->input->cur;
4904 /*
4905 * save current set of data
4906 */
4907 if (nbchar > 0) {
4908 if ((ctxt->sax != NULL) &&
4909 (ctxt->sax->comment != NULL)) {
4910 if (buf == NULL) {
4911 if ((*in == '-') && (in[1] == '-'))
4912 size = nbchar + 1;
4913 else
4914 size = XML_PARSER_BUFFER_SIZE + nbchar;
4915 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4916 if (buf == NULL) {
4917 xmlErrMemory(ctxt, NULL);
4918 ctxt->instate = state;
4919 return;
4920 }
4921 len = 0;
4922 } else if (len + nbchar + 1 >= size) {
4923 xmlChar *new_buf;
4924 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4925 new_buf = (xmlChar *) xmlRealloc(buf,
4926 size * sizeof(xmlChar));
4927 if (new_buf == NULL) {
4928 xmlFree (buf);
4929 xmlErrMemory(ctxt, NULL);
4930 ctxt->instate = state;
4931 return;
4932 }
4933 buf = new_buf;
4934 }
4935 memcpy(&buf[len], ctxt->input->cur, nbchar);
4936 len += nbchar;
4937 buf[len] = 0;
4938 }
4939 }
Daniel Veillard58f73ac2012-07-19 11:58:47 +08004940 if ((len > XML_MAX_TEXT_LENGTH) &&
4941 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
4942 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4943 "Comment too big found", NULL);
4944 xmlFree (buf);
4945 return;
4946 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004947 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004948 if (*in == 0xA) {
4949 in++;
4950 ctxt->input->line++; ctxt->input->col = 1;
4951 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004952 if (*in == 0xD) {
4953 in++;
4954 if (*in == 0xA) {
4955 ctxt->input->cur = in;
4956 in++;
4957 ctxt->input->line++; ctxt->input->col = 1;
4958 continue; /* while */
4959 }
4960 in--;
4961 }
4962 SHRINK;
4963 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08004964 if (ctxt->instate == XML_PARSER_EOF) {
4965 xmlFree(buf);
4966 return;
4967 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004968 in = ctxt->input->cur;
4969 if (*in == '-') {
4970 if (in[1] == '-') {
4971 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004972 if (ctxt->input->id != inputid) {
4973 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4974 "comment doesn't start and stop in the same entity\n");
4975 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004976 SKIP(3);
4977 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4978 (!ctxt->disableSAX)) {
4979 if (buf != NULL)
4980 ctxt->sax->comment(ctxt->userData, buf);
4981 else
4982 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4983 }
4984 if (buf != NULL)
4985 xmlFree(buf);
4986 ctxt->instate = state;
4987 return;
4988 }
Bryan Henderson8658d272012-05-08 16:39:05 +08004989 if (buf != NULL) {
4990 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4991 "Double hyphen within comment: "
4992 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00004993 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08004994 } else
4995 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4996 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004997 in++;
4998 ctxt->input->col++;
4999 }
5000 in++;
5001 ctxt->input->col++;
5002 goto get_more;
5003 }
5004 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
5005 xmlParseCommentComplex(ctxt, buf, len, size);
5006 ctxt->instate = state;
5007 return;
5008}
5009
Owen Taylor3473f882001-02-23 17:55:21 +00005010
5011/**
5012 * xmlParsePITarget:
5013 * @ctxt: an XML parser context
5014 *
5015 * parse the name of a PI
5016 *
5017 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5018 *
5019 * Returns the PITarget name or NULL
5020 */
5021
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005022const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00005023xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005024 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005025
5026 name = xmlParseName(ctxt);
5027 if ((name != NULL) &&
5028 ((name[0] == 'x') || (name[0] == 'X')) &&
5029 ((name[1] == 'm') || (name[1] == 'M')) &&
5030 ((name[2] == 'l') || (name[2] == 'L'))) {
5031 int i;
5032 if ((name[0] == 'x') && (name[1] == 'm') &&
5033 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005034 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00005035 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005036 return(name);
5037 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005038 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005039 return(name);
5040 }
5041 for (i = 0;;i++) {
5042 if (xmlW3CPIs[i] == NULL) break;
5043 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5044 return(name);
5045 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00005046 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5047 "xmlParsePITarget: invalid name prefix 'xml'\n",
5048 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005049 }
Daniel Veillard37334572008-07-31 08:20:02 +00005050 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5051 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5052 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
5053 }
Owen Taylor3473f882001-02-23 17:55:21 +00005054 return(name);
5055}
5056
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must read: catalog, '=', a quoted URL, with optional blanks
 * around each piece and nothing else after the closing quote. A syntax
 * error raises the XML_WAR_CATALOG_PI warning, except for a missing '='
 * which makes the function return without any message.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *href = NULL;
    xmlChar quote;

    /* Single pass block: "break" means syntax error. */
    do {
        while (IS_BLANK_CH(*p)) p++;
        if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
            break;
        p += 7;

        while (IS_BLANK_CH(*p)) p++;
        if (*p != '=')
            return;
        p++;

        while (IS_BLANK_CH(*p)) p++;
        quote = *p;
        if ((quote != '\'') && (quote != '"'))
            break;
        p++;

        /* The URL runs up to the matching quote. */
        start = p;
        while ((*p != 0) && (*p != quote)) p++;
        if (*p == 0)
            break;
        href = xmlStrndup(start, p - start);
        p++;

        while (IS_BLANK_CH(*p)) p++;
        if (*p != 0)
            break;

        if (href != NULL) {
            ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, href);
            xmlFree(href);
        }
        return;
    } while (0);

    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (href != NULL)
        xmlFree(href);
}
#endif
5118
Owen Taylor3473f882001-02-23 17:55:21 +00005119/**
5120 * xmlParsePI:
5121 * @ctxt: an XML parser context
5122 *
5123 * parse an XML Processing Instruction.
5124 *
5125 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5126 *
5127 * The processing is transfered to SAX once parsed.
5128 */
5129
5130void
5131xmlParsePI(xmlParserCtxtPtr ctxt) {
5132 xmlChar *buf = NULL;
Daniel Veillard51304812012-07-19 20:34:26 +08005133 size_t len = 0;
5134 size_t size = XML_PARSER_BUFFER_SIZE;
Owen Taylor3473f882001-02-23 17:55:21 +00005135 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005136 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00005137 xmlParserInputState state;
5138 int count = 0;
5139
5140 if ((RAW == '<') && (NXT(1) == '?')) {
5141 xmlParserInputPtr input = ctxt->input;
5142 state = ctxt->instate;
5143 ctxt->instate = XML_PARSER_PI;
5144 /*
5145 * this is a Processing Instruction.
5146 */
5147 SKIP(2);
5148 SHRINK;
5149
5150 /*
5151 * Parse the target name and check for special support like
5152 * namespace.
5153 */
5154 target = xmlParsePITarget(ctxt);
5155 if (target != NULL) {
5156 if ((RAW == '?') && (NXT(1) == '>')) {
5157 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005158 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5159 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005160 }
5161 SKIP(2);
5162
5163 /*
5164 * SAX: PI detected.
5165 */
5166 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5167 (ctxt->sax->processingInstruction != NULL))
5168 ctxt->sax->processingInstruction(ctxt->userData,
5169 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08005170 if (ctxt->instate != XML_PARSER_EOF)
5171 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005172 return;
5173 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00005174 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00005175 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005176 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005177 ctxt->instate = state;
5178 return;
5179 }
5180 cur = CUR;
5181 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005182 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5183 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005184 }
5185 SKIP_BLANKS;
5186 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00005187 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00005188 ((cur != '?') || (NXT(1) != '>'))) {
5189 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00005190 xmlChar *tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005191 size_t new_size = size * 2;
5192 tmp = (xmlChar *) xmlRealloc(buf, new_size);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005193 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005194 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005195 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005196 ctxt->instate = state;
5197 return;
5198 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005199 buf = tmp;
Daniel Veillard51304812012-07-19 20:34:26 +08005200 size = new_size;
Owen Taylor3473f882001-02-23 17:55:21 +00005201 }
5202 count++;
5203 if (count > 50) {
5204 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005205 if (ctxt->instate == XML_PARSER_EOF) {
5206 xmlFree(buf);
5207 return;
5208 }
Owen Taylor3473f882001-02-23 17:55:21 +00005209 count = 0;
Daniel Veillard51304812012-07-19 20:34:26 +08005210 if ((len > XML_MAX_TEXT_LENGTH) &&
5211 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5212 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5213 "PI %s too big found", target);
5214 xmlFree(buf);
5215 ctxt->instate = state;
5216 return;
5217 }
Owen Taylor3473f882001-02-23 17:55:21 +00005218 }
5219 COPY_BUF(l,buf,len,cur);
5220 NEXTL(l);
5221 cur = CUR_CHAR(l);
5222 if (cur == 0) {
5223 SHRINK;
5224 GROW;
5225 cur = CUR_CHAR(l);
5226 }
5227 }
Daniel Veillard51304812012-07-19 20:34:26 +08005228 if ((len > XML_MAX_TEXT_LENGTH) &&
5229 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
5230 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5231 "PI %s too big found", target);
5232 xmlFree(buf);
5233 ctxt->instate = state;
5234 return;
5235 }
Owen Taylor3473f882001-02-23 17:55:21 +00005236 buf[len] = 0;
5237 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005238 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5239 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005240 } else {
5241 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005242 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5243 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005244 }
5245 SKIP(2);
5246
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005247#ifdef LIBXML_CATALOG_ENABLED
5248 if (((state == XML_PARSER_MISC) ||
5249 (state == XML_PARSER_START)) &&
5250 (xmlStrEqual(target, XML_CATALOG_PI))) {
5251 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5252 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5253 (allow == XML_CATA_ALLOW_ALL))
5254 xmlParseCatalogPI(ctxt, buf);
5255 }
5256#endif
5257
5258
Owen Taylor3473f882001-02-23 17:55:21 +00005259 /*
5260 * SAX: PI detected.
5261 */
5262 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5263 (ctxt->sax->processingInstruction != NULL))
5264 ctxt->sax->processingInstruction(ctxt->userData,
5265 target, buf);
5266 }
5267 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005268 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005269 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005270 }
Chris Evans77404b82011-12-14 16:18:25 +08005271 if (ctxt->instate != XML_PARSER_EOF)
5272 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005273 }
5274}
5275
5276/**
5277 * xmlParseNotationDecl:
5278 * @ctxt: an XML parser context
5279 *
5280 * parse a notation declaration
5281 *
5282 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5283 *
5284 * Hence there is actually 3 choices:
5285 * 'PUBLIC' S PubidLiteral
5286 * 'PUBLIC' S PubidLiteral S SystemLiteral
5287 * and 'SYSTEM' S SystemLiteral
5288 *
5289 * See the NOTE on xmlParseExternalID().
5290 */
5291
5292void
5293xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005294 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005295 xmlChar *Pubid;
5296 xmlChar *Systemid;
5297
Daniel Veillarda07050d2003-10-19 14:46:32 +00005298 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005299 xmlParserInputPtr input = ctxt->input;
5300 SHRINK;
5301 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005302 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005303 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5304 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005305 return;
5306 }
5307 SKIP_BLANKS;
5308
Daniel Veillard76d66f42001-05-16 21:05:17 +00005309 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005310 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005311 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005312 return;
5313 }
William M. Brack76e95df2003-10-18 16:20:14 +00005314 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005315 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005316 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005317 return;
5318 }
Daniel Veillard37334572008-07-31 08:20:02 +00005319 if (xmlStrchr(name, ':') != NULL) {
5320 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5321 "colon are forbidden from notation names '%s'\n",
5322 name, NULL, NULL);
5323 }
Owen Taylor3473f882001-02-23 17:55:21 +00005324 SKIP_BLANKS;
5325
5326 /*
5327 * Parse the IDs.
5328 */
5329 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5330 SKIP_BLANKS;
5331
5332 if (RAW == '>') {
5333 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005334 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5335 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005336 }
5337 NEXT;
5338 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5339 (ctxt->sax->notationDecl != NULL))
5340 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5341 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005342 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005343 }
Owen Taylor3473f882001-02-23 17:55:21 +00005344 if (Systemid != NULL) xmlFree(Systemid);
5345 if (Pubid != NULL) xmlFree(Pubid);
5346 }
5347}
5348
5349/**
5350 * xmlParseEntityDecl:
5351 * @ctxt: an XML parser context
5352 *
5353 * parse <!ENTITY declarations
5354 *
5355 * [70] EntityDecl ::= GEDecl | PEDecl
5356 *
5357 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5358 *
5359 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5360 *
5361 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5362 *
5363 * [74] PEDef ::= EntityValue | ExternalID
5364 *
5365 * [76] NDataDecl ::= S 'NDATA' S Name
5366 *
5367 * [ VC: Notation Declared ]
5368 * The Name must match the declared name of a notation.
5369 */
5370
5371void
5372xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005373 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005374 xmlChar *value = NULL;
5375 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005376 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005377 int isParameter = 0;
5378 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005379 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00005380
Daniel Veillard4c778d82005-01-23 17:37:44 +00005381 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005382 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005383 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005384 SHRINK;
5385 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005386 skipped = SKIP_BLANKS;
5387 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005388 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5389 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005390 }
Owen Taylor3473f882001-02-23 17:55:21 +00005391
5392 if (RAW == '%') {
5393 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005394 skipped = SKIP_BLANKS;
5395 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005396 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5397 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005398 }
Owen Taylor3473f882001-02-23 17:55:21 +00005399 isParameter = 1;
5400 }
5401
Daniel Veillard76d66f42001-05-16 21:05:17 +00005402 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005403 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005404 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5405 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005406 return;
5407 }
Daniel Veillard37334572008-07-31 08:20:02 +00005408 if (xmlStrchr(name, ':') != NULL) {
5409 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5410 "colon are forbidden from entities names '%s'\n",
5411 name, NULL, NULL);
5412 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005413 skipped = SKIP_BLANKS;
5414 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005415 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5416 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005417 }
Owen Taylor3473f882001-02-23 17:55:21 +00005418
Daniel Veillardf5582f12002-06-11 10:08:16 +00005419 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005420 /*
5421 * handle the various case of definitions...
5422 */
5423 if (isParameter) {
5424 if ((RAW == '"') || (RAW == '\'')) {
5425 value = xmlParseEntityValue(ctxt, &orig);
5426 if (value) {
5427 if ((ctxt->sax != NULL) &&
5428 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5429 ctxt->sax->entityDecl(ctxt->userData, name,
5430 XML_INTERNAL_PARAMETER_ENTITY,
5431 NULL, NULL, value);
5432 }
5433 } else {
5434 URI = xmlParseExternalID(ctxt, &literal, 1);
5435 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005436 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005437 }
5438 if (URI) {
5439 xmlURIPtr uri;
5440
5441 uri = xmlParseURI((const char *) URI);
5442 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005443 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5444 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005445 /*
5446 * This really ought to be a well formedness error
5447 * but the XML Core WG decided otherwise c.f. issue
5448 * E26 of the XML erratas.
5449 */
Owen Taylor3473f882001-02-23 17:55:21 +00005450 } else {
5451 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005452 /*
5453 * Okay this is foolish to block those but not
5454 * invalid URIs.
5455 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005456 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005457 } else {
5458 if ((ctxt->sax != NULL) &&
5459 (!ctxt->disableSAX) &&
5460 (ctxt->sax->entityDecl != NULL))
5461 ctxt->sax->entityDecl(ctxt->userData, name,
5462 XML_EXTERNAL_PARAMETER_ENTITY,
5463 literal, URI, NULL);
5464 }
5465 xmlFreeURI(uri);
5466 }
5467 }
5468 }
5469 } else {
5470 if ((RAW == '"') || (RAW == '\'')) {
5471 value = xmlParseEntityValue(ctxt, &orig);
5472 if ((ctxt->sax != NULL) &&
5473 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5474 ctxt->sax->entityDecl(ctxt->userData, name,
5475 XML_INTERNAL_GENERAL_ENTITY,
5476 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005477 /*
5478 * For expat compatibility in SAX mode.
5479 */
5480 if ((ctxt->myDoc == NULL) ||
5481 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5482 if (ctxt->myDoc == NULL) {
5483 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005484 if (ctxt->myDoc == NULL) {
5485 xmlErrMemory(ctxt, "New Doc failed");
5486 return;
5487 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005488 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005489 }
5490 if (ctxt->myDoc->intSubset == NULL)
5491 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5492 BAD_CAST "fake", NULL, NULL);
5493
Daniel Veillard1af9a412003-08-20 22:54:39 +00005494 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5495 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005496 }
Owen Taylor3473f882001-02-23 17:55:21 +00005497 } else {
5498 URI = xmlParseExternalID(ctxt, &literal, 1);
5499 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005500 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005501 }
5502 if (URI) {
5503 xmlURIPtr uri;
5504
5505 uri = xmlParseURI((const char *)URI);
5506 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005507 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5508 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005509 /*
5510 * This really ought to be a well formedness error
5511 * but the XML Core WG decided otherwise c.f. issue
5512 * E26 of the XML erratas.
5513 */
Owen Taylor3473f882001-02-23 17:55:21 +00005514 } else {
5515 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005516 /*
5517 * Okay this is foolish to block those but not
5518 * invalid URIs.
5519 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005520 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005521 }
5522 xmlFreeURI(uri);
5523 }
5524 }
William M. Brack76e95df2003-10-18 16:20:14 +00005525 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005526 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5527 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005528 }
5529 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005530 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005531 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005532 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005533 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5534 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005535 }
5536 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005537 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005538 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5539 (ctxt->sax->unparsedEntityDecl != NULL))
5540 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5541 literal, URI, ndata);
5542 } else {
5543 if ((ctxt->sax != NULL) &&
5544 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5545 ctxt->sax->entityDecl(ctxt->userData, name,
5546 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5547 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005548 /*
5549 * For expat compatibility in SAX mode.
5550 * assuming the entity repalcement was asked for
5551 */
5552 if ((ctxt->replaceEntities != 0) &&
5553 ((ctxt->myDoc == NULL) ||
5554 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5555 if (ctxt->myDoc == NULL) {
5556 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005557 if (ctxt->myDoc == NULL) {
5558 xmlErrMemory(ctxt, "New Doc failed");
5559 return;
5560 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005561 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005562 }
5563
5564 if (ctxt->myDoc->intSubset == NULL)
5565 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5566 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005567 xmlSAX2EntityDecl(ctxt, name,
5568 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5569 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005570 }
Owen Taylor3473f882001-02-23 17:55:21 +00005571 }
5572 }
5573 }
5574 SKIP_BLANKS;
5575 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005576 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005577 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005578 } else {
5579 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005580 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5581 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005582 }
5583 NEXT;
5584 }
5585 if (orig != NULL) {
5586 /*
5587 * Ugly mechanism to save the raw entity value.
5588 */
5589 xmlEntityPtr cur = NULL;
5590
5591 if (isParameter) {
5592 if ((ctxt->sax != NULL) &&
5593 (ctxt->sax->getParameterEntity != NULL))
5594 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5595 } else {
5596 if ((ctxt->sax != NULL) &&
5597 (ctxt->sax->getEntity != NULL))
5598 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005599 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005600 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005601 }
Owen Taylor3473f882001-02-23 17:55:21 +00005602 }
5603 if (cur != NULL) {
5604 if (cur->orig != NULL)
5605 xmlFree(orig);
5606 else
5607 cur->orig = orig;
5608 } else
5609 xmlFree(orig);
5610 }
Owen Taylor3473f882001-02-23 17:55:21 +00005611 if (value != NULL) xmlFree(value);
5612 if (URI != NULL) xmlFree(URI);
5613 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005614 }
5615}
5616
5617/**
5618 * xmlParseDefaultDecl:
5619 * @ctxt: an XML parser context
5620 * @value: Receive a possible fixed default value for the attribute
5621 *
5622 * Parse an attribute default declaration
5623 *
5624 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5625 *
5626 * [ VC: Required Attribute ]
5627 * if the default declaration is the keyword #REQUIRED, then the
5628 * attribute must be specified for all elements of the type in the
5629 * attribute-list declaration.
5630 *
5631 * [ VC: Attribute Default Legal ]
5632 * The declared default value must meet the lexical constraints of
5633 * the declared attribute type c.f. xmlValidateAttributeDecl()
5634 *
5635 * [ VC: Fixed Attribute Default ]
5636 * if an attribute has a default value declared with the #FIXED
5637 * keyword, instances of that attribute must match the default value.
5638 *
5639 * [ WFC: No < in Attribute Values ]
5640 * handled in xmlParseAttValue()
5641 *
5642 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5643 * or XML_ATTRIBUTE_FIXED.
5644 */
5645
5646int
5647xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5648 int val;
5649 xmlChar *ret;
5650
5651 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005652 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005653 SKIP(9);
5654 return(XML_ATTRIBUTE_REQUIRED);
5655 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005656 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005657 SKIP(8);
5658 return(XML_ATTRIBUTE_IMPLIED);
5659 }
5660 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005661 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005662 SKIP(6);
5663 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005664 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005665 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5666 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005667 }
5668 SKIP_BLANKS;
5669 }
5670 ret = xmlParseAttValue(ctxt);
5671 ctxt->instate = XML_PARSER_DTD;
5672 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005673 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005674 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005675 } else
5676 *value = ret;
5677 return(val);
5678}
5679
5680/**
5681 * xmlParseNotationType:
5682 * @ctxt: an XML parser context
5683 *
5684 * parse an Notation attribute type.
5685 *
5686 * Note: the leading 'NOTATION' S part has already being parsed...
5687 *
5688 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5689 *
5690 * [ VC: Notation Attributes ]
5691 * Values of this type must match one of the notation names included
5692 * in the declaration; all notation names in the declaration must be declared.
5693 *
5694 * Returns: the notation attribute tree built while parsing
5695 */
5696
5697xmlEnumerationPtr
5698xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005699 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005700 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005701
5702 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005703 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005704 return(NULL);
5705 }
5706 SHRINK;
5707 do {
5708 NEXT;
5709 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005710 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005711 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005712 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5713 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005714 xmlFreeEnumeration(ret);
5715 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005716 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005717 tmp = ret;
5718 while (tmp != NULL) {
5719 if (xmlStrEqual(name, tmp->name)) {
5720 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5721 "standalone: attribute notation value token %s duplicated\n",
5722 name, NULL);
5723 if (!xmlDictOwns(ctxt->dict, name))
5724 xmlFree((xmlChar *) name);
5725 break;
5726 }
5727 tmp = tmp->next;
5728 }
5729 if (tmp == NULL) {
5730 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005731 if (cur == NULL) {
5732 xmlFreeEnumeration(ret);
5733 return(NULL);
5734 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005735 if (last == NULL) ret = last = cur;
5736 else {
5737 last->next = cur;
5738 last = cur;
5739 }
Owen Taylor3473f882001-02-23 17:55:21 +00005740 }
5741 SKIP_BLANKS;
5742 } while (RAW == '|');
5743 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005744 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005745 xmlFreeEnumeration(ret);
5746 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005747 }
5748 NEXT;
5749 return(ret);
5750}
5751
5752/**
5753 * xmlParseEnumerationType:
5754 * @ctxt: an XML parser context
5755 *
5756 * parse an Enumeration attribute type.
5757 *
5758 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5759 *
5760 * [ VC: Enumeration ]
5761 * Values of this type must match one of the Nmtoken tokens in
5762 * the declaration
5763 *
5764 * Returns: the enumeration attribute tree built while parsing
5765 */
5766
5767xmlEnumerationPtr
5768xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5769 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005770 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005771
5772 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005773 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005774 return(NULL);
5775 }
5776 SHRINK;
5777 do {
5778 NEXT;
5779 SKIP_BLANKS;
5780 name = xmlParseNmtoken(ctxt);
5781 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005782 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005783 return(ret);
5784 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005785 tmp = ret;
5786 while (tmp != NULL) {
5787 if (xmlStrEqual(name, tmp->name)) {
5788 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5789 "standalone: attribute enumeration value token %s duplicated\n",
5790 name, NULL);
5791 if (!xmlDictOwns(ctxt->dict, name))
5792 xmlFree(name);
5793 break;
5794 }
5795 tmp = tmp->next;
5796 }
5797 if (tmp == NULL) {
5798 cur = xmlCreateEnumeration(name);
5799 if (!xmlDictOwns(ctxt->dict, name))
5800 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005801 if (cur == NULL) {
5802 xmlFreeEnumeration(ret);
5803 return(NULL);
5804 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005805 if (last == NULL) ret = last = cur;
5806 else {
5807 last->next = cur;
5808 last = cur;
5809 }
Owen Taylor3473f882001-02-23 17:55:21 +00005810 }
5811 SKIP_BLANKS;
5812 } while (RAW == '|');
5813 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005814 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005815 return(ret);
5816 }
5817 NEXT;
5818 return(ret);
5819}
5820
5821/**
5822 * xmlParseEnumeratedType:
5823 * @ctxt: an XML parser context
5824 * @tree: the enumeration tree built while parsing
5825 *
5826 * parse an Enumerated attribute type.
5827 *
5828 * [57] EnumeratedType ::= NotationType | Enumeration
5829 *
5830 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5831 *
5832 *
5833 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5834 */
5835
5836int
5837xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005838 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005839 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005840 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005841 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5842 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005843 return(0);
5844 }
5845 SKIP_BLANKS;
5846 *tree = xmlParseNotationType(ctxt);
5847 if (*tree == NULL) return(0);
5848 return(XML_ATTRIBUTE_NOTATION);
5849 }
5850 *tree = xmlParseEnumerationType(ctxt);
5851 if (*tree == NULL) return(0);
5852 return(XML_ATTRIBUTE_ENUMERATION);
5853}
5854
5855/**
5856 * xmlParseAttributeType:
5857 * @ctxt: an XML parser context
5858 * @tree: the enumeration tree built while parsing
5859 *
5860 * parse the Attribute list def for an element
5861 *
5862 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5863 *
5864 * [55] StringType ::= 'CDATA'
5865 *
5866 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5867 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5868 *
5869 * Validity constraints for attribute values syntax are checked in
5870 * xmlValidateAttributeValue()
5871 *
5872 * [ VC: ID ]
5873 * Values of type ID must match the Name production. A name must not
5874 * appear more than once in an XML document as a value of this type;
5875 * i.e., ID values must uniquely identify the elements which bear them.
5876 *
5877 * [ VC: One ID per Element Type ]
5878 * No element type may have more than one ID attribute specified.
5879 *
5880 * [ VC: ID Attribute Default ]
5881 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5882 *
5883 * [ VC: IDREF ]
5884 * Values of type IDREF must match the Name production, and values
5885 * of type IDREFS must match Names; each IDREF Name must match the value
5886 * of an ID attribute on some element in the XML document; i.e. IDREF
5887 * values must match the value of some ID attribute.
5888 *
5889 * [ VC: Entity Name ]
5890 * Values of type ENTITY must match the Name production, values
5891 * of type ENTITIES must match Names; each Entity Name must match the
5892 * name of an unparsed entity declared in the DTD.
5893 *
5894 * [ VC: Name Token ]
5895 * Values of type NMTOKEN must match the Nmtoken production; values
5896 * of type NMTOKENS must match Nmtokens.
5897 *
5898 * Returns the attribute type
5899 */
5900int
5901xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5902 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005903 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005904 SKIP(5);
5905 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005906 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005907 SKIP(6);
5908 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005909 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005910 SKIP(5);
5911 return(XML_ATTRIBUTE_IDREF);
5912 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5913 SKIP(2);
5914 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005915 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005916 SKIP(6);
5917 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005918 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005919 SKIP(8);
5920 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005921 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005922 SKIP(8);
5923 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005924 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005925 SKIP(7);
5926 return(XML_ATTRIBUTE_NMTOKEN);
5927 }
5928 return(xmlParseEnumeratedType(ctxt, tree));
5929}
5930
5931/**
5932 * xmlParseAttributeListDecl:
5933 * @ctxt: an XML parser context
5934 *
5935 * : parse the Attribute list def for an element
5936 *
5937 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5938 *
5939 * [53] AttDef ::= S Name S AttType S DefaultDecl
5940 *
5941 */
5942void
5943xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005944 const xmlChar *elemName;
5945 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005946 xmlEnumerationPtr tree;
5947
Daniel Veillarda07050d2003-10-19 14:46:32 +00005948 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005949 xmlParserInputPtr input = ctxt->input;
5950
5951 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005952 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005953 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005954 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005955 }
5956 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005957 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005958 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005959 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5960 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005961 return;
5962 }
5963 SKIP_BLANKS;
5964 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08005965 while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005966 const xmlChar *check = CUR_PTR;
5967 int type;
5968 int def;
5969 xmlChar *defaultValue = NULL;
5970
5971 GROW;
5972 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005973 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005974 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005975 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5976 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005977 break;
5978 }
5979 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005980 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005981 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005982 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005983 break;
5984 }
5985 SKIP_BLANKS;
5986
5987 type = xmlParseAttributeType(ctxt, &tree);
5988 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005989 break;
5990 }
5991
5992 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005993 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005994 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5995 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005996 if (tree != NULL)
5997 xmlFreeEnumeration(tree);
5998 break;
5999 }
6000 SKIP_BLANKS;
6001
6002 def = xmlParseDefaultDecl(ctxt, &defaultValue);
6003 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006004 if (defaultValue != NULL)
6005 xmlFree(defaultValue);
6006 if (tree != NULL)
6007 xmlFreeEnumeration(tree);
6008 break;
6009 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00006010 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6011 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00006012
6013 GROW;
6014 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00006015 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006016 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006017 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006018 if (defaultValue != NULL)
6019 xmlFree(defaultValue);
6020 if (tree != NULL)
6021 xmlFreeEnumeration(tree);
6022 break;
6023 }
6024 SKIP_BLANKS;
6025 }
6026 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006027 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
6028 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006029 if (defaultValue != NULL)
6030 xmlFree(defaultValue);
6031 if (tree != NULL)
6032 xmlFreeEnumeration(tree);
6033 break;
6034 }
6035 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6036 (ctxt->sax->attributeDecl != NULL))
6037 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6038 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00006039 else if (tree != NULL)
6040 xmlFreeEnumeration(tree);
6041
6042 if ((ctxt->sax2) && (defaultValue != NULL) &&
6043 (def != XML_ATTRIBUTE_IMPLIED) &&
6044 (def != XML_ATTRIBUTE_REQUIRED)) {
6045 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6046 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00006047 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00006048 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6049 }
Owen Taylor3473f882001-02-23 17:55:21 +00006050 if (defaultValue != NULL)
6051 xmlFree(defaultValue);
6052 GROW;
6053 }
6054 if (RAW == '>') {
6055 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006056 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6057 "Attribute list declaration doesn't start and stop in the same entity\n",
6058 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006059 }
6060 NEXT;
6061 }
Owen Taylor3473f882001-02-23 17:55:21 +00006062 }
6063}
6064
6065/**
6066 * xmlParseElementMixedContentDecl:
6067 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006068 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00006069 *
6070 * parse the declaration for a Mixed Element content
6071 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6072 *
6073 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6074 * '(' S? '#PCDATA' S? ')'
6075 *
6076 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6077 *
6078 * [ VC: No Duplicate Types ]
6079 * The same name must not appear more than once in a single
6080 * mixed-content declaration.
6081 *
6082 * returns: the list of the xmlElementContentPtr describing the element choices
6083 */
6084xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006085xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00006086 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006087 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006088
6089 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006090 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006091 SKIP(7);
6092 SKIP_BLANKS;
6093 SHRINK;
6094 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006095 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006096 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6097"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006098 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006099 }
Owen Taylor3473f882001-02-23 17:55:21 +00006100 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006101 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006102 if (ret == NULL)
6103 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006104 if (RAW == '*') {
6105 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6106 NEXT;
6107 }
6108 return(ret);
6109 }
6110 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006111 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00006112 if (ret == NULL) return(NULL);
6113 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006114 while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006115 NEXT;
6116 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006117 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006118 if (ret == NULL) return(NULL);
6119 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006120 if (cur != NULL)
6121 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00006122 cur = ret;
6123 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006124 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006125 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006126 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006127 if (n->c1 != NULL)
6128 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006129 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006130 if (n != NULL)
6131 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006132 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00006133 }
6134 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006135 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006136 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006137 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006138 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006139 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00006140 return(NULL);
6141 }
6142 SKIP_BLANKS;
6143 GROW;
6144 }
6145 if ((RAW == ')') && (NXT(1) == '*')) {
6146 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006147 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00006148 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006149 if (cur->c2 != NULL)
6150 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006151 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02006152 if (ret != NULL)
6153 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006154 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006155 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6156"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006157 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006158 }
Owen Taylor3473f882001-02-23 17:55:21 +00006159 SKIP(2);
6160 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006161 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006162 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006163 return(NULL);
6164 }
6165
6166 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006167 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006168 }
6169 return(ret);
6170}
6171
6172/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006173 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00006174 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00006175 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006176 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00006177 *
6178 * parse the declaration for a Mixed Element content
6179 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6180 *
6181 *
6182 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6183 *
6184 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6185 *
6186 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6187 *
6188 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6189 *
6190 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6191 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006192 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00006193 * opening or closing parentheses in a choice, seq, or Mixed
6194 * construct is contained in the replacement text for a parameter
6195 * entity, both must be contained in the same replacement text. For
6196 * interoperability, if a parameter-entity reference appears in a
6197 * choice, seq, or Mixed construct, its replacement text should not
6198 * be empty, and neither the first nor last non-blank character of
6199 * the replacement text should be a connector (| or ,).
6200 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00006201 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00006202 * hierarchy.
6203 */
Daniel Veillard489f9672009-08-10 16:49:30 +02006204static xmlElementContentPtr
6205xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6206 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00006207 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006208 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00006209 xmlChar type = 0;
6210
Daniel Veillard489f9672009-08-10 16:49:30 +02006211 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6212 (depth > 2048)) {
6213 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6214"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6215 depth);
6216 return(NULL);
6217 }
Owen Taylor3473f882001-02-23 17:55:21 +00006218 SKIP_BLANKS;
6219 GROW;
6220 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006221 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006222
Owen Taylor3473f882001-02-23 17:55:21 +00006223 /* Recurse on first child */
6224 NEXT;
6225 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006226 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6227 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006228 SKIP_BLANKS;
6229 GROW;
6230 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006231 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006232 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006233 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006234 return(NULL);
6235 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006236 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006237 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006238 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006239 return(NULL);
6240 }
Owen Taylor3473f882001-02-23 17:55:21 +00006241 GROW;
6242 if (RAW == '?') {
6243 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6244 NEXT;
6245 } else if (RAW == '*') {
6246 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6247 NEXT;
6248 } else if (RAW == '+') {
6249 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6250 NEXT;
6251 } else {
6252 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6253 }
Owen Taylor3473f882001-02-23 17:55:21 +00006254 GROW;
6255 }
6256 SKIP_BLANKS;
6257 SHRINK;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006258 while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006259 /*
6260 * Each loop we parse one separator and one element.
6261 */
6262 if (RAW == ',') {
6263 if (type == 0) type = CUR;
6264
6265 /*
6266 * Detect "Name | Name , Name" error
6267 */
6268 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006269 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006270 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006271 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006272 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006273 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006274 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006275 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006276 return(NULL);
6277 }
6278 NEXT;
6279
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006280 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006281 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006282 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006283 xmlFreeDocElementContent(ctxt->myDoc, last);
6284 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006285 return(NULL);
6286 }
6287 if (last == NULL) {
6288 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006289 if (ret != NULL)
6290 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006291 ret = cur = op;
6292 } else {
6293 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006294 if (op != NULL)
6295 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006296 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006297 if (last != NULL)
6298 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006299 cur =op;
6300 last = NULL;
6301 }
6302 } else if (RAW == '|') {
6303 if (type == 0) type = CUR;
6304
6305 /*
6306 * Detect "Name , Name | Name" error
6307 */
6308 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006309 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006310 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006311 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006312 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006313 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006314 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006315 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006316 return(NULL);
6317 }
6318 NEXT;
6319
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006320 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006321 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006322 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006323 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006324 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006325 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006326 return(NULL);
6327 }
6328 if (last == NULL) {
6329 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006330 if (ret != NULL)
6331 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006332 ret = cur = op;
6333 } else {
6334 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006335 if (op != NULL)
6336 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006337 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006338 if (last != NULL)
6339 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006340 cur =op;
6341 last = NULL;
6342 }
6343 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006344 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006345 if ((last != NULL) && (last != ret))
6346 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006347 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006348 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006349 return(NULL);
6350 }
6351 GROW;
6352 SKIP_BLANKS;
6353 GROW;
6354 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006355 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006356 /* Recurse on second child */
6357 NEXT;
6358 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006359 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6360 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006361 SKIP_BLANKS;
6362 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006363 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006364 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006365 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006366 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006367 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006368 return(NULL);
6369 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006370 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006371 if (last == NULL) {
6372 if (ret != NULL)
6373 xmlFreeDocElementContent(ctxt->myDoc, ret);
6374 return(NULL);
6375 }
Owen Taylor3473f882001-02-23 17:55:21 +00006376 if (RAW == '?') {
6377 last->ocur = XML_ELEMENT_CONTENT_OPT;
6378 NEXT;
6379 } else if (RAW == '*') {
6380 last->ocur = XML_ELEMENT_CONTENT_MULT;
6381 NEXT;
6382 } else if (RAW == '+') {
6383 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6384 NEXT;
6385 } else {
6386 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6387 }
6388 }
6389 SKIP_BLANKS;
6390 GROW;
6391 }
6392 if ((cur != NULL) && (last != NULL)) {
6393 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006394 if (last != NULL)
6395 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006396 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006397 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006398 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6399"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006400 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006401 }
Owen Taylor3473f882001-02-23 17:55:21 +00006402 NEXT;
6403 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006404 if (ret != NULL) {
6405 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6406 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6407 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6408 else
6409 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6410 }
Owen Taylor3473f882001-02-23 17:55:21 +00006411 NEXT;
6412 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006413 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006414 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006415 cur = ret;
6416 /*
6417 * Some normalization:
6418 * (a | b* | c?)* == (a | b | c)*
6419 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006420 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006421 if ((cur->c1 != NULL) &&
6422 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6423 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6424 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6425 if ((cur->c2 != NULL) &&
6426 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6427 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6428 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6429 cur = cur->c2;
6430 }
6431 }
Owen Taylor3473f882001-02-23 17:55:21 +00006432 NEXT;
6433 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006434 if (ret != NULL) {
6435 int found = 0;
6436
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006437 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6438 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6439 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006440 else
6441 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006442 /*
6443 * Some normalization:
6444 * (a | b*)+ == (a | b)*
6445 * (a | b?)+ == (a | b)*
6446 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006447 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006448 if ((cur->c1 != NULL) &&
6449 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6450 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6451 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6452 found = 1;
6453 }
6454 if ((cur->c2 != NULL) &&
6455 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6456 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6457 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6458 found = 1;
6459 }
6460 cur = cur->c2;
6461 }
6462 if (found)
6463 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6464 }
Owen Taylor3473f882001-02-23 17:55:21 +00006465 NEXT;
6466 }
6467 return(ret);
6468}
6469
6470/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006471 * xmlParseElementChildrenContentDecl:
6472 * @ctxt: an XML parser context
6473 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006474 *
6475 * parse the declaration for a Mixed Element content
6476 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6477 *
6478 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6479 *
6480 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6481 *
6482 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6483 *
6484 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6485 *
6486 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6487 * TODO Parameter-entity replacement text must be properly nested
6488 * with parenthesized groups. That is to say, if either of the
6489 * opening or closing parentheses in a choice, seq, or Mixed
6490 * construct is contained in the replacement text for a parameter
6491 * entity, both must be contained in the same replacement text. For
6492 * interoperability, if a parameter-entity reference appears in a
6493 * choice, seq, or Mixed construct, its replacement text should not
6494 * be empty, and neither the first nor last non-blank character of
6495 * the replacement text should be a connector (| or ,).
6496 *
6497 * Returns the tree of xmlElementContentPtr describing the element
6498 * hierarchy.
6499 */
6500xmlElementContentPtr
6501xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6502 /* stub left for API/ABI compat */
6503 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6504}
6505
6506/**
Owen Taylor3473f882001-02-23 17:55:21 +00006507 * xmlParseElementContentDecl:
6508 * @ctxt: an XML parser context
6509 * @name: the name of the element being defined.
6510 * @result: the Element Content pointer will be stored here if any
6511 *
6512 * parse the declaration for an Element content either Mixed or Children,
6513 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6514 *
6515 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6516 *
6517 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6518 */
6519
6520int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006521xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006522 xmlElementContentPtr *result) {
6523
6524 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006525 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006526 int res;
6527
6528 *result = NULL;
6529
6530 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006531 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006532 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006533 return(-1);
6534 }
6535 NEXT;
6536 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006537 if (ctxt->instate == XML_PARSER_EOF)
6538 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00006539 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006540 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006541 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006542 res = XML_ELEMENT_TYPE_MIXED;
6543 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006544 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006545 res = XML_ELEMENT_TYPE_ELEMENT;
6546 }
Owen Taylor3473f882001-02-23 17:55:21 +00006547 SKIP_BLANKS;
6548 *result = tree;
6549 return(res);
6550}
6551
6552/**
6553 * xmlParseElementDecl:
6554 * @ctxt: an XML parser context
6555 *
6556 * parse an Element declaration.
6557 *
6558 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6559 *
6560 * [ VC: Unique Element Type Declaration ]
6561 * No element type may be declared more than once
6562 *
6563 * Returns the type of the element, or -1 in case of error
6564 */
6565int
6566xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006567 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006568 int ret = -1;
6569 xmlElementContentPtr content = NULL;
6570
Daniel Veillard4c778d82005-01-23 17:37:44 +00006571 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006572 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006573 xmlParserInputPtr input = ctxt->input;
6574
6575 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006576 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006577 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6578 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006579 }
6580 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006581 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006582 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006583 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6584 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006585 return(-1);
6586 }
6587 while ((RAW == 0) && (ctxt->inputNr > 1))
6588 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006589 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006590 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6591 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006592 }
6593 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006594 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006595 SKIP(5);
6596 /*
6597 * Element must always be empty.
6598 */
6599 ret = XML_ELEMENT_TYPE_EMPTY;
6600 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6601 (NXT(2) == 'Y')) {
6602 SKIP(3);
6603 /*
6604 * Element is a generic container.
6605 */
6606 ret = XML_ELEMENT_TYPE_ANY;
6607 } else if (RAW == '(') {
6608 ret = xmlParseElementContentDecl(ctxt, name, &content);
6609 } else {
6610 /*
6611 * [ WFC: PEs in Internal Subset ] error handling.
6612 */
6613 if ((RAW == '%') && (ctxt->external == 0) &&
6614 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006615 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006616 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006617 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006618 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006619 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6620 }
Owen Taylor3473f882001-02-23 17:55:21 +00006621 return(-1);
6622 }
6623
6624 SKIP_BLANKS;
6625 /*
6626 * Pop-up of finished entities.
6627 */
6628 while ((RAW == 0) && (ctxt->inputNr > 1))
6629 xmlPopInput(ctxt);
6630 SKIP_BLANKS;
6631
6632 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006633 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006634 if (content != NULL) {
6635 xmlFreeDocElementContent(ctxt->myDoc, content);
6636 }
Owen Taylor3473f882001-02-23 17:55:21 +00006637 } else {
6638 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006639 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6640 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006641 }
6642
6643 NEXT;
6644 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006645 (ctxt->sax->elementDecl != NULL)) {
6646 if (content != NULL)
6647 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006648 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6649 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006650 if ((content != NULL) && (content->parent == NULL)) {
6651 /*
6652 * this is a trick: if xmlAddElementDecl is called,
6653 * instead of copying the full tree it is plugged directly
6654 * if called from the parser. Avoid duplicating the
6655 * interfaces or change the API/ABI
6656 */
6657 xmlFreeDocElementContent(ctxt->myDoc, content);
6658 }
6659 } else if (content != NULL) {
6660 xmlFreeDocElementContent(ctxt->myDoc, content);
6661 }
Owen Taylor3473f882001-02-23 17:55:21 +00006662 }
Owen Taylor3473f882001-02-23 17:55:21 +00006663 }
6664 return(ret);
6665}
6666
6667/**
Owen Taylor3473f882001-02-23 17:55:21 +00006668 * xmlParseConditionalSections
6669 * @ctxt: an XML parser context
6670 *
6671 * [61] conditionalSect ::= includeSect | ignoreSect
6672 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6673 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6674 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6675 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6676 */
6677
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006678static void
Owen Taylor3473f882001-02-23 17:55:21 +00006679xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006680 int id = ctxt->input->id;
6681
Owen Taylor3473f882001-02-23 17:55:21 +00006682 SKIP(3);
6683 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006684 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006685 SKIP(7);
6686 SKIP_BLANKS;
6687 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006688 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006689 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006690 if (ctxt->input->id != id) {
6691 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6692 "All markup of the conditional section is not in the same entity\n",
6693 NULL, NULL);
6694 }
Owen Taylor3473f882001-02-23 17:55:21 +00006695 NEXT;
6696 }
6697 if (xmlParserDebugEntities) {
6698 if ((ctxt->input != NULL) && (ctxt->input->filename))
6699 xmlGenericError(xmlGenericErrorContext,
6700 "%s(%d): ", ctxt->input->filename,
6701 ctxt->input->line);
6702 xmlGenericError(xmlGenericErrorContext,
6703 "Entering INCLUDE Conditional Section\n");
6704 }
6705
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006706 while (((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6707 (NXT(2) != '>'))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006708 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006709 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006710
6711 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6712 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006713 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006714 NEXT;
6715 } else if (RAW == '%') {
6716 xmlParsePEReference(ctxt);
6717 } else
6718 xmlParseMarkupDecl(ctxt);
6719
6720 /*
6721 * Pop-up of finished entities.
6722 */
6723 while ((RAW == 0) && (ctxt->inputNr > 1))
6724 xmlPopInput(ctxt);
6725
Daniel Veillardfdc91562002-07-01 21:52:03 +00006726 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006727 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006728 break;
6729 }
6730 }
6731 if (xmlParserDebugEntities) {
6732 if ((ctxt->input != NULL) && (ctxt->input->filename))
6733 xmlGenericError(xmlGenericErrorContext,
6734 "%s(%d): ", ctxt->input->filename,
6735 ctxt->input->line);
6736 xmlGenericError(xmlGenericErrorContext,
6737 "Leaving INCLUDE Conditional Section\n");
6738 }
6739
Daniel Veillarda07050d2003-10-19 14:46:32 +00006740 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006741 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006742 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006743 int depth = 0;
6744
6745 SKIP(6);
6746 SKIP_BLANKS;
6747 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006748 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006749 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006750 if (ctxt->input->id != id) {
6751 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6752 "All markup of the conditional section is not in the same entity\n",
6753 NULL, NULL);
6754 }
Owen Taylor3473f882001-02-23 17:55:21 +00006755 NEXT;
6756 }
6757 if (xmlParserDebugEntities) {
6758 if ((ctxt->input != NULL) && (ctxt->input->filename))
6759 xmlGenericError(xmlGenericErrorContext,
6760 "%s(%d): ", ctxt->input->filename,
6761 ctxt->input->line);
6762 xmlGenericError(xmlGenericErrorContext,
6763 "Entering IGNORE Conditional Section\n");
6764 }
6765
6766 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006767 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006768 * But disable SAX event generating DTD building in the meantime
6769 */
6770 state = ctxt->disableSAX;
6771 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006772 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006773 ctxt->instate = XML_PARSER_IGNORE;
6774
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08006775 while (((depth >= 0) && (RAW != 0)) &&
6776 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006777 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6778 depth++;
6779 SKIP(3);
6780 continue;
6781 }
6782 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6783 if (--depth >= 0) SKIP(3);
6784 continue;
6785 }
6786 NEXT;
6787 continue;
6788 }
6789
6790 ctxt->disableSAX = state;
6791 ctxt->instate = instate;
6792
6793 if (xmlParserDebugEntities) {
6794 if ((ctxt->input != NULL) && (ctxt->input->filename))
6795 xmlGenericError(xmlGenericErrorContext,
6796 "%s(%d): ", ctxt->input->filename,
6797 ctxt->input->line);
6798 xmlGenericError(xmlGenericErrorContext,
6799 "Leaving IGNORE Conditional Section\n");
6800 }
6801
6802 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006803 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006804 }
6805
6806 if (RAW == 0)
6807 SHRINK;
6808
6809 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006810 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006811 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006812 if (ctxt->input->id != id) {
6813 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6814 "All markup of the conditional section is not in the same entity\n",
6815 NULL, NULL);
6816 }
Owen Taylor3473f882001-02-23 17:55:21 +00006817 SKIP(3);
6818 }
6819}
6820
6821/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006822 * xmlParseMarkupDecl:
6823 * @ctxt: an XML parser context
6824 *
6825 * parse Markup declarations
6826 *
6827 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6828 * NotationDecl | PI | Comment
6829 *
6830 * [ VC: Proper Declaration/PE Nesting ]
6831 * Parameter-entity replacement text must be properly nested with
6832 * markup declarations. That is to say, if either the first character
6833 * or the last character of a markup declaration (markupdecl above) is
6834 * contained in the replacement text for a parameter-entity reference,
6835 * both must be contained in the same replacement text.
6836 *
6837 * [ WFC: PEs in Internal Subset ]
6838 * In the internal DTD subset, parameter-entity references can occur
6839 * only where markup declarations can occur, not within markup declarations.
6840 * (This does not apply to references that occur in external parameter
6841 * entities or to the external subset.)
6842 */
6843void
6844xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6845 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006846 if (CUR == '<') {
6847 if (NXT(1) == '!') {
6848 switch (NXT(2)) {
6849 case 'E':
6850 if (NXT(3) == 'L')
6851 xmlParseElementDecl(ctxt);
6852 else if (NXT(3) == 'N')
6853 xmlParseEntityDecl(ctxt);
6854 break;
6855 case 'A':
6856 xmlParseAttributeListDecl(ctxt);
6857 break;
6858 case 'N':
6859 xmlParseNotationDecl(ctxt);
6860 break;
6861 case '-':
6862 xmlParseComment(ctxt);
6863 break;
6864 default:
6865 /* there is an error but it will be detected later */
6866 break;
6867 }
6868 } else if (NXT(1) == '?') {
6869 xmlParsePI(ctxt);
6870 }
6871 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006872 /*
6873 * This is only for internal subset. On external entities,
6874 * the replacement is done before parsing stage
6875 */
6876 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6877 xmlParsePEReference(ctxt);
6878
6879 /*
6880 * Conditional sections are allowed from entities included
6881 * by PE References in the internal subset.
6882 */
6883 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6884 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6885 xmlParseConditionalSections(ctxt);
6886 }
6887 }
6888
6889 ctxt->instate = XML_PARSER_DTD;
6890}
6891
6892/**
6893 * xmlParseTextDecl:
6894 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006895 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006896 * parse an XML declaration header for external entities
6897 *
6898 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006899 */
6900
6901void
6902xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6903 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006904 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006905
6906 /*
6907 * We know that '<?xml' is here.
6908 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006909 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006910 SKIP(5);
6911 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006912 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006913 return;
6914 }
6915
William M. Brack76e95df2003-10-18 16:20:14 +00006916 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006917 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6918 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006919 }
6920 SKIP_BLANKS;
6921
6922 /*
6923 * We may have the VersionInfo here.
6924 */
6925 version = xmlParseVersionInfo(ctxt);
6926 if (version == NULL)
6927 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006928 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006929 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006930 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6931 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006932 }
6933 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006934 ctxt->input->version = version;
6935
6936 /*
6937 * We must have the encoding declaration
6938 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006939 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006940 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6941 /*
6942 * The XML REC instructs us to stop parsing right here
6943 */
6944 return;
6945 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006946 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6947 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6948 "Missing encoding in text declaration\n");
6949 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006950
6951 SKIP_BLANKS;
6952 if ((RAW == '?') && (NXT(1) == '>')) {
6953 SKIP(2);
6954 } else if (RAW == '>') {
6955 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006956 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006957 NEXT;
6958 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006959 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006960 MOVETO_ENDTAG(CUR_PTR);
6961 NEXT;
6962 }
6963}
6964
6965/**
Owen Taylor3473f882001-02-23 17:55:21 +00006966 * xmlParseExternalSubset:
6967 * @ctxt: an XML parser context
6968 * @ExternalID: the external identifier
6969 * @SystemID: the system identifier (or URL)
6970 *
6971 * parse Markup declarations from an external subset
6972 *
6973 * [30] extSubset ::= textDecl? extSubsetDecl
6974 *
6975 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6976 */
6977void
6978xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6979 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006980 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006981 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006982
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006983 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006984 (ctxt->input->end - ctxt->input->cur >= 4)) {
6985 xmlChar start[4];
6986 xmlCharEncoding enc;
6987
6988 start[0] = RAW;
6989 start[1] = NXT(1);
6990 start[2] = NXT(2);
6991 start[3] = NXT(3);
6992 enc = xmlDetectCharEncoding(start, 4);
6993 if (enc != XML_CHAR_ENCODING_NONE)
6994 xmlSwitchEncoding(ctxt, enc);
6995 }
6996
Daniel Veillarda07050d2003-10-19 14:46:32 +00006997 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006998 xmlParseTextDecl(ctxt);
6999 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7000 /*
7001 * The XML REC instructs us to stop parsing right here
7002 */
7003 ctxt->instate = XML_PARSER_EOF;
7004 return;
7005 }
7006 }
7007 if (ctxt->myDoc == NULL) {
7008 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00007009 if (ctxt->myDoc == NULL) {
7010 xmlErrMemory(ctxt, "New Doc failed");
7011 return;
7012 }
7013 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00007014 }
7015 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7016 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7017
7018 ctxt->instate = XML_PARSER_DTD;
7019 ctxt->external = 1;
7020 while (((RAW == '<') && (NXT(1) == '?')) ||
7021 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00007022 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007023 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007024 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007025
7026 GROW;
7027 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7028 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00007029 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007030 NEXT;
7031 } else if (RAW == '%') {
7032 xmlParsePEReference(ctxt);
7033 } else
7034 xmlParseMarkupDecl(ctxt);
7035
7036 /*
7037 * Pop-up of finished entities.
7038 */
7039 while ((RAW == 0) && (ctxt->inputNr > 1))
7040 xmlPopInput(ctxt);
7041
Daniel Veillardfdc91562002-07-01 21:52:03 +00007042 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007043 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007044 break;
7045 }
7046 }
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007047
Owen Taylor3473f882001-02-23 17:55:21 +00007048 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007049 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007050 }
7051
7052}
7053
7054/**
7055 * xmlParseReference:
7056 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00007057 *
Owen Taylor3473f882001-02-23 17:55:21 +00007058 * parse and handle entity references in content, depending on the SAX
7059 * interface, this may end-up in a call to character() if this is a
7060 * CharRef, a predefined entity, if there is no reference() callback.
7061 * or if the parser was asked to switch to that mode.
7062 *
7063 * [67] Reference ::= EntityRef | CharRef
7064 */
7065void
7066xmlParseReference(xmlParserCtxtPtr ctxt) {
7067 xmlEntityPtr ent;
7068 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00007069 int was_checked;
7070 xmlNodePtr list = NULL;
7071 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00007072
Daniel Veillard0161e632008-08-28 15:36:32 +00007073
7074 if (RAW != '&')
7075 return;
7076
7077 /*
7078 * Simple case of a CharRef
7079 */
Owen Taylor3473f882001-02-23 17:55:21 +00007080 if (NXT(1) == '#') {
7081 int i = 0;
7082 xmlChar out[10];
7083 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007084 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00007085
Daniel Veillarddc171602008-03-26 17:41:38 +00007086 if (value == 0)
7087 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007088 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
7089 /*
7090 * So we are using non-UTF-8 buffers
7091 * Check that the char fit on 8bits, if not
7092 * generate a CharRef.
7093 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007094 if (value <= 0xFF) {
7095 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00007096 out[1] = 0;
7097 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7098 (!ctxt->disableSAX))
7099 ctxt->sax->characters(ctxt->userData, out, 1);
7100 } else {
7101 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007102 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007103 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00007104 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00007105 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7106 (!ctxt->disableSAX))
7107 ctxt->sax->reference(ctxt->userData, out);
7108 }
7109 } else {
7110 /*
7111 * Just encode the value in UTF-8
7112 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007113 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00007114 out[i] = 0;
7115 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7116 (!ctxt->disableSAX))
7117 ctxt->sax->characters(ctxt->userData, out, i);
7118 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007119 return;
7120 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007121
Daniel Veillard0161e632008-08-28 15:36:32 +00007122 /*
7123 * We are seeing an entity reference
7124 */
7125 ent = xmlParseEntityRef(ctxt);
7126 if (ent == NULL) return;
7127 if (!ctxt->wellFormed)
7128 return;
7129 was_checked = ent->checked;
7130
7131 /* special case of predefined entities */
7132 if ((ent->name == NULL) ||
7133 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7134 val = ent->content;
7135 if (val == NULL) return;
7136 /*
7137 * inline the entity.
7138 */
7139 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7140 (!ctxt->disableSAX))
7141 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7142 return;
7143 }
7144
7145 /*
7146 * The first reference to the entity trigger a parsing phase
7147 * where the ent->children is filled with the result from
7148 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08007149 * Note: external parsed entities will not be loaded, it is not
7150 * required for a non-validating parser, unless the parsing option
7151 * of validating, or substituting entities were given. Doing so is
7152 * far more secure as the parser will only process data coming from
7153 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00007154 */
Daniel Veillard4629ee02012-07-23 14:15:40 +08007155 if ((ent->checked == 0) &&
7156 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7157 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00007158 unsigned long oldnbent = ctxt->nbentities;
7159
7160 /*
7161 * This is a bit hackish but this seems the best
7162 * way to make sure both SAX and DOM entity support
7163 * behaves okay.
7164 */
7165 void *user_data;
7166 if (ctxt->userData == ctxt)
7167 user_data = NULL;
7168 else
7169 user_data = ctxt->userData;
7170
7171 /*
7172 * Check that this entity is well formed
7173 * 4.3.2: An internal general parsed entity is well-formed
7174 * if its replacement text matches the production labeled
7175 * content.
7176 */
7177 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7178 ctxt->depth++;
7179 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7180 user_data, &list);
7181 ctxt->depth--;
7182
7183 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7184 ctxt->depth++;
7185 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7186 user_data, ctxt->depth, ent->URI,
7187 ent->ExternalID, &list);
7188 ctxt->depth--;
7189 } else {
7190 ret = XML_ERR_ENTITY_PE_INTERNAL;
7191 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7192 "invalid entity type found\n", NULL);
7193 }
7194
7195 /*
7196 * Store the number of entities needing parsing for this entity
7197 * content and do checkings
7198 */
7199 ent->checked = ctxt->nbentities - oldnbent;
7200 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007201 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00007202 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00007203 return;
7204 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007205 if (xmlParserEntityCheck(ctxt, 0, ent)) {
7206 xmlFreeNodeList(list);
7207 return;
7208 }
Owen Taylor3473f882001-02-23 17:55:21 +00007209
Daniel Veillard0161e632008-08-28 15:36:32 +00007210 if ((ret == XML_ERR_OK) && (list != NULL)) {
7211 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
7212 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
7213 (ent->children == NULL)) {
7214 ent->children = list;
7215 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007216 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007217 * Prune it directly in the generated document
7218 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007219 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007220 if (((list->type == XML_TEXT_NODE) &&
7221 (list->next == NULL)) ||
7222 (ctxt->parseMode == XML_PARSE_READER)) {
7223 list->parent = (xmlNodePtr) ent;
7224 list = NULL;
7225 ent->owner = 1;
7226 } else {
7227 ent->owner = 0;
7228 while (list != NULL) {
7229 list->parent = (xmlNodePtr) ctxt->node;
7230 list->doc = ctxt->myDoc;
7231 if (list->next == NULL)
7232 ent->last = list;
7233 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007234 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007235 list = ent->children;
7236#ifdef LIBXML_LEGACY_ENABLED
7237 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7238 xmlAddEntityReference(ent, list, NULL);
7239#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007240 }
7241 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007242 ent->owner = 1;
7243 while (list != NULL) {
7244 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007245 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007246 if (list->next == NULL)
7247 ent->last = list;
7248 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007249 }
7250 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007251 } else {
7252 xmlFreeNodeList(list);
7253 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007254 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007255 } else if ((ret != XML_ERR_OK) &&
7256 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7257 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7258 "Entity '%s' failed to parse\n", ent->name);
7259 } else if (list != NULL) {
7260 xmlFreeNodeList(list);
7261 list = NULL;
7262 }
7263 if (ent->checked == 0)
7264 ent->checked = 1;
7265 } else if (ent->checked != 1) {
7266 ctxt->nbentities += ent->checked;
7267 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007268
Daniel Veillard0161e632008-08-28 15:36:32 +00007269 /*
7270 * Now that the entity content has been gathered
7271 * provide it to the application, this can take different forms based
7272 * on the parsing modes.
7273 */
7274 if (ent->children == NULL) {
7275 /*
7276 * Probably running in SAX mode and the callbacks don't
7277 * build the entity content. So unless we already went
7278 * though parsing for first checking go though the entity
7279 * content to generate callbacks associated to the entity
7280 */
7281 if (was_checked != 0) {
7282 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007283 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007284 * This is a bit hackish but this seems the best
7285 * way to make sure both SAX and DOM entity support
7286 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007287 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007288 if (ctxt->userData == ctxt)
7289 user_data = NULL;
7290 else
7291 user_data = ctxt->userData;
7292
7293 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7294 ctxt->depth++;
7295 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7296 ent->content, user_data, NULL);
7297 ctxt->depth--;
7298 } else if (ent->etype ==
7299 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7300 ctxt->depth++;
7301 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7302 ctxt->sax, user_data, ctxt->depth,
7303 ent->URI, ent->ExternalID, NULL);
7304 ctxt->depth--;
7305 } else {
7306 ret = XML_ERR_ENTITY_PE_INTERNAL;
7307 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7308 "invalid entity type found\n", NULL);
7309 }
7310 if (ret == XML_ERR_ENTITY_LOOP) {
7311 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7312 return;
7313 }
7314 }
7315 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7316 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7317 /*
7318 * Entity reference callback comes second, it's somewhat
7319 * superfluous but a compatibility to historical behaviour
7320 */
7321 ctxt->sax->reference(ctxt->userData, ent->name);
7322 }
7323 return;
7324 }
7325
7326 /*
7327 * If we didn't get any children for the entity being built
7328 */
7329 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7330 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7331 /*
7332 * Create a node.
7333 */
7334 ctxt->sax->reference(ctxt->userData, ent->name);
7335 return;
7336 }
7337
7338 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7339 /*
7340 * There is a problem on the handling of _private for entities
7341 * (bug 155816): Should we copy the content of the field from
7342 * the entity (possibly overwriting some value set by the user
7343 * when a copy is created), should we leave it alone, or should
7344 * we try to take care of different situations? The problem
7345 * is exacerbated by the usage of this field by the xmlReader.
7346 * To fix this bug, we look at _private on the created node
7347 * and, if it's NULL, we copy in whatever was in the entity.
7348 * If it's not NULL we leave it alone. This is somewhat of a
7349 * hack - maybe we should have further tests to determine
7350 * what to do.
7351 */
7352 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7353 /*
7354 * Seems we are generating the DOM content, do
7355 * a simple tree copy for all references except the first
7356 * In the first occurrence list contains the replacement.
Daniel Veillard0161e632008-08-28 15:36:32 +00007357 */
7358 if (((list == NULL) && (ent->owner == 0)) ||
7359 (ctxt->parseMode == XML_PARSE_READER)) {
7360 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7361
7362 /*
7363 * when operating on a reader, the entities definitions
7364 * are always owning the entities subtree.
7365 if (ctxt->parseMode == XML_PARSE_READER)
7366 ent->owner = 1;
7367 */
7368
7369 cur = ent->children;
7370 while (cur != NULL) {
7371 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7372 if (nw != NULL) {
7373 if (nw->_private == NULL)
7374 nw->_private = cur->_private;
7375 if (firstChild == NULL){
7376 firstChild = nw;
7377 }
7378 nw = xmlAddChild(ctxt->node, nw);
7379 }
7380 if (cur == ent->last) {
7381 /*
7382 * needed to detect some strange empty
7383 * node cases in the reader tests
7384 */
7385 if ((ctxt->parseMode == XML_PARSE_READER) &&
7386 (nw != NULL) &&
7387 (nw->type == XML_ELEMENT_NODE) &&
7388 (nw->children == NULL))
7389 nw->extra = 1;
7390
7391 break;
7392 }
7393 cur = cur->next;
7394 }
7395#ifdef LIBXML_LEGACY_ENABLED
7396 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7397 xmlAddEntityReference(ent, firstChild, nw);
7398#endif /* LIBXML_LEGACY_ENABLED */
7399 } else if (list == NULL) {
7400 xmlNodePtr nw = NULL, cur, next, last,
7401 firstChild = NULL;
7402 /*
7403 * Copy the entity child list and make it the new
7404 * entity child list. The goal is to make sure any
7405 * ID or REF referenced will be the one from the
7406 * document content and not the entity copy.
7407 */
7408 cur = ent->children;
7409 ent->children = NULL;
7410 last = ent->last;
7411 ent->last = NULL;
7412 while (cur != NULL) {
7413 next = cur->next;
7414 cur->next = NULL;
7415 cur->parent = NULL;
7416 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7417 if (nw != NULL) {
7418 if (nw->_private == NULL)
7419 nw->_private = cur->_private;
7420 if (firstChild == NULL){
7421 firstChild = cur;
7422 }
7423 xmlAddChild((xmlNodePtr) ent, nw);
7424 xmlAddChild(ctxt->node, cur);
7425 }
7426 if (cur == last)
7427 break;
7428 cur = next;
7429 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007430 if (ent->owner == 0)
7431 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007432#ifdef LIBXML_LEGACY_ENABLED
7433 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7434 xmlAddEntityReference(ent, firstChild, nw);
7435#endif /* LIBXML_LEGACY_ENABLED */
7436 } else {
7437 const xmlChar *nbktext;
7438
7439 /*
7440 * the name change is to avoid coalescing of the
7441 * node with a possible previous text one which
7442 * would make ent->children a dangling pointer
7443 */
7444 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7445 -1);
7446 if (ent->children->type == XML_TEXT_NODE)
7447 ent->children->name = nbktext;
7448 if ((ent->last != ent->children) &&
7449 (ent->last->type == XML_TEXT_NODE))
7450 ent->last->name = nbktext;
7451 xmlAddChildList(ctxt->node, ent->children);
7452 }
7453
7454 /*
7455 * This is to avoid a nasty side effect, see
7456 * characters() in SAX.c
7457 */
7458 ctxt->nodemem = 0;
7459 ctxt->nodelen = 0;
7460 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007461 }
7462 }
7463}
7464
7465/**
7466 * xmlParseEntityRef:
7467 * @ctxt: an XML parser context
7468 *
7469 * parse ENTITY references declarations
7470 *
7471 * [68] EntityRef ::= '&' Name ';'
7472 *
7473 * [ WFC: Entity Declared ]
7474 * In a document without any DTD, a document with only an internal DTD
7475 * subset which contains no parameter entity references, or a document
7476 * with "standalone='yes'", the Name given in the entity reference
7477 * must match that in an entity declaration, except that well-formed
7478 * documents need not declare any of the following entities: amp, lt,
7479 * gt, apos, quot. The declaration of a parameter entity must precede
7480 * any reference to it. Similarly, the declaration of a general entity
7481 * must precede any reference to it which appears in a default value in an
7482 * attribute-list declaration. Note that if entities are declared in the
7483 * external subset or in external parameter entities, a non-validating
7484 * processor is not obligated to read and process their declarations;
7485 * for such documents, the rule that an entity must be declared is a
7486 * well-formedness constraint only if standalone='yes'.
7487 *
7488 * [ WFC: Parsed Entity ]
7489 * An entity reference must not contain the name of an unparsed entity
7490 *
7491 * Returns the xmlEntityPtr if found, or NULL otherwise.
7492 */
7493xmlEntityPtr
7494xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007495 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007496 xmlEntityPtr ent = NULL;
7497
7498 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08007499 if (ctxt->instate == XML_PARSER_EOF)
7500 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007501
Daniel Veillard0161e632008-08-28 15:36:32 +00007502 if (RAW != '&')
7503 return(NULL);
7504 NEXT;
7505 name = xmlParseName(ctxt);
7506 if (name == NULL) {
7507 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7508 "xmlParseEntityRef: no name\n");
7509 return(NULL);
7510 }
7511 if (RAW != ';') {
7512 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7513 return(NULL);
7514 }
7515 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007516
Daniel Veillard0161e632008-08-28 15:36:32 +00007517 /*
7518 * Predefined entites override any extra definition
7519 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007520 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7521 ent = xmlGetPredefinedEntity(name);
7522 if (ent != NULL)
7523 return(ent);
7524 }
Owen Taylor3473f882001-02-23 17:55:21 +00007525
Daniel Veillard0161e632008-08-28 15:36:32 +00007526 /*
7527 * Increate the number of entity references parsed
7528 */
7529 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007530
Daniel Veillard0161e632008-08-28 15:36:32 +00007531 /*
7532 * Ask first SAX for entity resolution, otherwise try the
7533 * entities which may have stored in the parser context.
7534 */
7535 if (ctxt->sax != NULL) {
7536 if (ctxt->sax->getEntity != NULL)
7537 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007538 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7539 (ctxt->options & XML_PARSE_OLDSAX))
7540 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007541 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7542 (ctxt->userData==ctxt)) {
7543 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007544 }
7545 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007546 /*
7547 * [ WFC: Entity Declared ]
7548 * In a document without any DTD, a document with only an
7549 * internal DTD subset which contains no parameter entity
7550 * references, or a document with "standalone='yes'", the
7551 * Name given in the entity reference must match that in an
7552 * entity declaration, except that well-formed documents
7553 * need not declare any of the following entities: amp, lt,
7554 * gt, apos, quot.
7555 * The declaration of a parameter entity must precede any
7556 * reference to it.
7557 * Similarly, the declaration of a general entity must
7558 * precede any reference to it which appears in a default
7559 * value in an attribute-list declaration. Note that if
7560 * entities are declared in the external subset or in
7561 * external parameter entities, a non-validating processor
7562 * is not obligated to read and process their declarations;
7563 * for such documents, the rule that an entity must be
7564 * declared is a well-formedness constraint only if
7565 * standalone='yes'.
7566 */
7567 if (ent == NULL) {
7568 if ((ctxt->standalone == 1) ||
7569 ((ctxt->hasExternalSubset == 0) &&
7570 (ctxt->hasPErefs == 0))) {
7571 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7572 "Entity '%s' not defined\n", name);
7573 } else {
7574 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7575 "Entity '%s' not defined\n", name);
7576 if ((ctxt->inSubset == 0) &&
7577 (ctxt->sax != NULL) &&
7578 (ctxt->sax->reference != NULL)) {
7579 ctxt->sax->reference(ctxt->userData, name);
7580 }
7581 }
7582 ctxt->valid = 0;
7583 }
7584
7585 /*
7586 * [ WFC: Parsed Entity ]
7587 * An entity reference must not contain the name of an
7588 * unparsed entity
7589 */
7590 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7591 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7592 "Entity reference to unparsed entity %s\n", name);
7593 }
7594
7595 /*
7596 * [ WFC: No External Entity References ]
7597 * Attribute values cannot contain direct or indirect
7598 * entity references to external entities.
7599 */
7600 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7601 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7602 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7603 "Attribute references external entity '%s'\n", name);
7604 }
7605 /*
7606 * [ WFC: No < in Attribute Values ]
7607 * The replacement text of any entity referred to directly or
7608 * indirectly in an attribute value (other than "&lt;") must
7609 * not contain a <.
7610 */
7611 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7612 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007613 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007614 (xmlStrchr(ent->content, '<'))) {
7615 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7616 "'<' in entity '%s' is not allowed in attributes values\n", name);
7617 }
7618
7619 /*
7620 * Internal check, no parameter entities here ...
7621 */
7622 else {
7623 switch (ent->etype) {
7624 case XML_INTERNAL_PARAMETER_ENTITY:
7625 case XML_EXTERNAL_PARAMETER_ENTITY:
7626 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7627 "Attempt to reference the parameter entity '%s'\n",
7628 name);
7629 break;
7630 default:
7631 break;
7632 }
7633 }
7634
7635 /*
7636 * [ WFC: No Recursion ]
7637 * A parsed entity must not contain a recursive reference
7638 * to itself, either directly or indirectly.
7639 * Done somewhere else
7640 */
Owen Taylor3473f882001-02-23 17:55:21 +00007641 return(ent);
7642}
7643
7644/**
7645 * xmlParseStringEntityRef:
7646 * @ctxt: an XML parser context
7647 * @str: a pointer to an index in the string
7648 *
7649 * parse ENTITY references declarations, but this version parses it from
7650 * a string value.
7651 *
7652 * [68] EntityRef ::= '&' Name ';'
7653 *
7654 * [ WFC: Entity Declared ]
7655 * In a document without any DTD, a document with only an internal DTD
7656 * subset which contains no parameter entity references, or a document
7657 * with "standalone='yes'", the Name given in the entity reference
7658 * must match that in an entity declaration, except that well-formed
7659 * documents need not declare any of the following entities: amp, lt,
7660 * gt, apos, quot. The declaration of a parameter entity must precede
7661 * any reference to it. Similarly, the declaration of a general entity
7662 * must precede any reference to it which appears in a default value in an
7663 * attribute-list declaration. Note that if entities are declared in the
7664 * external subset or in external parameter entities, a non-validating
7665 * processor is not obligated to read and process their declarations;
7666 * for such documents, the rule that an entity must be declared is a
7667 * well-formedness constraint only if standalone='yes'.
7668 *
7669 * [ WFC: Parsed Entity ]
7670 * An entity reference must not contain the name of an unparsed entity
7671 *
7672 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7673 * is updated to the current location in the string.
7674 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007675static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007676xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7677 xmlChar *name;
7678 const xmlChar *ptr;
7679 xmlChar cur;
7680 xmlEntityPtr ent = NULL;
7681
7682 if ((str == NULL) || (*str == NULL))
7683 return(NULL);
7684 ptr = *str;
7685 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007686 if (cur != '&')
7687 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007688
Daniel Veillard0161e632008-08-28 15:36:32 +00007689 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007690 name = xmlParseStringName(ctxt, &ptr);
7691 if (name == NULL) {
7692 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7693 "xmlParseStringEntityRef: no name\n");
7694 *str = ptr;
7695 return(NULL);
7696 }
7697 if (*ptr != ';') {
7698 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007699 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007700 *str = ptr;
7701 return(NULL);
7702 }
7703 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007704
Owen Taylor3473f882001-02-23 17:55:21 +00007705
Daniel Veillard0161e632008-08-28 15:36:32 +00007706 /*
7707 * Predefined entites override any extra definition
7708 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007709 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7710 ent = xmlGetPredefinedEntity(name);
7711 if (ent != NULL) {
7712 xmlFree(name);
7713 *str = ptr;
7714 return(ent);
7715 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007716 }
Owen Taylor3473f882001-02-23 17:55:21 +00007717
Daniel Veillard0161e632008-08-28 15:36:32 +00007718 /*
7719 * Increate the number of entity references parsed
7720 */
7721 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007722
Daniel Veillard0161e632008-08-28 15:36:32 +00007723 /*
7724 * Ask first SAX for entity resolution, otherwise try the
7725 * entities which may have stored in the parser context.
7726 */
7727 if (ctxt->sax != NULL) {
7728 if (ctxt->sax->getEntity != NULL)
7729 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007730 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7731 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007732 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7733 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007734 }
7735 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007736
7737 /*
7738 * [ WFC: Entity Declared ]
7739 * In a document without any DTD, a document with only an
7740 * internal DTD subset which contains no parameter entity
7741 * references, or a document with "standalone='yes'", the
7742 * Name given in the entity reference must match that in an
7743 * entity declaration, except that well-formed documents
7744 * need not declare any of the following entities: amp, lt,
7745 * gt, apos, quot.
7746 * The declaration of a parameter entity must precede any
7747 * reference to it.
7748 * Similarly, the declaration of a general entity must
7749 * precede any reference to it which appears in a default
7750 * value in an attribute-list declaration. Note that if
7751 * entities are declared in the external subset or in
7752 * external parameter entities, a non-validating processor
7753 * is not obligated to read and process their declarations;
7754 * for such documents, the rule that an entity must be
7755 * declared is a well-formedness constraint only if
7756 * standalone='yes'.
7757 */
7758 if (ent == NULL) {
7759 if ((ctxt->standalone == 1) ||
7760 ((ctxt->hasExternalSubset == 0) &&
7761 (ctxt->hasPErefs == 0))) {
7762 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7763 "Entity '%s' not defined\n", name);
7764 } else {
7765 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7766 "Entity '%s' not defined\n",
7767 name);
7768 }
7769 /* TODO ? check regressions ctxt->valid = 0; */
7770 }
7771
7772 /*
7773 * [ WFC: Parsed Entity ]
7774 * An entity reference must not contain the name of an
7775 * unparsed entity
7776 */
7777 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7778 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7779 "Entity reference to unparsed entity %s\n", name);
7780 }
7781
7782 /*
7783 * [ WFC: No External Entity References ]
7784 * Attribute values cannot contain direct or indirect
7785 * entity references to external entities.
7786 */
7787 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7788 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7789 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7790 "Attribute references external entity '%s'\n", name);
7791 }
7792 /*
7793 * [ WFC: No < in Attribute Values ]
7794 * The replacement text of any entity referred to directly or
7795 * indirectly in an attribute value (other than "&lt;") must
7796 * not contain a <.
7797 */
7798 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7799 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007800 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007801 (xmlStrchr(ent->content, '<'))) {
7802 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7803 "'<' in entity '%s' is not allowed in attributes values\n",
7804 name);
7805 }
7806
7807 /*
7808 * Internal check, no parameter entities here ...
7809 */
7810 else {
7811 switch (ent->etype) {
7812 case XML_INTERNAL_PARAMETER_ENTITY:
7813 case XML_EXTERNAL_PARAMETER_ENTITY:
7814 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7815 "Attempt to reference the parameter entity '%s'\n",
7816 name);
7817 break;
7818 default:
7819 break;
7820 }
7821 }
7822
7823 /*
7824 * [ WFC: No Recursion ]
7825 * A parsed entity must not contain a recursive reference
7826 * to itself, either directly or indirectly.
7827 * Done somewhere else
7828 */
7829
7830 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007831 *str = ptr;
7832 return(ent);
7833}
7834
7835/**
7836 * xmlParsePEReference:
7837 * @ctxt: an XML parser context
7838 *
7839 * parse PEReference declarations
7840 * The entity content is handled directly by pushing it's content as
7841 * a new input stream.
7842 *
7843 * [69] PEReference ::= '%' Name ';'
7844 *
7845 * [ WFC: No Recursion ]
7846 * A parsed entity must not contain a recursive
7847 * reference to itself, either directly or indirectly.
7848 *
7849 * [ WFC: Entity Declared ]
7850 * In a document without any DTD, a document with only an internal DTD
7851 * subset which contains no parameter entity references, or a document
7852 * with "standalone='yes'", ... ... The declaration of a parameter
7853 * entity must precede any reference to it...
7854 *
7855 * [ VC: Entity Declared ]
7856 * In a document with an external subset or external parameter entities
7857 * with "standalone='no'", ... ... The declaration of a parameter entity
7858 * must precede any reference to it...
7859 *
7860 * [ WFC: In DTD ]
7861 * Parameter-entity references may only appear in the DTD.
7862 * NOTE: misleading but this is handled.
7863 */
7864void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007865xmlParsePEReference(xmlParserCtxtPtr ctxt)
7866{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007867 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007868 xmlEntityPtr entity = NULL;
7869 xmlParserInputPtr input;
7870
Daniel Veillard0161e632008-08-28 15:36:32 +00007871 if (RAW != '%')
7872 return;
7873 NEXT;
7874 name = xmlParseName(ctxt);
7875 if (name == NULL) {
7876 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7877 "xmlParsePEReference: no name\n");
7878 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007879 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007880 if (RAW != ';') {
7881 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7882 return;
7883 }
7884
7885 NEXT;
7886
7887 /*
7888 * Increate the number of entity references parsed
7889 */
7890 ctxt->nbentities++;
7891
7892 /*
7893 * Request the entity from SAX
7894 */
7895 if ((ctxt->sax != NULL) &&
7896 (ctxt->sax->getParameterEntity != NULL))
7897 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7898 name);
7899 if (entity == NULL) {
7900 /*
7901 * [ WFC: Entity Declared ]
7902 * In a document without any DTD, a document with only an
7903 * internal DTD subset which contains no parameter entity
7904 * references, or a document with "standalone='yes'", ...
7905 * ... The declaration of a parameter entity must precede
7906 * any reference to it...
7907 */
7908 if ((ctxt->standalone == 1) ||
7909 ((ctxt->hasExternalSubset == 0) &&
7910 (ctxt->hasPErefs == 0))) {
7911 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7912 "PEReference: %%%s; not found\n",
7913 name);
7914 } else {
7915 /*
7916 * [ VC: Entity Declared ]
7917 * In a document with an external subset or external
7918 * parameter entities with "standalone='no'", ...
7919 * ... The declaration of a parameter entity must
7920 * precede any reference to it...
7921 */
7922 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7923 "PEReference: %%%s; not found\n",
7924 name, NULL);
7925 ctxt->valid = 0;
7926 }
7927 } else {
7928 /*
7929 * Internal checking in case the entity quest barfed
7930 */
7931 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7932 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7933 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7934 "Internal: %%%s; is not a parameter entity\n",
7935 name, NULL);
7936 } else if (ctxt->input->free != deallocblankswrapper) {
7937 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7938 if (xmlPushInput(ctxt, input) < 0)
7939 return;
7940 } else {
7941 /*
7942 * TODO !!!
7943 * handle the extra spaces added before and after
7944 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7945 */
7946 input = xmlNewEntityInputStream(ctxt, entity);
7947 if (xmlPushInput(ctxt, input) < 0)
7948 return;
7949 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7950 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7951 (IS_BLANK_CH(NXT(5)))) {
7952 xmlParseTextDecl(ctxt);
7953 if (ctxt->errNo ==
7954 XML_ERR_UNSUPPORTED_ENCODING) {
7955 /*
7956 * The XML REC instructs us to stop parsing
7957 * right here
7958 */
7959 ctxt->instate = XML_PARSER_EOF;
7960 return;
7961 }
7962 }
7963 }
7964 }
7965 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007966}
7967
7968/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007969 * xmlLoadEntityContent:
7970 * @ctxt: an XML parser context
7971 * @entity: an unloaded system entity
7972 *
7973 * Load the original content of the given system entity from the
7974 * ExternalID/SystemID given. This is to be used for Included in Literal
7975 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7976 *
7977 * Returns 0 in case of success and -1 in case of failure
7978 */
7979static int
7980xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7981 xmlParserInputPtr input;
7982 xmlBufferPtr buf;
7983 int l, c;
7984 int count = 0;
7985
7986 if ((ctxt == NULL) || (entity == NULL) ||
7987 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7988 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7989 (entity->content != NULL)) {
7990 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7991 "xmlLoadEntityContent parameter error");
7992 return(-1);
7993 }
7994
7995 if (xmlParserDebugEntities)
7996 xmlGenericError(xmlGenericErrorContext,
7997 "Reading %s entity content input\n", entity->name);
7998
7999 buf = xmlBufferCreate();
8000 if (buf == NULL) {
8001 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8002 "xmlLoadEntityContent parameter error");
8003 return(-1);
8004 }
8005
8006 input = xmlNewEntityInputStream(ctxt, entity);
8007 if (input == NULL) {
8008 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8009 "xmlLoadEntityContent input error");
8010 xmlBufferFree(buf);
8011 return(-1);
8012 }
8013
8014 /*
8015 * Push the entity as the current input, read char by char
8016 * saving to the buffer until the end of the entity or an error
8017 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00008018 if (xmlPushInput(ctxt, input) < 0) {
8019 xmlBufferFree(buf);
8020 return(-1);
8021 }
8022
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008023 GROW;
8024 c = CUR_CHAR(l);
8025 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
8026 (IS_CHAR(c))) {
8027 xmlBufferAdd(buf, ctxt->input->cur, l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008028 if (count++ > XML_PARSER_CHUNK_SIZE) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008029 count = 0;
8030 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008031 if (ctxt->instate == XML_PARSER_EOF) {
8032 xmlBufferFree(buf);
8033 return(-1);
8034 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008035 }
8036 NEXTL(l);
8037 c = CUR_CHAR(l);
Daniel Veillard1f972e92012-08-15 10:16:37 +08008038 if (c == 0) {
8039 count = 0;
8040 GROW;
8041 if (ctxt->instate == XML_PARSER_EOF) {
8042 xmlBufferFree(buf);
8043 return(-1);
8044 }
8045 c = CUR_CHAR(l);
8046 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008047 }
8048
8049 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
8050 xmlPopInput(ctxt);
8051 } else if (!IS_CHAR(c)) {
8052 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8053 "xmlLoadEntityContent: invalid char value %d\n",
8054 c);
8055 xmlBufferFree(buf);
8056 return(-1);
8057 }
8058 entity->content = buf->content;
8059 buf->content = NULL;
8060 xmlBufferFree(buf);
8061
8062 return(0);
8063}
8064
8065/**
Owen Taylor3473f882001-02-23 17:55:21 +00008066 * xmlParseStringPEReference:
8067 * @ctxt: an XML parser context
8068 * @str: a pointer to an index in the string
8069 *
8070 * parse PEReference declarations
8071 *
8072 * [69] PEReference ::= '%' Name ';'
8073 *
8074 * [ WFC: No Recursion ]
8075 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00008076 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00008077 *
8078 * [ WFC: Entity Declared ]
8079 * In a document without any DTD, a document with only an internal DTD
8080 * subset which contains no parameter entity references, or a document
8081 * with "standalone='yes'", ... ... The declaration of a parameter
8082 * entity must precede any reference to it...
8083 *
8084 * [ VC: Entity Declared ]
8085 * In a document with an external subset or external parameter entities
8086 * with "standalone='no'", ... ... The declaration of a parameter entity
8087 * must precede any reference to it...
8088 *
8089 * [ WFC: In DTD ]
8090 * Parameter-entity references may only appear in the DTD.
8091 * NOTE: misleading but this is handled.
8092 *
8093 * Returns the string of the entity content.
8094 * str is updated to the current value of the index
8095 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02008096static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00008097xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8098 const xmlChar *ptr;
8099 xmlChar cur;
8100 xmlChar *name;
8101 xmlEntityPtr entity = NULL;
8102
8103 if ((str == NULL) || (*str == NULL)) return(NULL);
8104 ptr = *str;
8105 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00008106 if (cur != '%')
8107 return(NULL);
8108 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00008109 name = xmlParseStringName(ctxt, &ptr);
8110 if (name == NULL) {
8111 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8112 "xmlParseStringPEReference: no name\n");
8113 *str = ptr;
8114 return(NULL);
8115 }
8116 cur = *ptr;
8117 if (cur != ';') {
8118 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8119 xmlFree(name);
8120 *str = ptr;
8121 return(NULL);
8122 }
8123 ptr++;
8124
8125 /*
8126 * Increate the number of entity references parsed
8127 */
8128 ctxt->nbentities++;
8129
8130 /*
8131 * Request the entity from SAX
8132 */
8133 if ((ctxt->sax != NULL) &&
8134 (ctxt->sax->getParameterEntity != NULL))
8135 entity = ctxt->sax->getParameterEntity(ctxt->userData,
8136 name);
8137 if (entity == NULL) {
8138 /*
8139 * [ WFC: Entity Declared ]
8140 * In a document without any DTD, a document with only an
8141 * internal DTD subset which contains no parameter entity
8142 * references, or a document with "standalone='yes'", ...
8143 * ... The declaration of a parameter entity must precede
8144 * any reference to it...
8145 */
8146 if ((ctxt->standalone == 1) ||
8147 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8148 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8149 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008150 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00008151 /*
8152 * [ VC: Entity Declared ]
8153 * In a document with an external subset or external
8154 * parameter entities with "standalone='no'", ...
8155 * ... The declaration of a parameter entity must
8156 * precede any reference to it...
8157 */
8158 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8159 "PEReference: %%%s; not found\n",
8160 name, NULL);
8161 ctxt->valid = 0;
8162 }
8163 } else {
8164 /*
8165 * Internal checking in case the entity quest barfed
8166 */
8167 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8168 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8169 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8170 "%%%s; is not a parameter entity\n",
8171 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008172 }
8173 }
Daniel Veillard0161e632008-08-28 15:36:32 +00008174 ctxt->hasPErefs = 1;
8175 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00008176 *str = ptr;
8177 return(entity);
8178}
8179
8180/**
8181 * xmlParseDocTypeDecl:
8182 * @ctxt: an XML parser context
8183 *
8184 * parse a DOCTYPE declaration
8185 *
8186 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8187 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8188 *
8189 * [ VC: Root Element Type ]
8190 * The Name in the document type declaration must match the element
8191 * type of the root element.
8192 */
8193
8194void
8195xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008196 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00008197 xmlChar *ExternalID = NULL;
8198 xmlChar *URI = NULL;
8199
8200 /*
8201 * We know that '<!DOCTYPE' has been detected.
8202 */
8203 SKIP(9);
8204
8205 SKIP_BLANKS;
8206
8207 /*
8208 * Parse the DOCTYPE name.
8209 */
8210 name = xmlParseName(ctxt);
8211 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008212 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8213 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008214 }
8215 ctxt->intSubName = name;
8216
8217 SKIP_BLANKS;
8218
8219 /*
8220 * Check for SystemID and ExternalID
8221 */
8222 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8223
8224 if ((URI != NULL) || (ExternalID != NULL)) {
8225 ctxt->hasExternalSubset = 1;
8226 }
8227 ctxt->extSubURI = URI;
8228 ctxt->extSubSystem = ExternalID;
8229
8230 SKIP_BLANKS;
8231
8232 /*
8233 * Create and update the internal subset.
8234 */
8235 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8236 (!ctxt->disableSAX))
8237 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8238
8239 /*
8240 * Is there any internal subset declarations ?
8241 * they are handled separately in xmlParseInternalSubset()
8242 */
8243 if (RAW == '[')
8244 return;
8245
8246 /*
8247 * We should be at the end of the DOCTYPE declaration.
8248 */
8249 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008250 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008251 }
8252 NEXT;
8253}
8254
8255/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008256 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008257 * @ctxt: an XML parser context
8258 *
8259 * parse the internal subset declaration
8260 *
8261 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8262 */
8263
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008264static void
Owen Taylor3473f882001-02-23 17:55:21 +00008265xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8266 /*
8267 * Is there any DTD definition ?
8268 */
8269 if (RAW == '[') {
8270 ctxt->instate = XML_PARSER_DTD;
8271 NEXT;
8272 /*
8273 * Parse the succession of Markup declarations and
8274 * PEReferences.
8275 * Subsequence (markupdecl | PEReference | S)*
8276 */
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008277 while ((RAW != ']') && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008278 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008279 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008280
8281 SKIP_BLANKS;
8282 xmlParseMarkupDecl(ctxt);
8283 xmlParsePEReference(ctxt);
8284
8285 /*
8286 * Pop-up of finished entities.
8287 */
8288 while ((RAW == 0) && (ctxt->inputNr > 1))
8289 xmlPopInput(ctxt);
8290
8291 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008292 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008293 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008294 break;
8295 }
8296 }
8297 if (RAW == ']') {
8298 NEXT;
8299 SKIP_BLANKS;
8300 }
8301 }
8302
8303 /*
8304 * We should be at the end of the DOCTYPE declaration.
8305 */
8306 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008307 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008308 }
8309 NEXT;
8310}
8311
Daniel Veillard81273902003-09-30 00:43:48 +00008312#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008313/**
8314 * xmlParseAttribute:
8315 * @ctxt: an XML parser context
8316 * @value: a xmlChar ** used to store the value of the attribute
8317 *
8318 * parse an attribute
8319 *
8320 * [41] Attribute ::= Name Eq AttValue
8321 *
8322 * [ WFC: No External Entity References ]
8323 * Attribute values cannot contain direct or indirect entity references
8324 * to external entities.
8325 *
8326 * [ WFC: No < in Attribute Values ]
8327 * The replacement text of any entity referred to directly or indirectly in
8328 * an attribute value (other than "&lt;") must not contain a <.
8329 *
8330 * [ VC: Attribute Value Type ]
8331 * The attribute must have been declared; the value must be of the type
8332 * declared for it.
8333 *
8334 * [25] Eq ::= S? '=' S?
8335 *
8336 * With namespace:
8337 *
8338 * [NS 11] Attribute ::= QName Eq AttValue
8339 *
8340 * Also the case QName == xmlns:??? is handled independently as a namespace
8341 * definition.
8342 *
8343 * Returns the attribute name, and the value in *value.
8344 */
8345
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008346const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008347xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008348 const xmlChar *name;
8349 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008350
8351 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008352 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008353 name = xmlParseName(ctxt);
8354 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008355 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008356 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008357 return(NULL);
8358 }
8359
8360 /*
8361 * read the value
8362 */
8363 SKIP_BLANKS;
8364 if (RAW == '=') {
8365 NEXT;
8366 SKIP_BLANKS;
8367 val = xmlParseAttValue(ctxt);
8368 ctxt->instate = XML_PARSER_CONTENT;
8369 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008370 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008371 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008372 return(NULL);
8373 }
8374
8375 /*
8376 * Check that xml:lang conforms to the specification
8377 * No more registered as an error, just generate a warning now
8378 * since this was deprecated in XML second edition
8379 */
8380 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8381 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008382 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8383 "Malformed value for xml:lang : %s\n",
8384 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008385 }
8386 }
8387
8388 /*
8389 * Check that xml:space conforms to the specification
8390 */
8391 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8392 if (xmlStrEqual(val, BAD_CAST "default"))
8393 *(ctxt->space) = 0;
8394 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8395 *(ctxt->space) = 1;
8396 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008397 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008398"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008399 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008400 }
8401 }
8402
8403 *value = val;
8404 return(name);
8405}
8406
8407/**
8408 * xmlParseStartTag:
8409 * @ctxt: an XML parser context
8410 *
8411 * parse a start of tag either for rule element or
8412 * EmptyElement. In both case we don't parse the tag closing chars.
8413 *
8414 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8415 *
8416 * [ WFC: Unique Att Spec ]
8417 * No attribute name may appear more than once in the same start-tag or
8418 * empty-element tag.
8419 *
8420 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8421 *
8422 * [ WFC: Unique Att Spec ]
8423 * No attribute name may appear more than once in the same start-tag or
8424 * empty-element tag.
8425 *
8426 * With namespace:
8427 *
8428 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8429 *
8430 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8431 *
8432 * Returns the element name parsed
8433 */
8434
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008435const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008436xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008437 const xmlChar *name;
8438 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008439 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008440 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008441 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008442 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008443 int i;
8444
8445 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008446 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008447
8448 name = xmlParseName(ctxt);
8449 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008450 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008451 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008452 return(NULL);
8453 }
8454
8455 /*
8456 * Now parse the attributes, it ends up with the ending
8457 *
8458 * (S Attribute)* S?
8459 */
8460 SKIP_BLANKS;
8461 GROW;
8462
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008463 while (((RAW != '>') &&
Daniel Veillard21a0f912001-02-25 19:54:14 +00008464 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008465 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00008466 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008467 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008468
8469 attname = xmlParseAttribute(ctxt, &attvalue);
8470 if ((attname != NULL) && (attvalue != NULL)) {
8471 /*
8472 * [ WFC: Unique Att Spec ]
8473 * No attribute name may appear more than once in the same
8474 * start-tag or empty-element tag.
8475 */
8476 for (i = 0; i < nbatts;i += 2) {
8477 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008478 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008479 xmlFree(attvalue);
8480 goto failed;
8481 }
8482 }
Owen Taylor3473f882001-02-23 17:55:21 +00008483 /*
8484 * Add the pair to atts
8485 */
8486 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008487 maxatts = 22; /* allow for 10 attrs by default */
8488 atts = (const xmlChar **)
8489 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008490 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008491 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008492 if (attvalue != NULL)
8493 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008494 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008495 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008496 ctxt->atts = atts;
8497 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008498 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008499 const xmlChar **n;
8500
Owen Taylor3473f882001-02-23 17:55:21 +00008501 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008502 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008503 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008504 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008505 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008506 if (attvalue != NULL)
8507 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008508 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008509 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008510 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008511 ctxt->atts = atts;
8512 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008513 }
8514 atts[nbatts++] = attname;
8515 atts[nbatts++] = attvalue;
8516 atts[nbatts] = NULL;
8517 atts[nbatts + 1] = NULL;
8518 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008519 if (attvalue != NULL)
8520 xmlFree(attvalue);
8521 }
8522
Daniel Veillard52d8ade2012-07-30 10:08:45 +08008523failed:
Owen Taylor3473f882001-02-23 17:55:21 +00008524
Daniel Veillard3772de32002-12-17 10:31:45 +00008525 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008526 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8527 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008528 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008529 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8530 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008531 }
8532 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008533 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8534 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008535 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8536 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008537 break;
8538 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008539 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008540 GROW;
8541 }
8542
8543 /*
8544 * SAX: Start of Element !
8545 */
8546 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008547 (!ctxt->disableSAX)) {
8548 if (nbatts > 0)
8549 ctxt->sax->startElement(ctxt->userData, name, atts);
8550 else
8551 ctxt->sax->startElement(ctxt->userData, name, NULL);
8552 }
Owen Taylor3473f882001-02-23 17:55:21 +00008553
8554 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008555 /* Free only the content strings */
8556 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008557 if (atts[i] != NULL)
8558 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008559 }
8560 return(name);
8561}
8562
8563/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008564 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008565 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008566 * @line: line of the start tag
8567 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008568 *
8569 * parse an end of tag
8570 *
8571 * [42] ETag ::= '</' Name S? '>'
8572 *
8573 * With namespace
8574 *
8575 * [NS 9] ETag ::= '</' QName S? '>'
8576 */
8577
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008578static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008579xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008580 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008581
8582 GROW;
8583 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008584 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008585 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008586 return;
8587 }
8588 SKIP(2);
8589
Daniel Veillard46de64e2002-05-29 08:21:33 +00008590 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008591
8592 /*
8593 * We should definitely be at the ending "S? '>'" part
8594 */
8595 GROW;
8596 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008597 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008598 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008599 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008600 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008601
8602 /*
8603 * [ WFC: Element Type Match ]
8604 * The Name in an element's end-tag must match the element type in the
8605 * start-tag.
8606 *
8607 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008608 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008609 if (name == NULL) name = BAD_CAST "unparseable";
8610 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008611 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008612 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008613 }
8614
8615 /*
8616 * SAX: End of Tag
8617 */
8618 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8619 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008620 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008621
Daniel Veillarde57ec792003-09-10 10:50:59 +00008622 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008623 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008624 return;
8625}
8626
8627/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008628 * xmlParseEndTag:
8629 * @ctxt: an XML parser context
8630 *
8631 * parse an end of tag
8632 *
8633 * [42] ETag ::= '</' Name S? '>'
8634 *
8635 * With namespace
8636 *
8637 * [NS 9] ETag ::= '</' QName S? '>'
8638 */
8639
8640void
8641xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008642 xmlParseEndTag1(ctxt, 0);
8643}
Daniel Veillard81273902003-09-30 00:43:48 +00008644#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008645
8646/************************************************************************
8647 * *
8648 * SAX 2 specific operations *
8649 * *
8650 ************************************************************************/
8651
Daniel Veillard0fb18932003-09-07 09:14:37 +00008652/*
8653 * xmlGetNamespace:
8654 * @ctxt: an XML parser context
8655 * @prefix: the prefix to lookup
8656 *
8657 * Lookup the namespace name for the @prefix (which ca be NULL)
8658 * The prefix must come from the @ctxt->dict dictionnary
8659 *
8660 * Returns the namespace name or NULL if not bound
8661 */
8662static const xmlChar *
8663xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8664 int i;
8665
Daniel Veillarde57ec792003-09-10 10:50:59 +00008666 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008667 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008668 if (ctxt->nsTab[i] == prefix) {
8669 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8670 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008671 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008672 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008673 return(NULL);
8674}
8675
8676/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008677 * xmlParseQName:
8678 * @ctxt: an XML parser context
8679 * @prefix: pointer to store the prefix part
8680 *
8681 * parse an XML Namespace QName
8682 *
8683 * [6] QName ::= (Prefix ':')? LocalPart
8684 * [7] Prefix ::= NCName
8685 * [8] LocalPart ::= NCName
8686 *
8687 * Returns the Name parsed or NULL
8688 */
8689
8690static const xmlChar *
8691xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8692 const xmlChar *l, *p;
8693
8694 GROW;
8695
8696 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008697 if (l == NULL) {
8698 if (CUR == ':') {
8699 l = xmlParseName(ctxt);
8700 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008701 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8702 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008703 *prefix = NULL;
8704 return(l);
8705 }
8706 }
8707 return(NULL);
8708 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008709 if (CUR == ':') {
8710 NEXT;
8711 p = l;
8712 l = xmlParseNCName(ctxt);
8713 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008714 xmlChar *tmp;
8715
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008716 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8717 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008718 l = xmlParseNmtoken(ctxt);
8719 if (l == NULL)
8720 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8721 else {
8722 tmp = xmlBuildQName(l, p, NULL, 0);
8723 xmlFree((char *)l);
8724 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008725 p = xmlDictLookup(ctxt->dict, tmp, -1);
8726 if (tmp != NULL) xmlFree(tmp);
8727 *prefix = NULL;
8728 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008729 }
8730 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008731 xmlChar *tmp;
8732
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008733 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8734 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008735 NEXT;
8736 tmp = (xmlChar *) xmlParseName(ctxt);
8737 if (tmp != NULL) {
8738 tmp = xmlBuildQName(tmp, l, NULL, 0);
8739 l = xmlDictLookup(ctxt->dict, tmp, -1);
8740 if (tmp != NULL) xmlFree(tmp);
8741 *prefix = p;
8742 return(l);
8743 }
8744 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8745 l = xmlDictLookup(ctxt->dict, tmp, -1);
8746 if (tmp != NULL) xmlFree(tmp);
8747 *prefix = p;
8748 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008749 }
8750 *prefix = p;
8751 } else
8752 *prefix = NULL;
8753 return(l);
8754}
8755
8756/**
8757 * xmlParseQNameAndCompare:
8758 * @ctxt: an XML parser context
8759 * @name: the localname
8760 * @prefix: the prefix, if any.
8761 *
8762 * parse an XML name and compares for match
8763 * (specialized for endtag parsing)
8764 *
8765 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8766 * and the name for mismatch
8767 */
8768
8769static const xmlChar *
8770xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8771 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008772 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008773 const xmlChar *in;
8774 const xmlChar *ret;
8775 const xmlChar *prefix2;
8776
8777 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8778
8779 GROW;
8780 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008781
Daniel Veillard0fb18932003-09-07 09:14:37 +00008782 cmp = prefix;
8783 while (*in != 0 && *in == *cmp) {
8784 ++in;
8785 ++cmp;
8786 }
8787 if ((*cmp == 0) && (*in == ':')) {
8788 in++;
8789 cmp = name;
8790 while (*in != 0 && *in == *cmp) {
8791 ++in;
8792 ++cmp;
8793 }
William M. Brack76e95df2003-10-18 16:20:14 +00008794 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008795 /* success */
8796 ctxt->input->cur = in;
8797 return((const xmlChar*) 1);
8798 }
8799 }
8800 /*
8801 * all strings coms from the dictionary, equality can be done directly
8802 */
8803 ret = xmlParseQName (ctxt, &prefix2);
8804 if ((ret == name) && (prefix == prefix2))
8805 return((const xmlChar*) 1);
8806 return ret;
8807}
8808
8809/**
8810 * xmlParseAttValueInternal:
8811 * @ctxt: an XML parser context
8812 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008813 * @alloc: whether the attribute was reallocated as a new string
8814 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008815 *
8816 * parse a value for an attribute.
8817 * NOTE: if no normalization is needed, the routine will return pointers
8818 * directly from the data buffer.
8819 *
8820 * 3.3.3 Attribute-Value Normalization:
8821 * Before the value of an attribute is passed to the application or
8822 * checked for validity, the XML processor must normalize it as follows:
8823 * - a character reference is processed by appending the referenced
8824 * character to the attribute value
8825 * - an entity reference is processed by recursively processing the
8826 * replacement text of the entity
8827 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8828 * appending #x20 to the normalized value, except that only a single
8829 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8830 * parsed entity or the literal entity value of an internal parsed entity
8831 * - other characters are processed by appending them to the normalized value
8832 * If the declared value is not CDATA, then the XML processor must further
8833 * process the normalized attribute value by discarding any leading and
8834 * trailing space (#x20) characters, and by replacing sequences of space
8835 * (#x20) characters by a single space (#x20) character.
8836 * All attributes for which no declaration has been read should be treated
8837 * by a non-validating parser as if declared CDATA.
8838 *
8839 * Returns the AttValue parsed or NULL. The value has to be freed by the
8840 * caller if it was copied, this can be detected by val[*len] == 0.
8841 */
8842
8843static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008844xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8845 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008846{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008847 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008848 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008849 xmlChar *ret = NULL;
8850
8851 GROW;
8852 in = (xmlChar *) CUR_PTR;
8853 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008854 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008855 return (NULL);
8856 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008857 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008858
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008859 /*
8860 * try to handle in this routine the most common case where no
8861 * allocation of a new string is required and where content is
8862 * pure ASCII.
8863 */
8864 limit = *in++;
8865 end = ctxt->input->end;
8866 start = in;
8867 if (in >= end) {
8868 const xmlChar *oldbase = ctxt->input->base;
8869 GROW;
8870 if (oldbase != ctxt->input->base) {
8871 long delta = ctxt->input->base - oldbase;
8872 start = start + delta;
8873 in = in + delta;
8874 }
8875 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008876 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008877 if (normalize) {
8878 /*
8879 * Skip any leading spaces
8880 */
8881 while ((in < end) && (*in != limit) &&
8882 ((*in == 0x20) || (*in == 0x9) ||
8883 (*in == 0xA) || (*in == 0xD))) {
8884 in++;
8885 start = in;
8886 if (in >= end) {
8887 const xmlChar *oldbase = ctxt->input->base;
8888 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008889 if (ctxt->instate == XML_PARSER_EOF)
8890 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008891 if (oldbase != ctxt->input->base) {
8892 long delta = ctxt->input->base - oldbase;
8893 start = start + delta;
8894 in = in + delta;
8895 }
8896 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008897 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8898 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8899 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8900 "AttValue lenght too long\n");
8901 return(NULL);
8902 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008903 }
8904 }
8905 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8906 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8907 if ((*in++ == 0x20) && (*in == 0x20)) break;
8908 if (in >= end) {
8909 const xmlChar *oldbase = ctxt->input->base;
8910 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008911 if (ctxt->instate == XML_PARSER_EOF)
8912 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008913 if (oldbase != ctxt->input->base) {
8914 long delta = ctxt->input->base - oldbase;
8915 start = start + delta;
8916 in = in + delta;
8917 }
8918 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008919 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8920 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8921 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8922 "AttValue lenght too long\n");
8923 return(NULL);
8924 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008925 }
8926 }
8927 last = in;
8928 /*
8929 * skip the trailing blanks
8930 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008931 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008932 while ((in < end) && (*in != limit) &&
8933 ((*in == 0x20) || (*in == 0x9) ||
8934 (*in == 0xA) || (*in == 0xD))) {
8935 in++;
8936 if (in >= end) {
8937 const xmlChar *oldbase = ctxt->input->base;
8938 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008939 if (ctxt->instate == XML_PARSER_EOF)
8940 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008941 if (oldbase != ctxt->input->base) {
8942 long delta = ctxt->input->base - oldbase;
8943 start = start + delta;
8944 in = in + delta;
8945 last = last + delta;
8946 }
8947 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008948 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8949 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8950 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8951 "AttValue lenght too long\n");
8952 return(NULL);
8953 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008954 }
8955 }
Daniel Veillarde17db992012-07-19 11:25:16 +08008956 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8957 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8958 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8959 "AttValue lenght too long\n");
8960 return(NULL);
8961 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008962 if (*in != limit) goto need_complex;
8963 } else {
8964 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8965 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8966 in++;
8967 if (in >= end) {
8968 const xmlChar *oldbase = ctxt->input->base;
8969 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08008970 if (ctxt->instate == XML_PARSER_EOF)
8971 return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008972 if (oldbase != ctxt->input->base) {
8973 long delta = ctxt->input->base - oldbase;
8974 start = start + delta;
8975 in = in + delta;
8976 }
8977 end = ctxt->input->end;
Daniel Veillarde17db992012-07-19 11:25:16 +08008978 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8979 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8980 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8981 "AttValue lenght too long\n");
8982 return(NULL);
8983 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008984 }
8985 }
8986 last = in;
Daniel Veillarde17db992012-07-19 11:25:16 +08008987 if (((in - start) > XML_MAX_TEXT_LENGTH) &&
8988 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
8989 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
8990 "AttValue lenght too long\n");
8991 return(NULL);
8992 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008993 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008994 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008995 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008996 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008997 *len = last - start;
8998 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008999 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009000 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009001 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009002 }
9003 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009004 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009005 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009006need_complex:
9007 if (alloc) *alloc = 1;
9008 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009009}
9010
9011/**
9012 * xmlParseAttribute2:
9013 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009014 * @pref: the element prefix
9015 * @elem: the element name
9016 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00009017 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009018 * @len: an int * to save the length of the attribute
9019 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00009020 *
9021 * parse an attribute in the new SAX2 framework.
9022 *
9023 * Returns the attribute name, and the value in *value, .
9024 */
9025
9026static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009027xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009028 const xmlChar * pref, const xmlChar * elem,
9029 const xmlChar ** prefix, xmlChar ** value,
9030 int *len, int *alloc)
9031{
Daniel Veillard0fb18932003-09-07 09:14:37 +00009032 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00009033 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009034 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009035
9036 *value = NULL;
9037 GROW;
9038 name = xmlParseQName(ctxt, prefix);
9039 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009040 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9041 "error parsing attribute name\n");
9042 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009043 }
9044
9045 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009046 * get the type if needed
9047 */
9048 if (ctxt->attsSpecial != NULL) {
9049 int type;
9050
9051 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009052 pref, elem, *prefix, name);
9053 if (type != 0)
9054 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009055 }
9056
9057 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009058 * read the value
9059 */
9060 SKIP_BLANKS;
9061 if (RAW == '=') {
9062 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009063 SKIP_BLANKS;
9064 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9065 if (normalize) {
9066 /*
9067 * Sometimes a second normalisation pass for spaces is needed
9068 * but that only happens if charrefs or entities refernces
9069 * have been used in the attribute value, i.e. the attribute
9070 * value have been extracted in an allocated string already.
9071 */
9072 if (*alloc) {
9073 const xmlChar *val2;
9074
9075 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009076 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009077 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00009078 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009079 }
9080 }
9081 }
9082 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009083 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009084 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9085 "Specification mandate value for attribute %s\n",
9086 name);
9087 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009088 }
9089
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009090 if (*prefix == ctxt->str_xml) {
9091 /*
9092 * Check that xml:lang conforms to the specification
9093 * No more registered as an error, just generate a warning now
9094 * since this was deprecated in XML second edition
9095 */
9096 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9097 internal_val = xmlStrndup(val, *len);
9098 if (!xmlCheckLanguageID(internal_val)) {
9099 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9100 "Malformed value for xml:lang : %s\n",
9101 internal_val, NULL);
9102 }
9103 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009104
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009105 /*
9106 * Check that xml:space conforms to the specification
9107 */
9108 if (xmlStrEqual(name, BAD_CAST "space")) {
9109 internal_val = xmlStrndup(val, *len);
9110 if (xmlStrEqual(internal_val, BAD_CAST "default"))
9111 *(ctxt->space) = 0;
9112 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9113 *(ctxt->space) = 1;
9114 else {
9115 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9116 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9117 internal_val, NULL);
9118 }
9119 }
9120 if (internal_val) {
9121 xmlFree(internal_val);
9122 }
9123 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009124
9125 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00009126 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009127}
Daniel Veillard0fb18932003-09-07 09:14:37 +00009128/**
9129 * xmlParseStartTag2:
9130 * @ctxt: an XML parser context
9131 *
 * Parse a start tag, for either the element rule or the EmptyElemTag
 * rule. In both cases the tag closing characters are not parsed.
 * This routine is called when running SAX2 parsing.
9135 *
9136 * [40] STag ::= '<' Name (S Attribute)* S? '>'
9137 *
9138 * [ WFC: Unique Att Spec ]
9139 * No attribute name may appear more than once in the same start-tag or
9140 * empty-element tag.
9141 *
9142 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9143 *
9144 * [ WFC: Unique Att Spec ]
9145 * No attribute name may appear more than once in the same start-tag or
9146 * empty-element tag.
9147 *
9148 * With namespace:
9149 *
9150 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9151 *
9152 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9153 *
9154 * Returns the element name parsed
9155 */
9156
9157static const xmlChar *
9158xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009159 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009160 const xmlChar *localname;
9161 const xmlChar *prefix;
9162 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009163 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009164 const xmlChar *nsname;
9165 xmlChar *attvalue;
9166 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009167 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009168 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009169 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009170 const xmlChar *base;
9171 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00009172 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009173
9174 if (RAW != '<') return(NULL);
9175 NEXT1;
9176
9177 /*
9178 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
9179 * point since the attribute values may be stored as pointers to
9180 * the buffer and calling SHRINK would destroy them !
9181 * The Shrinking is only possible once the full set of attribute
9182 * callbacks have been done.
9183 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009184reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009185 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009186 base = ctxt->input->base;
9187 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009188 oldline = ctxt->input->line;
9189 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009190 nbatts = 0;
9191 nratts = 0;
9192 nbdef = 0;
9193 nbNs = 0;
9194 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00009195 /* Forget any namespaces added during an earlier parse of this element. */
9196 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009197
9198 localname = xmlParseQName(ctxt, &prefix);
9199 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009200 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9201 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009202 return(NULL);
9203 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009204 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009205
9206 /*
9207 * Now parse the attributes, it ends up with the ending
9208 *
9209 * (S Attribute)* S?
9210 */
9211 SKIP_BLANKS;
9212 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009213 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009214
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009215 while (((RAW != '>') &&
Daniel Veillard0fb18932003-09-07 09:14:37 +00009216 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009217 (IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009218 const xmlChar *q = CUR_PTR;
9219 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009220 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009221
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00009222 attname = xmlParseAttribute2(ctxt, prefix, localname,
9223 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00009224 if (ctxt->input->base != base) {
9225 if ((attvalue != NULL) && (alloc != 0))
9226 xmlFree(attvalue);
9227 attvalue = NULL;
9228 goto base_changed;
9229 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009230 if ((attname != NULL) && (attvalue != NULL)) {
9231 if (len < 0) len = xmlStrlen(attvalue);
9232 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009233 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9234 xmlURIPtr uri;
9235
9236 if (*URL != 0) {
9237 uri = xmlParseURI((const char *) URL);
9238 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009239 xmlNsErr(ctxt, XML_WAR_NS_URI,
9240 "xmlns: '%s' is not a valid URI\n",
9241 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009242 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00009243 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00009244 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9245 "xmlns: URI %s is not absolute\n",
9246 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009247 }
9248 xmlFreeURI(uri);
9249 }
Daniel Veillard37334572008-07-31 08:20:02 +00009250 if (URL == ctxt->str_xml_ns) {
9251 if (attname != ctxt->str_xml) {
9252 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9253 "xml namespace URI cannot be the default namespace\n",
9254 NULL, NULL, NULL);
9255 }
9256 goto skip_default_ns;
9257 }
9258 if ((len == 29) &&
9259 (xmlStrEqual(URL,
9260 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9261 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9262 "reuse of the xmlns namespace name is forbidden\n",
9263 NULL, NULL, NULL);
9264 goto skip_default_ns;
9265 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009266 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009267 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009268 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009269 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009270 for (j = 1;j <= nbNs;j++)
9271 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9272 break;
9273 if (j <= nbNs)
9274 xmlErrAttributeDup(ctxt, NULL, attname);
9275 else
9276 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009277skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009278 if (alloc != 0) xmlFree(attvalue);
9279 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009280 continue;
9281 }
9282 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009283 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9284 xmlURIPtr uri;
9285
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009286 if (attname == ctxt->str_xml) {
9287 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009288 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9289 "xml namespace prefix mapped to wrong URI\n",
9290 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009291 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009292 /*
9293 * Do not keep a namespace definition node
9294 */
Daniel Veillard37334572008-07-31 08:20:02 +00009295 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009296 }
Daniel Veillard37334572008-07-31 08:20:02 +00009297 if (URL == ctxt->str_xml_ns) {
9298 if (attname != ctxt->str_xml) {
9299 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9300 "xml namespace URI mapped to wrong prefix\n",
9301 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009302 }
Daniel Veillard37334572008-07-31 08:20:02 +00009303 goto skip_ns;
9304 }
9305 if (attname == ctxt->str_xmlns) {
9306 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9307 "redefinition of the xmlns prefix is forbidden\n",
9308 NULL, NULL, NULL);
9309 goto skip_ns;
9310 }
9311 if ((len == 29) &&
9312 (xmlStrEqual(URL,
9313 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9314 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9315 "reuse of the xmlns namespace name is forbidden\n",
9316 NULL, NULL, NULL);
9317 goto skip_ns;
9318 }
9319 if ((URL == NULL) || (URL[0] == 0)) {
9320 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9321 "xmlns:%s: Empty XML namespace is not allowed\n",
9322 attname, NULL, NULL);
9323 goto skip_ns;
9324 } else {
9325 uri = xmlParseURI((const char *) URL);
9326 if (uri == NULL) {
9327 xmlNsErr(ctxt, XML_WAR_NS_URI,
9328 "xmlns:%s: '%s' is not a valid URI\n",
9329 attname, URL, NULL);
9330 } else {
9331 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9332 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9333 "xmlns:%s: URI %s is not absolute\n",
9334 attname, URL, NULL);
9335 }
9336 xmlFreeURI(uri);
9337 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009338 }
9339
Daniel Veillard0fb18932003-09-07 09:14:37 +00009340 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009341 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009342 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009343 for (j = 1;j <= nbNs;j++)
9344 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9345 break;
9346 if (j <= nbNs)
9347 xmlErrAttributeDup(ctxt, aprefix, attname);
9348 else
9349 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009350skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009351 if (alloc != 0) xmlFree(attvalue);
9352 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009353 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009354 continue;
9355 }
9356
9357 /*
9358 * Add the pair to atts
9359 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009360 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9361 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009362 if (attvalue[len] == 0)
9363 xmlFree(attvalue);
9364 goto failed;
9365 }
9366 maxatts = ctxt->maxatts;
9367 atts = ctxt->atts;
9368 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009369 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009370 atts[nbatts++] = attname;
9371 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009372 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009373 atts[nbatts++] = attvalue;
9374 attvalue += len;
9375 atts[nbatts++] = attvalue;
9376 /*
9377 * tag if some deallocation is needed
9378 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009379 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009380 } else {
9381 if ((attvalue != NULL) && (attvalue[len] == 0))
9382 xmlFree(attvalue);
9383 }
9384
Daniel Veillard37334572008-07-31 08:20:02 +00009385failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009386
9387 GROW
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009388 if (ctxt->instate == XML_PARSER_EOF)
9389 break;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009390 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009391 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9392 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009393 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009394 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9395 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009396 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009397 }
9398 SKIP_BLANKS;
9399 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9400 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009401 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009402 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009403 break;
9404 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009405 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009406 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009407 }
9408
Daniel Veillard0fb18932003-09-07 09:14:37 +00009409 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009410 * The attributes defaulting
9411 */
9412 if (ctxt->attsDefault != NULL) {
9413 xmlDefAttrsPtr defaults;
9414
9415 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9416 if (defaults != NULL) {
9417 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009418 attname = defaults->values[5 * i];
9419 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009420
9421 /*
9422 * special work for namespaces defaulted defs
9423 */
9424 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9425 /*
9426 * check that it's not a defined namespace
9427 */
9428 for (j = 1;j <= nbNs;j++)
9429 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9430 break;
9431 if (j <= nbNs) continue;
9432
9433 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009434 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009435 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009436 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009437 nbNs++;
9438 }
9439 } else if (aprefix == ctxt->str_xmlns) {
9440 /*
9441 * check that it's not a defined namespace
9442 */
9443 for (j = 1;j <= nbNs;j++)
9444 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9445 break;
9446 if (j <= nbNs) continue;
9447
9448 nsname = xmlGetNamespace(ctxt, attname);
9449 if (nsname != defaults->values[2]) {
9450 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009451 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009452 nbNs++;
9453 }
9454 } else {
9455 /*
9456 * check that it's not a defined attribute
9457 */
9458 for (j = 0;j < nbatts;j+=5) {
9459 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9460 break;
9461 }
9462 if (j < nbatts) continue;
9463
9464 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9465 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009466 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009467 }
9468 maxatts = ctxt->maxatts;
9469 atts = ctxt->atts;
9470 }
9471 atts[nbatts++] = attname;
9472 atts[nbatts++] = aprefix;
9473 if (aprefix == NULL)
9474 atts[nbatts++] = NULL;
9475 else
9476 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009477 atts[nbatts++] = defaults->values[5 * i + 2];
9478 atts[nbatts++] = defaults->values[5 * i + 3];
9479 if ((ctxt->standalone == 1) &&
9480 (defaults->values[5 * i + 4] != NULL)) {
9481 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9482 "standalone: attribute %s on %s defaulted from external subset\n",
9483 attname, localname);
9484 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009485 nbdef++;
9486 }
9487 }
9488 }
9489 }
9490
Daniel Veillarde70c8772003-11-25 07:21:18 +00009491 /*
9492 * The attributes checkings
9493 */
9494 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009495 /*
9496 * The default namespace does not apply to attribute names.
9497 */
9498 if (atts[i + 1] != NULL) {
9499 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9500 if (nsname == NULL) {
9501 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9502 "Namespace prefix %s for %s on %s is not defined\n",
9503 atts[i + 1], atts[i], localname);
9504 }
9505 atts[i + 2] = nsname;
9506 } else
9507 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009508 /*
9509 * [ WFC: Unique Att Spec ]
9510 * No attribute name may appear more than once in the same
9511 * start-tag or empty-element tag.
9512 * As extended by the Namespace in XML REC.
9513 */
9514 for (j = 0; j < i;j += 5) {
9515 if (atts[i] == atts[j]) {
9516 if (atts[i+1] == atts[j+1]) {
9517 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9518 break;
9519 }
9520 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9521 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9522 "Namespaced Attribute %s in '%s' redefined\n",
9523 atts[i], nsname, NULL);
9524 break;
9525 }
9526 }
9527 }
9528 }
9529
Daniel Veillarde57ec792003-09-10 10:50:59 +00009530 nsname = xmlGetNamespace(ctxt, prefix);
9531 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009532 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9533 "Namespace prefix %s on %s is not defined\n",
9534 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009535 }
9536 *pref = prefix;
9537 *URI = nsname;
9538
9539 /*
9540 * SAX: Start of Element !
9541 */
9542 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9543 (!ctxt->disableSAX)) {
9544 if (nbNs > 0)
9545 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9546 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9547 nbatts / 5, nbdef, atts);
9548 else
9549 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9550 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9551 }
9552
9553 /*
9554 * Free up attribute allocated strings if needed
9555 */
9556 if (attval != 0) {
9557 for (i = 3,j = 0; j < nratts;i += 5,j++)
9558 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9559 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009560 }
9561
9562 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009563
9564base_changed:
9565 /*
9566 * the attribute strings are valid iif the base didn't changed
9567 */
9568 if (attval != 0) {
9569 for (i = 3,j = 0; j < nratts;i += 5,j++)
9570 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9571 xmlFree((xmlChar *) atts[i]);
9572 }
9573 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009574 ctxt->input->line = oldline;
9575 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009576 if (ctxt->wellFormed == 1) {
9577 goto reparse;
9578 }
9579 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009580}
9581
9582/**
9583 * xmlParseEndTag2:
9584 * @ctxt: an XML parser context
9585 * @line: line of the start tag
9586 * @nsNr: number of namespaces on the start tag
9587 *
9588 * parse an end of tag
9589 *
9590 * [42] ETag ::= '</' Name S? '>'
9591 *
9592 * With namespace
9593 *
9594 * [NS 9] ETag ::= '</' QName S? '>'
9595 */
9596
9597static void
9598xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009599 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009600 const xmlChar *name;
9601
9602 GROW;
9603 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009604 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009605 return;
9606 }
9607 SKIP(2);
9608
William M. Brack13dfa872004-09-18 04:52:08 +00009609 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009610 if (ctxt->input->cur[tlen] == '>') {
9611 ctxt->input->cur += tlen + 1;
9612 goto done;
9613 }
9614 ctxt->input->cur += tlen;
9615 name = (xmlChar*)1;
9616 } else {
9617 if (prefix == NULL)
9618 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9619 else
9620 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9621 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009622
9623 /*
9624 * We should definitely be at the ending "S? '>'" part
9625 */
9626 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009627 if (ctxt->instate == XML_PARSER_EOF)
9628 return;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009629 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009630 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009631 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009632 } else
9633 NEXT1;
9634
9635 /*
9636 * [ WFC: Element Type Match ]
9637 * The Name in an element's end-tag must match the element type in the
9638 * start-tag.
9639 *
9640 */
9641 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009642 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009643 if ((line == 0) && (ctxt->node != NULL))
9644 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009645 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009646 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009647 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009648 }
9649
9650 /*
9651 * SAX: End of Tag
9652 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009653done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009654 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9655 (!ctxt->disableSAX))
9656 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9657
Daniel Veillard0fb18932003-09-07 09:14:37 +00009658 spacePop(ctxt);
9659 if (nsNr != 0)
9660 nsPop(ctxt, nsNr);
9661 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009662}
9663
9664/**
Owen Taylor3473f882001-02-23 17:55:21 +00009665 * xmlParseCDSect:
9666 * @ctxt: an XML parser context
9667 *
9668 * Parse escaped pure raw content.
9669 *
9670 * [18] CDSect ::= CDStart CData CDEnd
9671 *
9672 * [19] CDStart ::= '<![CDATA['
9673 *
9674 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9675 *
9676 * [21] CDEnd ::= ']]>'
9677 */
9678void
9679xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9680 xmlChar *buf = NULL;
9681 int len = 0;
9682 int size = XML_PARSER_BUFFER_SIZE;
9683 int r, rl;
9684 int s, sl;
9685 int cur, l;
9686 int count = 0;
9687
Daniel Veillard8f597c32003-10-06 08:19:27 +00009688 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009689 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009690 SKIP(9);
9691 } else
9692 return;
9693
9694 ctxt->instate = XML_PARSER_CDATA_SECTION;
9695 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009696 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009697 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009698 ctxt->instate = XML_PARSER_CONTENT;
9699 return;
9700 }
9701 NEXTL(rl);
9702 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009703 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009704 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009705 ctxt->instate = XML_PARSER_CONTENT;
9706 return;
9707 }
9708 NEXTL(sl);
9709 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009710 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009711 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009712 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009713 return;
9714 }
William M. Brack871611b2003-10-18 04:53:14 +00009715 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009716 ((r != ']') || (s != ']') || (cur != '>'))) {
9717 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009718 xmlChar *tmp;
9719
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009720 if ((size > XML_MAX_TEXT_LENGTH) &&
9721 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9722 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
9723 "CData section too big found", NULL);
9724 xmlFree (buf);
9725 return;
9726 }
9727 tmp = (xmlChar *) xmlRealloc(buf, size * 2 * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00009728 if (tmp == NULL) {
9729 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009730 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009731 return;
9732 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009733 buf = tmp;
Daniel Veillard52d8ade2012-07-30 10:08:45 +08009734 size *= 2;
Owen Taylor3473f882001-02-23 17:55:21 +00009735 }
9736 COPY_BUF(rl,buf,len,r);
9737 r = s;
9738 rl = sl;
9739 s = cur;
9740 sl = l;
9741 count++;
9742 if (count > 50) {
9743 GROW;
Daniel Veillard48b4cdd2012-07-30 16:16:04 +08009744 if (ctxt->instate == XML_PARSER_EOF) {
9745 xmlFree(buf);
9746 return;
9747 }
Owen Taylor3473f882001-02-23 17:55:21 +00009748 count = 0;
9749 }
9750 NEXTL(l);
9751 cur = CUR_CHAR(l);
9752 }
9753 buf[len] = 0;
9754 ctxt->instate = XML_PARSER_CONTENT;
9755 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009756 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009757 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009758 xmlFree(buf);
9759 return;
9760 }
9761 NEXTL(l);
9762
9763 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009764 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009765 */
9766 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9767 if (ctxt->sax->cdataBlock != NULL)
9768 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009769 else if (ctxt->sax->characters != NULL)
9770 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009771 }
9772 xmlFree(buf);
9773}
9774
9775/**
9776 * xmlParseContent:
9777 * @ctxt: an XML parser context
9778 *
9779 * Parse a content:
9780 *
9781 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9782 */
9783
9784void
9785xmlParseContent(xmlParserCtxtPtr ctxt) {
9786 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009787 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009788 ((RAW != '<') || (NXT(1) != '/')) &&
9789 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009790 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009791 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009792 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009793
9794 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009795 * First case : a Processing Instruction.
9796 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009797 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009798 xmlParsePI(ctxt);
9799 }
9800
9801 /*
9802 * Second case : a CDSection
9803 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009804 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009805 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009806 xmlParseCDSect(ctxt);
9807 }
9808
9809 /*
9810 * Third case : a comment
9811 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009812 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009813 (NXT(2) == '-') && (NXT(3) == '-')) {
9814 xmlParseComment(ctxt);
9815 ctxt->instate = XML_PARSER_CONTENT;
9816 }
9817
9818 /*
9819 * Fourth case : a sub-element.
9820 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009821 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009822 xmlParseElement(ctxt);
9823 }
9824
9825 /*
9826 * Fifth case : a reference. If if has not been resolved,
9827 * parsing returns it's Name, create the node
9828 */
9829
Daniel Veillard21a0f912001-02-25 19:54:14 +00009830 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009831 xmlParseReference(ctxt);
9832 }
9833
9834 /*
9835 * Last case, text. Note that References are handled directly.
9836 */
9837 else {
9838 xmlParseCharData(ctxt, 0);
9839 }
9840
9841 GROW;
9842 /*
9843 * Pop-up of finished entities.
9844 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009845 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009846 xmlPopInput(ctxt);
9847 SHRINK;
9848
Daniel Veillardfdc91562002-07-01 21:52:03 +00009849 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009850 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9851 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009852 ctxt->instate = XML_PARSER_EOF;
9853 break;
9854 }
9855 }
9856}
9857
9858/**
9859 * xmlParseElement:
9860 * @ctxt: an XML parser context
9861 *
9862 * parse an XML element, this is highly recursive
9863 *
9864 * [39] element ::= EmptyElemTag | STag content ETag
9865 *
9866 * [ WFC: Element Type Match ]
9867 * The Name in an element's end-tag must match the element type in the
9868 * start-tag.
9869 *
Owen Taylor3473f882001-02-23 17:55:21 +00009870 */
9871
9872void
9873xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009874 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009875 const xmlChar *prefix = NULL;
9876 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009877 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009878 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009879 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009880 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009881
Daniel Veillard8915c152008-08-26 13:05:34 +00009882 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9883 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9884 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9885 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9886 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009887 ctxt->instate = XML_PARSER_EOF;
9888 return;
9889 }
9890
Owen Taylor3473f882001-02-23 17:55:21 +00009891 /* Capture start position */
9892 if (ctxt->record_info) {
9893 node_info.begin_pos = ctxt->input->consumed +
9894 (CUR_PTR - ctxt->input->base);
9895 node_info.begin_line = ctxt->input->line;
9896 }
9897
9898 if (ctxt->spaceNr == 0)
9899 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009900 else if (*ctxt->space == -2)
9901 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009902 else
9903 spacePush(ctxt, *ctxt->space);
9904
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009905 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009906#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009907 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009908#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009909 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009910#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009911 else
9912 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009913#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009914 if (ctxt->instate == XML_PARSER_EOF)
9915 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009916 if (name == NULL) {
9917 spacePop(ctxt);
9918 return;
9919 }
9920 namePush(ctxt, name);
9921 ret = ctxt->node;
9922
Daniel Veillard4432df22003-09-28 18:58:27 +00009923#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009924 /*
9925 * [ VC: Root Element Type ]
9926 * The Name in the document type declaration must match the element
9927 * type of the root element.
9928 */
9929 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9930 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9931 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009932#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009933
9934 /*
9935 * Check for an Empty Element.
9936 */
9937 if ((RAW == '/') && (NXT(1) == '>')) {
9938 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009939 if (ctxt->sax2) {
9940 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9941 (!ctxt->disableSAX))
9942 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009943#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009944 } else {
9945 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9946 (!ctxt->disableSAX))
9947 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009948#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009949 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009950 namePop(ctxt);
9951 spacePop(ctxt);
9952 if (nsNr != ctxt->nsNr)
9953 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009954 if ( ret != NULL && ctxt->record_info ) {
9955 node_info.end_pos = ctxt->input->consumed +
9956 (CUR_PTR - ctxt->input->base);
9957 node_info.end_line = ctxt->input->line;
9958 node_info.node = ret;
9959 xmlParserAddNodeInfo(ctxt, &node_info);
9960 }
9961 return;
9962 }
9963 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009964 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009965 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009966 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9967 "Couldn't find end of Start Tag %s line %d\n",
9968 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009969
9970 /*
9971 * end of parsing of this node.
9972 */
9973 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009974 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009975 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009976 if (nsNr != ctxt->nsNr)
9977 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009978
9979 /*
9980 * Capture end position and add node
9981 */
9982 if ( ret != NULL && ctxt->record_info ) {
9983 node_info.end_pos = ctxt->input->consumed +
9984 (CUR_PTR - ctxt->input->base);
9985 node_info.end_line = ctxt->input->line;
9986 node_info.node = ret;
9987 xmlParserAddNodeInfo(ctxt, &node_info);
9988 }
9989 return;
9990 }
9991
9992 /*
9993 * Parse the content of the element:
9994 */
9995 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009996 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009997 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009998 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009999 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010000
10001 /*
10002 * end of parsing of this node.
10003 */
10004 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010005 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010006 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010007 if (nsNr != ctxt->nsNr)
10008 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +000010009 return;
10010 }
10011
10012 /*
10013 * parse the end of tag: '</' should be here.
10014 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010015 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010016 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010017 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010018 }
10019#ifdef LIBXML_SAX1_ENABLED
10020 else
Daniel Veillard0fb18932003-09-07 09:14:37 +000010021 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +000010022#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000010023
10024 /*
10025 * Capture end position and add node
10026 */
10027 if ( ret != NULL && ctxt->record_info ) {
10028 node_info.end_pos = ctxt->input->consumed +
10029 (CUR_PTR - ctxt->input->base);
10030 node_info.end_line = ctxt->input->line;
10031 node_info.node = ret;
10032 xmlParserAddNodeInfo(ctxt, &node_info);
10033 }
10034}
10035
10036/**
10037 * xmlParseVersionNum:
10038 * @ctxt: an XML parser context
10039 *
10040 * parse the XML version value.
10041 *
Daniel Veillard34e3f642008-07-29 09:02:27 +000010042 * [26] VersionNum ::= '1.' [0-9]+
10043 *
10044 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +000010045 *
10046 * Returns the string giving the XML version number, or NULL
10047 */
10048xmlChar *
10049xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10050 xmlChar *buf = NULL;
10051 int len = 0;
10052 int size = 10;
10053 xmlChar cur;
10054
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010055 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010056 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010057 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010058 return(NULL);
10059 }
10060 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +000010061 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010062 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010063 return(NULL);
10064 }
10065 buf[len++] = cur;
10066 NEXT;
10067 cur=CUR;
10068 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000010069 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +000010070 return(NULL);
10071 }
10072 buf[len++] = cur;
10073 NEXT;
10074 cur=CUR;
10075 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010076 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010077 xmlChar *tmp;
10078
Owen Taylor3473f882001-02-23 17:55:21 +000010079 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010080 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10081 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +000010082 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010083 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010084 return(NULL);
10085 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010086 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010087 }
10088 buf[len++] = cur;
10089 NEXT;
10090 cur=CUR;
10091 }
10092 buf[len] = 0;
10093 return(buf);
10094}
10095
10096/**
10097 * xmlParseVersionInfo:
10098 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +000010099 *
Owen Taylor3473f882001-02-23 17:55:21 +000010100 * parse the XML version.
10101 *
10102 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +000010103 *
Owen Taylor3473f882001-02-23 17:55:21 +000010104 * [25] Eq ::= S? '=' S?
10105 *
10106 * Returns the version string, e.g. "1.0"
10107 */
10108
10109xmlChar *
10110xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10111 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010112
Daniel Veillarda07050d2003-10-19 14:46:32 +000010113 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010114 SKIP(7);
10115 SKIP_BLANKS;
10116 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010117 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010118 return(NULL);
10119 }
10120 NEXT;
10121 SKIP_BLANKS;
10122 if (RAW == '"') {
10123 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010124 version = xmlParseVersionNum(ctxt);
10125 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010126 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010127 } else
10128 NEXT;
10129 } else if (RAW == '\''){
10130 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010131 version = xmlParseVersionNum(ctxt);
10132 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010133 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010134 } else
10135 NEXT;
10136 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010137 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010138 }
10139 }
10140 return(version);
10141}
10142
10143/**
10144 * xmlParseEncName:
10145 * @ctxt: an XML parser context
10146 *
10147 * parse the XML encoding name
10148 *
10149 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10150 *
10151 * Returns the encoding name value or NULL
10152 */
10153xmlChar *
10154xmlParseEncName(xmlParserCtxtPtr ctxt) {
10155 xmlChar *buf = NULL;
10156 int len = 0;
10157 int size = 10;
10158 xmlChar cur;
10159
10160 cur = CUR;
10161 if (((cur >= 'a') && (cur <= 'z')) ||
10162 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +000010163 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +000010164 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010165 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010166 return(NULL);
10167 }
Daniel Veillard34e3f642008-07-29 09:02:27 +000010168
Owen Taylor3473f882001-02-23 17:55:21 +000010169 buf[len++] = cur;
10170 NEXT;
10171 cur = CUR;
10172 while (((cur >= 'a') && (cur <= 'z')) ||
10173 ((cur >= 'A') && (cur <= 'Z')) ||
10174 ((cur >= '0') && (cur <= '9')) ||
10175 (cur == '.') || (cur == '_') ||
10176 (cur == '-')) {
10177 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +000010178 xmlChar *tmp;
10179
Owen Taylor3473f882001-02-23 17:55:21 +000010180 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +000010181 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
10182 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010183 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +000010184 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000010185 return(NULL);
10186 }
Daniel Veillard2248ff12004-09-22 23:05:14 +000010187 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +000010188 }
10189 buf[len++] = cur;
10190 NEXT;
10191 cur = CUR;
10192 if (cur == 0) {
10193 SHRINK;
10194 GROW;
10195 cur = CUR;
10196 }
10197 }
10198 buf[len] = 0;
10199 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010200 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010201 }
10202 return(buf);
10203}
10204
10205/**
10206 * xmlParseEncodingDecl:
10207 * @ctxt: an XML parser context
10208 *
10209 * parse the XML encoding declaration
10210 *
10211 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10212 *
10213 * this setups the conversion filters.
10214 *
10215 * Returns the encoding value or NULL
10216 */
10217
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010218const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +000010219xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10220 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000010221
10222 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010223 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010224 SKIP(8);
10225 SKIP_BLANKS;
10226 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010227 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010228 return(NULL);
10229 }
10230 NEXT;
10231 SKIP_BLANKS;
10232 if (RAW == '"') {
10233 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010234 encoding = xmlParseEncName(ctxt);
10235 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010236 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010237 } else
10238 NEXT;
10239 } else if (RAW == '\''){
10240 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +000010241 encoding = xmlParseEncName(ctxt);
10242 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010243 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010244 } else
10245 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +000010246 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010247 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010248 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080010249
10250 /*
10251 * Non standard parsing, allowing the user to ignore encoding
10252 */
10253 if (ctxt->options & XML_PARSE_IGNORE_ENC)
10254 return(encoding);
10255
Daniel Veillard6b621b82003-08-11 15:03:34 +000010256 /*
10257 * UTF-16 encoding stwich has already taken place at this stage,
10258 * more over the little-endian/big-endian selection is already done
10259 */
10260 if ((encoding != NULL) &&
10261 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
10262 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +000010263 /*
10264 * If no encoding was passed to the parser, that we are
10265 * using UTF-16 and no decoder is present i.e. the
10266 * document is apparently UTF-8 compatible, then raise an
10267 * encoding mismatch fatal error
10268 */
10269 if ((ctxt->encoding == NULL) &&
10270 (ctxt->input->buf != NULL) &&
10271 (ctxt->input->buf->encoder == NULL)) {
10272 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
10273 "Document labelled UTF-16 but has UTF-8 content\n");
10274 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010275 if (ctxt->encoding != NULL)
10276 xmlFree((xmlChar *) ctxt->encoding);
10277 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +000010278 }
10279 /*
10280 * UTF-8 encoding is handled natively
10281 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010282 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +000010283 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
10284 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010285 if (ctxt->encoding != NULL)
10286 xmlFree((xmlChar *) ctxt->encoding);
10287 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010288 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010289 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010290 xmlCharEncodingHandlerPtr handler;
10291
10292 if (ctxt->input->encoding != NULL)
10293 xmlFree((xmlChar *) ctxt->input->encoding);
10294 ctxt->input->encoding = encoding;
10295
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010296 handler = xmlFindCharEncodingHandler((const char *) encoding);
10297 if (handler != NULL) {
10298 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +000010299 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010300 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010301 "Unsupported encoding %s\n", encoding);
10302 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010303 }
10304 }
10305 }
10306 return(encoding);
10307}
10308
10309/**
10310 * xmlParseSDDecl:
10311 * @ctxt: an XML parser context
10312 *
10313 * parse the XML standalone declaration
10314 *
10315 * [32] SDDecl ::= S 'standalone' Eq
10316 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10317 *
10318 * [ VC: Standalone Document Declaration ]
10319 * TODO The standalone document declaration must have the value "no"
10320 * if any external markup declarations contain declarations of:
10321 * - attributes with default values, if elements to which these
10322 * attributes apply appear in the document without specifications
10323 * of values for these attributes, or
10324 * - entities (other than amp, lt, gt, apos, quot), if references
10325 * to those entities appear in the document, or
10326 * - attributes with values subject to normalization, where the
10327 * attribute appears in the document with a value which will change
10328 * as a result of normalization, or
10329 * - element types with element content, if white space occurs directly
10330 * within any instance of those types.
10331 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010332 * Returns:
10333 * 1 if standalone="yes"
10334 * 0 if standalone="no"
10335 * -2 if standalone attribute is missing or invalid
10336 * (A standalone value of -2 means that the XML declaration was found,
10337 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010338 */
10339
10340int
10341xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010342 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010343
10344 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010345 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010346 SKIP(10);
10347 SKIP_BLANKS;
10348 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010349 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010350 return(standalone);
10351 }
10352 NEXT;
10353 SKIP_BLANKS;
10354 if (RAW == '\''){
10355 NEXT;
10356 if ((RAW == 'n') && (NXT(1) == 'o')) {
10357 standalone = 0;
10358 SKIP(2);
10359 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10360 (NXT(2) == 's')) {
10361 standalone = 1;
10362 SKIP(3);
10363 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010364 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010365 }
10366 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010367 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010368 } else
10369 NEXT;
10370 } else if (RAW == '"'){
10371 NEXT;
10372 if ((RAW == 'n') && (NXT(1) == 'o')) {
10373 standalone = 0;
10374 SKIP(2);
10375 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10376 (NXT(2) == 's')) {
10377 standalone = 1;
10378 SKIP(3);
10379 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010380 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010381 }
10382 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010383 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010384 } else
10385 NEXT;
10386 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010387 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010388 }
10389 }
10390 return(standalone);
10391}
10392
10393/**
10394 * xmlParseXMLDecl:
10395 * @ctxt: an XML parser context
10396 *
10397 * parse an XML declaration header
10398 *
10399 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10400 */
10401
10402void
10403xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10404 xmlChar *version;
10405
10406 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010407 * This value for standalone indicates that the document has an
10408 * XML declaration but it does not have a standalone attribute.
10409 * It will be overwritten later if a standalone attribute is found.
10410 */
10411 ctxt->input->standalone = -2;
10412
10413 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010414 * We know that '<?xml' is here.
10415 */
10416 SKIP(5);
10417
William M. Brack76e95df2003-10-18 16:20:14 +000010418 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010419 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10420 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010421 }
10422 SKIP_BLANKS;
10423
10424 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010425 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010426 */
10427 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010428 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010429 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010430 } else {
10431 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10432 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010433 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010434 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010435 if (ctxt->options & XML_PARSE_OLD10) {
10436 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10437 "Unsupported version '%s'\n",
10438 version);
10439 } else {
10440 if ((version[0] == '1') && ((version[1] == '.'))) {
10441 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10442 "Unsupported version '%s'\n",
10443 version, NULL);
10444 } else {
10445 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10446 "Unsupported version '%s'\n",
10447 version);
10448 }
10449 }
Daniel Veillard19840942001-11-29 16:11:38 +000010450 }
10451 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010452 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010453 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010454 }
Owen Taylor3473f882001-02-23 17:55:21 +000010455
10456 /*
10457 * We may have the encoding declaration
10458 */
William M. Brack76e95df2003-10-18 16:20:14 +000010459 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010460 if ((RAW == '?') && (NXT(1) == '>')) {
10461 SKIP(2);
10462 return;
10463 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010464 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010465 }
10466 xmlParseEncodingDecl(ctxt);
10467 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10468 /*
10469 * The XML REC instructs us to stop parsing right here
10470 */
10471 return;
10472 }
10473
10474 /*
10475 * We may have the standalone status.
10476 */
William M. Brack76e95df2003-10-18 16:20:14 +000010477 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010478 if ((RAW == '?') && (NXT(1) == '>')) {
10479 SKIP(2);
10480 return;
10481 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010482 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010483 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010484
10485 /*
10486 * We can grow the input buffer freely at that point
10487 */
10488 GROW;
10489
Owen Taylor3473f882001-02-23 17:55:21 +000010490 SKIP_BLANKS;
10491 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10492
10493 SKIP_BLANKS;
10494 if ((RAW == '?') && (NXT(1) == '>')) {
10495 SKIP(2);
10496 } else if (RAW == '>') {
10497 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010498 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010499 NEXT;
10500 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010501 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010502 MOVETO_ENDTAG(CUR_PTR);
10503 NEXT;
10504 }
10505}
10506
10507/**
10508 * xmlParseMisc:
10509 * @ctxt: an XML parser context
10510 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010511 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010512 *
10513 * [27] Misc ::= Comment | PI | S
10514 */
10515
10516void
10517xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080010518 while ((ctxt->instate != XML_PARSER_EOF) &&
10519 (((RAW == '<') && (NXT(1) == '?')) ||
10520 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
10521 IS_BLANK_CH(CUR))) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010522 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010523 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010524 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010525 NEXT;
10526 } else
10527 xmlParseComment(ctxt);
10528 }
10529}
10530
/**
 * xmlParseDocument:
 * @ctxt:  an XML parser context
 *
 * Parse an XML document (and build a tree if using the standard SAX
 * interface).
 *
 * [1] document ::= prolog element Misc*
 *
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
 *
 * The steps are, in order: encoding detection from the first four bytes
 * (only when ctxt->encoding is not already set), optional XML
 * declaration, startDocument, Misc*, optional DOCTYPE with internal
 * subset and the externalSubset callback, the root element, trailing
 * Misc*, and endDocument.
 *
 * Returns 0 if the document is well formed, -1 otherwise: @ctxt or its
 *         input is NULL, the declared encoding is unsupported (in that
 *         case the function returns right after the XML declaration,
 *         before any startDocument/endDocument callback), or
 *         ctxt->wellFormed is zero at the end of the parse. The parser
 *         context is augmented as a result of the parsing.
 */

int
xmlParseDocument(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    xmlInitParser();

    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    GROW;

    /*
     * SAX: detecting the level.
     */
    xmlDetectSAX2(ctxt);

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /* Auto-detection is skipped when an encoding is already recorded. */
    if ((ctxt->encoding == NULL) &&
        ((ctxt->input->end - ctxt->input->cur) >= 4)) {
        /*
         * Get the 4 first bytes and decode the charset
         * if enc != XML_CHAR_ENCODING_NONE
         * plug some encoding conversion routines.
         */
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(&start[0], 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    /* An empty input is reported, but parsing still goes on below. */
    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
    }

    /*
     * Check for the XMLDecl in the Prolog.
     * Do not GROW here, so that the detected encoder does not decode more
     * than just the first line, unless the amount of data is really
     * too small to hold "<?xml version="1.0" encoding="foo"
     */
    if ((ctxt->input->end - ctxt->input->cur) < 35) {
       GROW;
    }
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        ctxt->standalone = ctxt->input->standalone;
        SKIP_BLANKS;
    } else {
        /* No XML declaration: record the default version. */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);

    /*
     * The Misc part of the Prolog
     */
    GROW;
    xmlParseMisc(ctxt);

    /*
     * Then possibly doc type declaration(s) and more Misc
     * (doctypedecl Misc*)?
     */
    GROW;
    if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {

        /* inSubset: 1 while in the internal subset, 2 for the external one. */
        ctxt->inSubset = 1;
        xmlParseDocTypeDecl(ctxt);
        if (RAW == '[') {
            ctxt->instate = XML_PARSER_DTD;
            xmlParseInternalSubset(ctxt);
        }

        /*
         * Create and update the external subset.
         */
        ctxt->inSubset = 2;
        if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                                      ctxt->extSubSystem, ctxt->extSubURI);
        ctxt->inSubset = 0;

        xmlCleanSpecialAttr(ctxt);

        ctxt->instate = XML_PARSER_PROLOG;
        xmlParseMisc(ctxt);
    }

    /*
     * Time to start parsing the tree itself
     */
    GROW;
    if (RAW != '<') {
        xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
                       "Start tag expected, '<' not found\n");
    } else {
        ctxt->instate = XML_PARSER_CONTENT;
        xmlParseElement(ctxt);
        ctxt->instate = XML_PARSER_EPILOG;


        /*
         * The Misc part at the end
         */
        xmlParseMisc(ctxt);

        /* Anything left after the trailing Misc* is an error. */
        if (RAW != 0) {
            xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
        }
        ctxt->instate = XML_PARSER_EOF;
    }

    /*
     * SAX: end of the document processing.
     * Unlike startDocument, this is not conditioned on disableSAX.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    /*
     * Remove locally kept entity definitions if the tree was not built
     */
    if ((ctxt->myDoc != NULL) &&
        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
        xmlFreeDoc(ctxt->myDoc);
        ctxt->myDoc = NULL;
    }

    /* Record the outcome of the parse as property flags on the document. */
    if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
        ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
        if (ctxt->valid)
            ctxt->myDoc->properties |= XML_DOC_DTDVALID;
        if (ctxt->nsWellFormed)
            ctxt->myDoc->properties |= XML_DOC_NSVALID;
        if (ctxt->options & XML_PARSE_OLD10)
            ctxt->myDoc->properties |= XML_DOC_OLD10;
    }
    if (! ctxt->wellFormed) {
        /* A document that is not well formed is also flagged as not valid. */
        ctxt->valid = 0;
        return(-1);
    }
    return(0);
}
10710
/**
 * xmlParseExtParsedEnt:
 * @ctxt:  an XML parser context
 *
 * Parse a general parsed entity.
 * An external general parsed entity is well-formed if it matches the
 * production labeled extParsedEnt.
 *
 * [78] extParsedEnt ::= TextDecl? content
 *
 * The optional leading declaration is handled by xmlParseXMLDecl().
 * Validation and external subset loading are switched off in @ctxt
 * before the content is parsed.
 *
 * Returns 0 if the entity is well formed, -1 otherwise: @ctxt or its
 *         input is NULL, the declared encoding is unsupported, or
 *         ctxt->wellFormed is zero at the end of the parse. The parser
 *         context is augmented as a result of the parsing.
 */

int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    if ((ctxt == NULL) || (ctxt->input == NULL))
        return(-1);

    xmlDefaultSAXHandlerInit();

    xmlDetectSAX2(ctxt);

    GROW;

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /*
     * Get the 4 first bytes and decode the charset
     * if enc != XML_CHAR_ENCODING_NONE
     * plug some encoding conversion routines.
     */
    if ((ctxt->input->end - ctxt->input->cur) >= 4) {
        start[0] = RAW;
        start[1] = NXT(1);
        start[2] = NXT(2);
        start[3] = NXT(3);
        enc = xmlDetectCharEncoding(start, 4);
        if (enc != XML_CHAR_ENCODING_NONE) {
            xmlSwitchEncoding(ctxt, enc);
        }
    }


    /* An empty input is reported, but parsing still goes on below. */
    if (CUR == 0) {
        xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
    }

    /*
     * Check for the XMLDecl in the Prolog.
     */
    GROW;
    if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        SKIP_BLANKS;
    } else {
        /* No declaration: record the default version. */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);

    /*
     * Doing validity checking on chunk doesn't make sense
     */
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->validate = 0;
    ctxt->loadsubset = 0;
    ctxt->depth = 0;

    xmlParseContent(ctxt);

    /* A leftover end tag or any other remaining input is an error. */
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }

    /*
     * SAX: end of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
        ctxt->sax->endDocument(ctxt->userData);

    if (! ctxt->wellFormed) return(-1);
    return(0);
}
10814
Daniel Veillard73b013f2003-09-30 12:36:01 +000010815#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010816/************************************************************************
10817 * *
10818 * Progressive parsing interfaces *
10819 * *
10820 ************************************************************************/
10821
10822/**
10823 * xmlParseLookupSequence:
10824 * @ctxt: an XML parser context
10825 * @first: the first char to lookup
10826 * @next: the next char to lookup or zero
10827 * @third: the next char to lookup or zero
10828 *
10829 * Try to find if a sequence (first, next, third) or just (first next) or
10830 * (first) is available in the input stream.
10831 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10832 * to avoid rescanning sequences of bytes, it DOES change the state of the
10833 * parser, do not use liberally.
10834 *
10835 * Returns the index to the current parsing point if the full sequence
10836 * is available, -1 otherwise.
10837 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010838static int
Owen Taylor3473f882001-02-23 17:55:21 +000010839xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10840 xmlChar next, xmlChar third) {
10841 int base, len;
10842 xmlParserInputPtr in;
10843 const xmlChar *buf;
10844
10845 in = ctxt->input;
10846 if (in == NULL) return(-1);
10847 base = in->cur - in->base;
10848 if (base < 0) return(-1);
10849 if (ctxt->checkIndex > base)
10850 base = ctxt->checkIndex;
10851 if (in->buf == NULL) {
10852 buf = in->base;
10853 len = in->length;
10854 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010855 buf = xmlBufContent(in->buf->buffer);
10856 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010857 }
10858 /* take into account the sequence length */
10859 if (third) len -= 2;
10860 else if (next) len --;
10861 for (;base < len;base++) {
10862 if (buf[base] == first) {
10863 if (third != 0) {
10864 if ((buf[base + 1] != next) ||
10865 (buf[base + 2] != third)) continue;
10866 } else if (next != 0) {
10867 if (buf[base + 1] != next) continue;
10868 }
10869 ctxt->checkIndex = 0;
10870#ifdef DEBUG_PUSH
10871 if (next == 0)
10872 xmlGenericError(xmlGenericErrorContext,
10873 "PP: lookup '%c' found at %d\n",
10874 first, base);
10875 else if (third == 0)
10876 xmlGenericError(xmlGenericErrorContext,
10877 "PP: lookup '%c%c' found at %d\n",
10878 first, next, base);
10879 else
10880 xmlGenericError(xmlGenericErrorContext,
10881 "PP: lookup '%c%c%c' found at %d\n",
10882 first, next, third, base);
10883#endif
10884 return(base - (in->cur - in->base));
10885 }
10886 }
10887 ctxt->checkIndex = base;
10888#ifdef DEBUG_PUSH
10889 if (next == 0)
10890 xmlGenericError(xmlGenericErrorContext,
10891 "PP: lookup '%c' failed\n", first);
10892 else if (third == 0)
10893 xmlGenericError(xmlGenericErrorContext,
10894 "PP: lookup '%c%c' failed\n", first, next);
10895 else
10896 xmlGenericError(xmlGenericErrorContext,
10897 "PP: lookup '%c%c%c' failed\n", first, next, third);
10898#endif
10899 return(-1);
10900}
10901
10902/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010903 * xmlParseGetLasts:
10904 * @ctxt: an XML parser context
10905 * @lastlt: pointer to store the last '<' from the input
10906 * @lastgt: pointer to store the last '>' from the input
10907 *
10908 * Lookup the last < and > in the current chunk
10909 */
10910static void
10911xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10912 const xmlChar **lastgt) {
10913 const xmlChar *tmp;
10914
10915 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10916 xmlGenericError(xmlGenericErrorContext,
10917 "Internal error: xmlParseGetLasts\n");
10918 return;
10919 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010920 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010921 tmp = ctxt->input->end;
10922 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010923 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010924 if (tmp < ctxt->input->base) {
10925 *lastlt = NULL;
10926 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010927 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010928 *lastlt = tmp;
10929 tmp++;
10930 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10931 if (*tmp == '\'') {
10932 tmp++;
10933 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10934 if (tmp < ctxt->input->end) tmp++;
10935 } else if (*tmp == '"') {
10936 tmp++;
10937 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10938 if (tmp < ctxt->input->end) tmp++;
10939 } else
10940 tmp++;
10941 }
10942 if (tmp < ctxt->input->end)
10943 *lastgt = tmp;
10944 else {
10945 tmp = *lastlt;
10946 tmp--;
10947 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10948 if (tmp >= ctxt->input->base)
10949 *lastgt = tmp;
10950 else
10951 *lastgt = NULL;
10952 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010953 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010954 } else {
10955 *lastlt = NULL;
10956 *lastgt = NULL;
10957 }
10958}
10959/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010960 * xmlCheckCdataPush:
10961 * @cur: pointer to the bock of characters
10962 * @len: length of the block in bytes
10963 *
10964 * Check that the block of characters is okay as SCdata content [20]
10965 *
10966 * Returns the number of bytes to pass if okay, a negative index where an
10967 * UTF-8 error occured otherwise
10968 */
10969static int
10970xmlCheckCdataPush(const xmlChar *utf, int len) {
10971 int ix;
10972 unsigned char c;
10973 int codepoint;
10974
10975 if ((utf == NULL) || (len <= 0))
10976 return(0);
10977
10978 for (ix = 0; ix < len;) { /* string is 0-terminated */
10979 c = utf[ix];
10980 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10981 if (c >= 0x20)
10982 ix++;
10983 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10984 ix++;
10985 else
10986 return(-ix);
10987 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10988 if (ix + 2 > len) return(ix);
10989 if ((utf[ix+1] & 0xc0 ) != 0x80)
10990 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010991 codepoint = (utf[ix] & 0x1f) << 6;
10992 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010993 if (!xmlIsCharQ(codepoint))
10994 return(-ix);
10995 ix += 2;
10996 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10997 if (ix + 3 > len) return(ix);
10998 if (((utf[ix+1] & 0xc0) != 0x80) ||
10999 ((utf[ix+2] & 0xc0) != 0x80))
11000 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011001 codepoint = (utf[ix] & 0xf) << 12;
11002 codepoint |= (utf[ix+1] & 0x3f) << 6;
11003 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011004 if (!xmlIsCharQ(codepoint))
11005 return(-ix);
11006 ix += 3;
11007 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11008 if (ix + 4 > len) return(ix);
11009 if (((utf[ix+1] & 0xc0) != 0x80) ||
11010 ((utf[ix+2] & 0xc0) != 0x80) ||
11011 ((utf[ix+3] & 0xc0) != 0x80))
11012 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000011013 codepoint = (utf[ix] & 0x7) << 18;
11014 codepoint |= (utf[ix+1] & 0x3f) << 12;
11015 codepoint |= (utf[ix+2] & 0x3f) << 6;
11016 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011017 if (!xmlIsCharQ(codepoint))
11018 return(-ix);
11019 ix += 4;
11020 } else /* unknown encoding */
11021 return(-ix);
11022 }
11023 return(ix);
11024}
11025
11026/**
Owen Taylor3473f882001-02-23 17:55:21 +000011027 * xmlParseTryOrFinish:
11028 * @ctxt: an XML parser context
11029 * @terminate: last chunk indicator
11030 *
11031 * Try to progress on parsing
11032 *
11033 * Returns zero if no parsing was possible
11034 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000011035static int
Owen Taylor3473f882001-02-23 17:55:21 +000011036xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11037 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011038 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000011039 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000011040 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000011041
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011042 if (ctxt->input == NULL)
11043 return(0);
11044
Owen Taylor3473f882001-02-23 17:55:21 +000011045#ifdef DEBUG_PUSH
11046 switch (ctxt->instate) {
11047 case XML_PARSER_EOF:
11048 xmlGenericError(xmlGenericErrorContext,
11049 "PP: try EOF\n"); break;
11050 case XML_PARSER_START:
11051 xmlGenericError(xmlGenericErrorContext,
11052 "PP: try START\n"); break;
11053 case XML_PARSER_MISC:
11054 xmlGenericError(xmlGenericErrorContext,
11055 "PP: try MISC\n");break;
11056 case XML_PARSER_COMMENT:
11057 xmlGenericError(xmlGenericErrorContext,
11058 "PP: try COMMENT\n");break;
11059 case XML_PARSER_PROLOG:
11060 xmlGenericError(xmlGenericErrorContext,
11061 "PP: try PROLOG\n");break;
11062 case XML_PARSER_START_TAG:
11063 xmlGenericError(xmlGenericErrorContext,
11064 "PP: try START_TAG\n");break;
11065 case XML_PARSER_CONTENT:
11066 xmlGenericError(xmlGenericErrorContext,
11067 "PP: try CONTENT\n");break;
11068 case XML_PARSER_CDATA_SECTION:
11069 xmlGenericError(xmlGenericErrorContext,
11070 "PP: try CDATA_SECTION\n");break;
11071 case XML_PARSER_END_TAG:
11072 xmlGenericError(xmlGenericErrorContext,
11073 "PP: try END_TAG\n");break;
11074 case XML_PARSER_ENTITY_DECL:
11075 xmlGenericError(xmlGenericErrorContext,
11076 "PP: try ENTITY_DECL\n");break;
11077 case XML_PARSER_ENTITY_VALUE:
11078 xmlGenericError(xmlGenericErrorContext,
11079 "PP: try ENTITY_VALUE\n");break;
11080 case XML_PARSER_ATTRIBUTE_VALUE:
11081 xmlGenericError(xmlGenericErrorContext,
11082 "PP: try ATTRIBUTE_VALUE\n");break;
11083 case XML_PARSER_DTD:
11084 xmlGenericError(xmlGenericErrorContext,
11085 "PP: try DTD\n");break;
11086 case XML_PARSER_EPILOG:
11087 xmlGenericError(xmlGenericErrorContext,
11088 "PP: try EPILOG\n");break;
11089 case XML_PARSER_PI:
11090 xmlGenericError(xmlGenericErrorContext,
11091 "PP: try PI\n");break;
11092 case XML_PARSER_IGNORE:
11093 xmlGenericError(xmlGenericErrorContext,
11094 "PP: try IGNORE\n");break;
11095 }
11096#endif
11097
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011098 if ((ctxt->input != NULL) &&
11099 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011100 xmlSHRINK(ctxt);
11101 ctxt->checkIndex = 0;
11102 }
11103 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000011104
Daniel Veillarda880b122003-04-21 21:36:41 +000011105 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000011106 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011107 return(0);
11108
11109
Owen Taylor3473f882001-02-23 17:55:21 +000011110 /*
11111 * Pop-up of finished entities.
11112 */
11113 while ((RAW == 0) && (ctxt->inputNr > 1))
11114 xmlPopInput(ctxt);
11115
Daniel Veillard198c1bf2003-10-20 17:07:41 +000011116 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000011117 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011118 avail = ctxt->input->length -
11119 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011120 else {
11121 /*
11122 * If we are operating on converted input, try to flush
11123 * remainng chars to avoid them stalling in the non-converted
11124 * buffer.
11125 */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011126 if (xmlBufIsEmpty(ctxt->input->buf->buffer) == 0) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011127 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
11128 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011129 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000011130
11131 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011132 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
11133 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000011134 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011135 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000011136 (ctxt->input->cur - ctxt->input->base);
11137 }
Owen Taylor3473f882001-02-23 17:55:21 +000011138 if (avail < 1)
11139 goto done;
11140 switch (ctxt->instate) {
11141 case XML_PARSER_EOF:
11142 /*
11143 * Document parsing is done !
11144 */
11145 goto done;
11146 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011147 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
11148 xmlChar start[4];
11149 xmlCharEncoding enc;
11150
11151 /*
11152 * Very first chars read from the document flow.
11153 */
11154 if (avail < 4)
11155 goto done;
11156
11157 /*
11158 * Get the 4 first bytes and decode the charset
11159 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000011160 * plug some encoding conversion routines,
11161 * else xmlSwitchEncoding will set to (default)
11162 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011163 */
11164 start[0] = RAW;
11165 start[1] = NXT(1);
11166 start[2] = NXT(2);
11167 start[3] = NXT(3);
11168 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000011169 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011170 break;
11171 }
Owen Taylor3473f882001-02-23 17:55:21 +000011172
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000011173 if (avail < 2)
11174 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000011175 cur = ctxt->input->cur[0];
11176 next = ctxt->input->cur[1];
11177 if (cur == 0) {
11178 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11179 ctxt->sax->setDocumentLocator(ctxt->userData,
11180 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011181 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011182 ctxt->instate = XML_PARSER_EOF;
11183#ifdef DEBUG_PUSH
11184 xmlGenericError(xmlGenericErrorContext,
11185 "PP: entering EOF\n");
11186#endif
11187 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11188 ctxt->sax->endDocument(ctxt->userData);
11189 goto done;
11190 }
11191 if ((cur == '<') && (next == '?')) {
11192 /* PI or XML decl */
11193 if (avail < 5) return(ret);
11194 if ((!terminate) &&
11195 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11196 return(ret);
11197 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11198 ctxt->sax->setDocumentLocator(ctxt->userData,
11199 &xmlDefaultSAXLocator);
11200 if ((ctxt->input->cur[2] == 'x') &&
11201 (ctxt->input->cur[3] == 'm') &&
11202 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000011203 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000011204 ret += 5;
11205#ifdef DEBUG_PUSH
11206 xmlGenericError(xmlGenericErrorContext,
11207 "PP: Parsing XML Decl\n");
11208#endif
11209 xmlParseXMLDecl(ctxt);
11210 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11211 /*
11212 * The XML REC instructs us to stop parsing right
11213 * here
11214 */
11215 ctxt->instate = XML_PARSER_EOF;
11216 return(0);
11217 }
11218 ctxt->standalone = ctxt->input->standalone;
11219 if ((ctxt->encoding == NULL) &&
11220 (ctxt->input->encoding != NULL))
11221 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
11222 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11223 (!ctxt->disableSAX))
11224 ctxt->sax->startDocument(ctxt->userData);
11225 ctxt->instate = XML_PARSER_MISC;
11226#ifdef DEBUG_PUSH
11227 xmlGenericError(xmlGenericErrorContext,
11228 "PP: entering MISC\n");
11229#endif
11230 } else {
11231 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11232 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11233 (!ctxt->disableSAX))
11234 ctxt->sax->startDocument(ctxt->userData);
11235 ctxt->instate = XML_PARSER_MISC;
11236#ifdef DEBUG_PUSH
11237 xmlGenericError(xmlGenericErrorContext,
11238 "PP: entering MISC\n");
11239#endif
11240 }
11241 } else {
11242 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11243 ctxt->sax->setDocumentLocator(ctxt->userData,
11244 &xmlDefaultSAXLocator);
11245 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000011246 if (ctxt->version == NULL) {
11247 xmlErrMemory(ctxt, NULL);
11248 break;
11249 }
Owen Taylor3473f882001-02-23 17:55:21 +000011250 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11251 (!ctxt->disableSAX))
11252 ctxt->sax->startDocument(ctxt->userData);
11253 ctxt->instate = XML_PARSER_MISC;
11254#ifdef DEBUG_PUSH
11255 xmlGenericError(xmlGenericErrorContext,
11256 "PP: entering MISC\n");
11257#endif
11258 }
11259 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011260 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000011261 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020011262 const xmlChar *prefix = NULL;
11263 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011264 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000011265
11266 if ((avail < 2) && (ctxt->inputNr == 1))
11267 goto done;
11268 cur = ctxt->input->cur[0];
11269 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011270 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000011271 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011272 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11273 ctxt->sax->endDocument(ctxt->userData);
11274 goto done;
11275 }
11276 if (!terminate) {
11277 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000011278 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000011279 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011280 goto done;
11281 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11282 goto done;
11283 }
11284 }
11285 if (ctxt->spaceNr == 0)
11286 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011287 else if (*ctxt->space == -2)
11288 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011289 else
11290 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011291#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011292 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011293#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011294 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011295#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011296 else
11297 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011298#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011299 if (ctxt->instate == XML_PARSER_EOF)
11300 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011301 if (name == NULL) {
11302 spacePop(ctxt);
11303 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011304 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11305 ctxt->sax->endDocument(ctxt->userData);
11306 goto done;
11307 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011308#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011309 /*
11310 * [ VC: Root Element Type ]
11311 * The Name in the document type declaration must match
11312 * the element type of the root element.
11313 */
11314 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11315 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11316 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011317#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011318
11319 /*
11320 * Check for an Empty Element.
11321 */
11322 if ((RAW == '/') && (NXT(1) == '>')) {
11323 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011324
11325 if (ctxt->sax2) {
11326 if ((ctxt->sax != NULL) &&
11327 (ctxt->sax->endElementNs != NULL) &&
11328 (!ctxt->disableSAX))
11329 ctxt->sax->endElementNs(ctxt->userData, name,
11330 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011331 if (ctxt->nsNr - nsNr > 0)
11332 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011333#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011334 } else {
11335 if ((ctxt->sax != NULL) &&
11336 (ctxt->sax->endElement != NULL) &&
11337 (!ctxt->disableSAX))
11338 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011339#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011340 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011341 spacePop(ctxt);
11342 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011343 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011344 } else {
11345 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011346 }
Daniel Veillard65686452012-07-19 18:25:01 +080011347 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011348 break;
11349 }
11350 if (RAW == '>') {
11351 NEXT;
11352 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011353 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011354 "Couldn't find end of Start Tag %s\n",
11355 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011356 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011357 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011358 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011359 if (ctxt->sax2)
11360 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011361#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011362 else
11363 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011364#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011365
Daniel Veillarda880b122003-04-21 21:36:41 +000011366 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011367 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011368 break;
11369 }
11370 case XML_PARSER_CONTENT: {
11371 const xmlChar *test;
11372 unsigned int cons;
11373 if ((avail < 2) && (ctxt->inputNr == 1))
11374 goto done;
11375 cur = ctxt->input->cur[0];
11376 next = ctxt->input->cur[1];
11377
11378 test = CUR_PTR;
11379 cons = ctxt->input->consumed;
11380 if ((cur == '<') && (next == '/')) {
11381 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011382 break;
11383 } else if ((cur == '<') && (next == '?')) {
11384 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011385 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11386 ctxt->progressive = XML_PARSER_PI;
Daniel Veillarda880b122003-04-21 21:36:41 +000011387 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011388 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011389 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011390 ctxt->instate = XML_PARSER_CONTENT;
11391 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011392 } else if ((cur == '<') && (next != '!')) {
11393 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011394 break;
11395 } else if ((cur == '<') && (next == '!') &&
11396 (ctxt->input->cur[2] == '-') &&
11397 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011398 int term;
11399
11400 if (avail < 4)
11401 goto done;
11402 ctxt->input->cur += 4;
11403 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11404 ctxt->input->cur -= 4;
Daniel Veillard65686452012-07-19 18:25:01 +080011405 if ((!terminate) && (term < 0)) {
11406 ctxt->progressive = XML_PARSER_COMMENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011407 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011408 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011409 xmlParseComment(ctxt);
11410 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard65686452012-07-19 18:25:01 +080011411 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011412 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11413 (ctxt->input->cur[2] == '[') &&
11414 (ctxt->input->cur[3] == 'C') &&
11415 (ctxt->input->cur[4] == 'D') &&
11416 (ctxt->input->cur[5] == 'A') &&
11417 (ctxt->input->cur[6] == 'T') &&
11418 (ctxt->input->cur[7] == 'A') &&
11419 (ctxt->input->cur[8] == '[')) {
11420 SKIP(9);
11421 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011422 break;
11423 } else if ((cur == '<') && (next == '!') &&
11424 (avail < 9)) {
11425 goto done;
11426 } else if (cur == '&') {
11427 if ((!terminate) &&
11428 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11429 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011430 xmlParseReference(ctxt);
11431 } else {
11432 /* TODO Avoid the extra copy, handle directly !!! */
11433 /*
11434 * Goal of the following test is:
11435 * - minimize calls to the SAX 'character' callback
11436 * when they are mergeable
11437 * - handle an problem for isBlank when we only parse
11438 * a sequence of blank chars and the next one is
11439 * not available to check against '<' presence.
11440 * - tries to homogenize the differences in SAX
11441 * callbacks between the push and pull versions
11442 * of the parser.
11443 */
11444 if ((ctxt->inputNr == 1) &&
11445 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11446 if (!terminate) {
11447 if (ctxt->progressive) {
11448 if ((lastlt == NULL) ||
11449 (ctxt->input->cur > lastlt))
11450 goto done;
11451 } else if (xmlParseLookupSequence(ctxt,
11452 '<', 0, 0) < 0) {
11453 goto done;
11454 }
11455 }
11456 }
11457 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011458 xmlParseCharData(ctxt, 0);
11459 }
11460 /*
11461 * Pop-up of finished entities.
11462 */
11463 while ((RAW == 0) && (ctxt->inputNr > 1))
11464 xmlPopInput(ctxt);
11465 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011466 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11467 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011468 ctxt->instate = XML_PARSER_EOF;
11469 break;
11470 }
11471 break;
11472 }
11473 case XML_PARSER_END_TAG:
11474 if (avail < 2)
11475 goto done;
11476 if (!terminate) {
11477 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011478 /* > can be found unescaped in attribute values */
11479 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011480 goto done;
11481 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11482 goto done;
11483 }
11484 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011485 if (ctxt->sax2) {
11486 xmlParseEndTag2(ctxt,
11487 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11488 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011489 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011490 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011491 }
11492#ifdef LIBXML_SAX1_ENABLED
11493 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011494 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011495#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011496 if (ctxt->instate == XML_PARSER_EOF) {
11497 /* Nothing */
11498 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011499 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011500 } else {
11501 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011502 }
11503 break;
11504 case XML_PARSER_CDATA_SECTION: {
11505 /*
11506 * The Push mode need to have the SAX callback for
11507 * cdataBlock merge back contiguous callbacks.
11508 */
11509 int base;
11510
11511 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11512 if (base < 0) {
11513 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011514 int tmp;
11515
11516 tmp = xmlCheckCdataPush(ctxt->input->cur,
11517 XML_PARSER_BIG_BUFFER_SIZE);
11518 if (tmp < 0) {
11519 tmp = -tmp;
11520 ctxt->input->cur += tmp;
11521 goto encoding_error;
11522 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011523 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11524 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011525 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011526 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011527 else if (ctxt->sax->characters != NULL)
11528 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011529 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011530 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011531 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011532 ctxt->checkIndex = 0;
11533 }
11534 goto done;
11535 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011536 int tmp;
11537
11538 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11539 if ((tmp < 0) || (tmp != base)) {
11540 tmp = -tmp;
11541 ctxt->input->cur += tmp;
11542 goto encoding_error;
11543 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011544 if ((ctxt->sax != NULL) && (base == 0) &&
11545 (ctxt->sax->cdataBlock != NULL) &&
11546 (!ctxt->disableSAX)) {
11547 /*
11548 * Special case to provide identical behaviour
11549 * between pull and push parsers on enpty CDATA
11550 * sections
11551 */
11552 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11553 (!strncmp((const char *)&ctxt->input->cur[-9],
11554 "<![CDATA[", 9)))
11555 ctxt->sax->cdataBlock(ctxt->userData,
11556 BAD_CAST "", 0);
11557 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011558 (!ctxt->disableSAX)) {
11559 if (ctxt->sax->cdataBlock != NULL)
11560 ctxt->sax->cdataBlock(ctxt->userData,
11561 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011562 else if (ctxt->sax->characters != NULL)
11563 ctxt->sax->characters(ctxt->userData,
11564 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011565 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011566 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011567 ctxt->checkIndex = 0;
11568 ctxt->instate = XML_PARSER_CONTENT;
11569#ifdef DEBUG_PUSH
11570 xmlGenericError(xmlGenericErrorContext,
11571 "PP: entering CONTENT\n");
11572#endif
11573 }
11574 break;
11575 }
Owen Taylor3473f882001-02-23 17:55:21 +000011576 case XML_PARSER_MISC:
11577 SKIP_BLANKS;
11578 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011579 avail = ctxt->input->length -
11580 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011581 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011582 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011583 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011584 if (avail < 2)
11585 goto done;
11586 cur = ctxt->input->cur[0];
11587 next = ctxt->input->cur[1];
11588 if ((cur == '<') && (next == '?')) {
11589 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011590 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11591 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011592 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011593 }
Owen Taylor3473f882001-02-23 17:55:21 +000011594#ifdef DEBUG_PUSH
11595 xmlGenericError(xmlGenericErrorContext,
11596 "PP: Parsing PI\n");
11597#endif
11598 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011599 ctxt->instate = XML_PARSER_MISC;
11600 ctxt->progressive = 1;
Daniel Veillard40e4b212007-06-12 14:46:40 +000011601 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011602 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011603 (ctxt->input->cur[2] == '-') &&
11604 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011605 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011606 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11607 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011608 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011609 }
Owen Taylor3473f882001-02-23 17:55:21 +000011610#ifdef DEBUG_PUSH
11611 xmlGenericError(xmlGenericErrorContext,
11612 "PP: Parsing Comment\n");
11613#endif
11614 xmlParseComment(ctxt);
11615 ctxt->instate = XML_PARSER_MISC;
Daniel Veillard65686452012-07-19 18:25:01 +080011616 ctxt->progressive = 1;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011617 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011618 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011619 (ctxt->input->cur[2] == 'D') &&
11620 (ctxt->input->cur[3] == 'O') &&
11621 (ctxt->input->cur[4] == 'C') &&
11622 (ctxt->input->cur[5] == 'T') &&
11623 (ctxt->input->cur[6] == 'Y') &&
11624 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011625 (ctxt->input->cur[8] == 'E')) {
11626 if ((!terminate) &&
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011627 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0)) {
11628 ctxt->progressive = XML_PARSER_DTD;
Owen Taylor3473f882001-02-23 17:55:21 +000011629 goto done;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011630 }
Owen Taylor3473f882001-02-23 17:55:21 +000011631#ifdef DEBUG_PUSH
11632 xmlGenericError(xmlGenericErrorContext,
11633 "PP: Parsing internal subset\n");
11634#endif
11635 ctxt->inSubset = 1;
Daniel Veillard5353bbf2012-08-03 12:03:31 +080011636 ctxt->progressive = 1;
11637 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011638 xmlParseDocTypeDecl(ctxt);
11639 if (RAW == '[') {
11640 ctxt->instate = XML_PARSER_DTD;
11641#ifdef DEBUG_PUSH
11642 xmlGenericError(xmlGenericErrorContext,
11643 "PP: entering DTD\n");
11644#endif
11645 } else {
11646 /*
11647 * Create and update the external subset.
11648 */
11649 ctxt->inSubset = 2;
11650 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11651 (ctxt->sax->externalSubset != NULL))
11652 ctxt->sax->externalSubset(ctxt->userData,
11653 ctxt->intSubName, ctxt->extSubSystem,
11654 ctxt->extSubURI);
11655 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011656 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011657 ctxt->instate = XML_PARSER_PROLOG;
11658#ifdef DEBUG_PUSH
11659 xmlGenericError(xmlGenericErrorContext,
11660 "PP: entering PROLOG\n");
11661#endif
11662 }
11663 } else if ((cur == '<') && (next == '!') &&
11664 (avail < 9)) {
11665 goto done;
11666 } else {
11667 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard65686452012-07-19 18:25:01 +080011668 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011669 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011670#ifdef DEBUG_PUSH
11671 xmlGenericError(xmlGenericErrorContext,
11672 "PP: entering START_TAG\n");
11673#endif
11674 }
11675 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011676 case XML_PARSER_PROLOG:
11677 SKIP_BLANKS;
11678 if (ctxt->input->buf == NULL)
11679 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11680 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011681 avail = xmlBufUse(ctxt->input->buf->buffer) -
11682 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011683 if (avail < 2)
11684 goto done;
11685 cur = ctxt->input->cur[0];
11686 next = ctxt->input->cur[1];
11687 if ((cur == '<') && (next == '?')) {
11688 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011689 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11690 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011691 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011692 }
Owen Taylor3473f882001-02-23 17:55:21 +000011693#ifdef DEBUG_PUSH
11694 xmlGenericError(xmlGenericErrorContext,
11695 "PP: Parsing PI\n");
11696#endif
11697 xmlParsePI(ctxt);
Daniel Veillardf572a782012-07-19 20:36:25 +080011698 ctxt->instate = XML_PARSER_PROLOG;
11699 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011700 } else if ((cur == '<') && (next == '!') &&
11701 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11702 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011703 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11704 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011705 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011706 }
Owen Taylor3473f882001-02-23 17:55:21 +000011707#ifdef DEBUG_PUSH
11708 xmlGenericError(xmlGenericErrorContext,
11709 "PP: Parsing Comment\n");
11710#endif
11711 xmlParseComment(ctxt);
11712 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011713 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011714 } else if ((cur == '<') && (next == '!') &&
11715 (avail < 4)) {
11716 goto done;
11717 } else {
11718 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011719 if (ctxt->progressive == 0)
Daniel Veillard65686452012-07-19 18:25:01 +080011720 ctxt->progressive = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011721 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011722#ifdef DEBUG_PUSH
11723 xmlGenericError(xmlGenericErrorContext,
11724 "PP: entering START_TAG\n");
11725#endif
11726 }
11727 break;
11728 case XML_PARSER_EPILOG:
11729 SKIP_BLANKS;
11730 if (ctxt->input->buf == NULL)
11731 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11732 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011733 avail = xmlBufUse(ctxt->input->buf->buffer) -
11734 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011735 if (avail < 2)
11736 goto done;
11737 cur = ctxt->input->cur[0];
11738 next = ctxt->input->cur[1];
11739 if ((cur == '<') && (next == '?')) {
11740 if ((!terminate) &&
Daniel Veillardf572a782012-07-19 20:36:25 +080011741 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0)) {
11742 ctxt->progressive = XML_PARSER_PI;
Owen Taylor3473f882001-02-23 17:55:21 +000011743 goto done;
Daniel Veillardf572a782012-07-19 20:36:25 +080011744 }
Owen Taylor3473f882001-02-23 17:55:21 +000011745#ifdef DEBUG_PUSH
11746 xmlGenericError(xmlGenericErrorContext,
11747 "PP: Parsing PI\n");
11748#endif
11749 xmlParsePI(ctxt);
11750 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillardf572a782012-07-19 20:36:25 +080011751 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011752 } else if ((cur == '<') && (next == '!') &&
11753 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11754 if ((!terminate) &&
Daniel Veillard65686452012-07-19 18:25:01 +080011755 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0)) {
11756 ctxt->progressive = XML_PARSER_COMMENT;
Owen Taylor3473f882001-02-23 17:55:21 +000011757 goto done;
Daniel Veillard65686452012-07-19 18:25:01 +080011758 }
Owen Taylor3473f882001-02-23 17:55:21 +000011759#ifdef DEBUG_PUSH
11760 xmlGenericError(xmlGenericErrorContext,
11761 "PP: Parsing Comment\n");
11762#endif
11763 xmlParseComment(ctxt);
11764 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillard65686452012-07-19 18:25:01 +080011765 ctxt->progressive = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011766 } else if ((cur == '<') && (next == '!') &&
11767 (avail < 4)) {
11768 goto done;
11769 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011770 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011771 ctxt->instate = XML_PARSER_EOF;
11772#ifdef DEBUG_PUSH
11773 xmlGenericError(xmlGenericErrorContext,
11774 "PP: entering EOF\n");
11775#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011776 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011777 ctxt->sax->endDocument(ctxt->userData);
11778 goto done;
11779 }
11780 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011781 case XML_PARSER_DTD: {
11782 /*
11783 * Sorry but progressive parsing of the internal subset
11784 * is not expected to be supported. We first check that
11785 * the full content of the internal subset is available and
11786 * the parsing is launched only at that point.
11787 * Internal subset ends up with "']' S? '>'" in an unescaped
11788 * section and not in a ']]>' sequence which are conditional
11789 * sections (whoever argued to keep that crap in XML deserve
11790 * a place in hell !).
11791 */
11792 int base, i;
11793 xmlChar *buf;
11794 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011795 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011796
11797 base = ctxt->input->cur - ctxt->input->base;
11798 if (base < 0) return(0);
11799 if (ctxt->checkIndex > base)
11800 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011801 buf = xmlBufContent(ctxt->input->buf->buffer);
11802 use = xmlBufUse(ctxt->input->buf->buffer);
11803 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011804 if (quote != 0) {
11805 if (buf[base] == quote)
11806 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011807 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011808 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011809 if ((quote == 0) && (buf[base] == '<')) {
11810 int found = 0;
11811 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011812 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011813 (buf[base + 1] == '!') &&
11814 (buf[base + 2] == '-') &&
11815 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011816 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011817 if ((buf[base] == '-') &&
11818 (buf[base + 1] == '-') &&
11819 (buf[base + 2] == '>')) {
11820 found = 1;
11821 base += 2;
11822 break;
11823 }
11824 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011825 if (!found) {
11826#if 0
11827 fprintf(stderr, "unfinished comment\n");
11828#endif
11829 break; /* for */
11830 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011831 continue;
11832 }
11833 }
Owen Taylor3473f882001-02-23 17:55:21 +000011834 if (buf[base] == '"') {
11835 quote = '"';
11836 continue;
11837 }
11838 if (buf[base] == '\'') {
11839 quote = '\'';
11840 continue;
11841 }
11842 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011843#if 0
11844 fprintf(stderr, "%c%c%c%c: ", buf[base],
11845 buf[base + 1], buf[base + 2], buf[base + 3]);
11846#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011847 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011848 break;
11849 if (buf[base + 1] == ']') {
11850 /* conditional crap, skip both ']' ! */
11851 base++;
11852 continue;
11853 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011854 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011855 if (buf[base + i] == '>') {
11856#if 0
11857 fprintf(stderr, "found\n");
11858#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011859 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011860 }
11861 if (!IS_BLANK_CH(buf[base + i])) {
11862#if 0
11863 fprintf(stderr, "not found\n");
11864#endif
11865 goto not_end_of_int_subset;
11866 }
Owen Taylor3473f882001-02-23 17:55:21 +000011867 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011868#if 0
11869 fprintf(stderr, "end of stream\n");
11870#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011871 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011872
Owen Taylor3473f882001-02-23 17:55:21 +000011873 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011874not_end_of_int_subset:
11875 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011876 }
11877 /*
11878 * We didn't found the end of the Internal subset
11879 */
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011880 if (quote == 0)
11881 ctxt->checkIndex = base;
11882 else
11883 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011884#ifdef DEBUG_PUSH
11885 if (next == 0)
11886 xmlGenericError(xmlGenericErrorContext,
11887 "PP: lookup of int subset end filed\n");
11888#endif
11889 goto done;
11890
11891found_end_int_subset:
Daniel Veillard2b52aa02012-07-31 10:53:47 +080011892 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011893 xmlParseInternalSubset(ctxt);
11894 ctxt->inSubset = 2;
11895 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11896 (ctxt->sax->externalSubset != NULL))
11897 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11898 ctxt->extSubSystem, ctxt->extSubURI);
11899 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011900 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011901 ctxt->instate = XML_PARSER_PROLOG;
11902 ctxt->checkIndex = 0;
11903#ifdef DEBUG_PUSH
11904 xmlGenericError(xmlGenericErrorContext,
11905 "PP: entering PROLOG\n");
11906#endif
11907 break;
11908 }
11909 case XML_PARSER_COMMENT:
11910 xmlGenericError(xmlGenericErrorContext,
11911 "PP: internal error, state == COMMENT\n");
11912 ctxt->instate = XML_PARSER_CONTENT;
11913#ifdef DEBUG_PUSH
11914 xmlGenericError(xmlGenericErrorContext,
11915 "PP: entering CONTENT\n");
11916#endif
11917 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011918 case XML_PARSER_IGNORE:
11919 xmlGenericError(xmlGenericErrorContext,
11920 "PP: internal error, state == IGNORE");
11921 ctxt->instate = XML_PARSER_DTD;
11922#ifdef DEBUG_PUSH
11923 xmlGenericError(xmlGenericErrorContext,
11924 "PP: entering DTD\n");
11925#endif
11926 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011927 case XML_PARSER_PI:
11928 xmlGenericError(xmlGenericErrorContext,
11929 "PP: internal error, state == PI\n");
11930 ctxt->instate = XML_PARSER_CONTENT;
11931#ifdef DEBUG_PUSH
11932 xmlGenericError(xmlGenericErrorContext,
11933 "PP: entering CONTENT\n");
11934#endif
11935 break;
11936 case XML_PARSER_ENTITY_DECL:
11937 xmlGenericError(xmlGenericErrorContext,
11938 "PP: internal error, state == ENTITY_DECL\n");
11939 ctxt->instate = XML_PARSER_DTD;
11940#ifdef DEBUG_PUSH
11941 xmlGenericError(xmlGenericErrorContext,
11942 "PP: entering DTD\n");
11943#endif
11944 break;
11945 case XML_PARSER_ENTITY_VALUE:
11946 xmlGenericError(xmlGenericErrorContext,
11947 "PP: internal error, state == ENTITY_VALUE\n");
11948 ctxt->instate = XML_PARSER_CONTENT;
11949#ifdef DEBUG_PUSH
11950 xmlGenericError(xmlGenericErrorContext,
11951 "PP: entering DTD\n");
11952#endif
11953 break;
11954 case XML_PARSER_ATTRIBUTE_VALUE:
11955 xmlGenericError(xmlGenericErrorContext,
11956 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11957 ctxt->instate = XML_PARSER_START_TAG;
11958#ifdef DEBUG_PUSH
11959 xmlGenericError(xmlGenericErrorContext,
11960 "PP: entering START_TAG\n");
11961#endif
11962 break;
11963 case XML_PARSER_SYSTEM_LITERAL:
11964 xmlGenericError(xmlGenericErrorContext,
11965 "PP: internal error, state == SYSTEM_LITERAL\n");
11966 ctxt->instate = XML_PARSER_START_TAG;
11967#ifdef DEBUG_PUSH
11968 xmlGenericError(xmlGenericErrorContext,
11969 "PP: entering START_TAG\n");
11970#endif
11971 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011972 case XML_PARSER_PUBLIC_LITERAL:
11973 xmlGenericError(xmlGenericErrorContext,
11974 "PP: internal error, state == PUBLIC_LITERAL\n");
11975 ctxt->instate = XML_PARSER_START_TAG;
11976#ifdef DEBUG_PUSH
11977 xmlGenericError(xmlGenericErrorContext,
11978 "PP: entering START_TAG\n");
11979#endif
11980 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011981 }
11982 }
11983done:
11984#ifdef DEBUG_PUSH
11985 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11986#endif
11987 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011988encoding_error:
11989 {
11990 char buffer[150];
11991
11992 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11993 ctxt->input->cur[0], ctxt->input->cur[1],
11994 ctxt->input->cur[2], ctxt->input->cur[3]);
11995 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11996 "Input is not proper UTF-8, indicate encoding !\n%s",
11997 BAD_CAST buffer, NULL);
11998 }
11999 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000012000}
12001
12002/**
Daniel Veillard65686452012-07-19 18:25:01 +080012003 * xmlParseCheckTransition:
12004 * @ctxt: an XML parser context
12005 * @chunk: a char array
12006 * @size: the size in byte of the chunk
12007 *
12008 * Check depending on the current parser state if the chunk given must be
12009 * processed immediately or one need more data to advance on parsing.
12010 *
12011 * Returns -1 in case of error, 0 if the push is not needed and 1 if needed
12012 */
12013static int
12014xmlParseCheckTransition(xmlParserCtxtPtr ctxt, const char *chunk, int size) {
12015 if ((ctxt == NULL) || (chunk == NULL) || (size < 0))
12016 return(-1);
12017 if (ctxt->instate == XML_PARSER_START_TAG) {
12018 if (memchr(chunk, '>', size) != NULL)
12019 return(1);
12020 return(0);
12021 }
12022 if (ctxt->progressive == XML_PARSER_COMMENT) {
12023 if (memchr(chunk, '>', size) != NULL)
12024 return(1);
12025 return(0);
12026 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012027 if (ctxt->instate == XML_PARSER_CDATA_SECTION) {
12028 if (memchr(chunk, '>', size) != NULL)
12029 return(1);
12030 return(0);
12031 }
Daniel Veillardf572a782012-07-19 20:36:25 +080012032 if (ctxt->progressive == XML_PARSER_PI) {
12033 if (memchr(chunk, '>', size) != NULL)
12034 return(1);
12035 return(0);
12036 }
Daniel Veillard5353bbf2012-08-03 12:03:31 +080012037 if (ctxt->instate == XML_PARSER_END_TAG) {
12038 if (memchr(chunk, '>', size) != NULL)
12039 return(1);
12040 return(0);
12041 }
12042 if ((ctxt->progressive == XML_PARSER_DTD) ||
12043 (ctxt->instate == XML_PARSER_DTD)) {
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012044 if (memchr(chunk, ']', size) != NULL)
12045 return(1);
12046 return(0);
12047 }
Daniel Veillard65686452012-07-19 18:25:01 +080012048 return(1);
12049}
12050
12051/**
Owen Taylor3473f882001-02-23 17:55:21 +000012052 * xmlParseChunk:
12053 * @ctxt: an XML parser context
12054 * @chunk: an char array
12055 * @size: the size in byte of the chunk
12056 * @terminate: last chunk indicator
12057 *
12058 * Parse a Chunk of memory
12059 *
12060 * Returns zero if no error, the xmlParserErrors otherwise.
12061 */
12062int
12063xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12064 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000012065 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012066 int remain = 0;
Daniel Veillard65686452012-07-19 18:25:01 +080012067 size_t old_avail = 0;
12068 size_t avail = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000012069
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012070 if (ctxt == NULL)
12071 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000012072 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012073 return(ctxt->errNo);
Daniel Veillard48b4cdd2012-07-30 16:16:04 +080012074 if (ctxt->instate == XML_PARSER_EOF)
12075 return(-1);
Daniel Veillard309f81d2003-09-23 09:02:53 +000012076 if (ctxt->instate == XML_PARSER_START)
12077 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000012078 if ((size > 0) && (chunk != NULL) && (!terminate) &&
12079 (chunk[size - 1] == '\r')) {
12080 end_in_lf = 1;
12081 size--;
12082 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012083
12084xmldecl_done:
12085
Owen Taylor3473f882001-02-23 17:55:21 +000012086 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12087 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012088 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12089 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000012090 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012091
Daniel Veillard65686452012-07-19 18:25:01 +080012092 old_avail = xmlBufUse(ctxt->input->buf->buffer);
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012093 /*
12094 * Specific handling if we autodetected an encoding, we should not
12095 * push more than the first line ... which depend on the encoding
12096 * And only push the rest once the final encoding was detected
12097 */
12098 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
12099 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010012100 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012101
12102 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12103 BAD_CAST "UTF-16")) ||
12104 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12105 BAD_CAST "UTF16")))
12106 len = 90;
12107 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12108 BAD_CAST "UCS-4")) ||
12109 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
12110 BAD_CAST "UCS4")))
12111 len = 180;
12112
12113 if (ctxt->input->buf->rawconsumed < len)
12114 len -= ctxt->input->buf->rawconsumed;
12115
Raul Hudeaba9716a2010-03-15 10:13:29 +010012116 /*
12117 * Change size for reading the initial declaration only
12118 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
12119 * will blindly copy extra bytes from memory.
12120 */
Daniel Veillard60587d62010-11-04 15:16:27 +010012121 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010012122 remain = size - len;
12123 size = len;
12124 } else {
12125 remain = 0;
12126 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012127 }
William M. Bracka3215c72004-07-31 16:24:01 +000012128 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12129 if (res < 0) {
12130 ctxt->errNo = XML_PARSER_EOF;
12131 ctxt->disableSAX = 1;
12132 return (XML_PARSER_EOF);
12133 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012134 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012135#ifdef DEBUG_PUSH
12136 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12137#endif
12138
Owen Taylor3473f882001-02-23 17:55:21 +000012139 } else if (ctxt->instate != XML_PARSER_EOF) {
12140 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
12141 xmlParserInputBufferPtr in = ctxt->input->buf;
12142 if ((in->encoder != NULL) && (in->buffer != NULL) &&
12143 (in->raw != NULL)) {
12144 int nbchars;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012145
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012146 nbchars = xmlCharEncInput(in);
Owen Taylor3473f882001-02-23 17:55:21 +000012147 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012148 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000012149 xmlGenericError(xmlGenericErrorContext,
12150 "xmlParseChunk: encoder error\n");
12151 return(XML_ERR_INVALID_ENCODING);
12152 }
12153 }
12154 }
12155 }
Daniel Veillard65686452012-07-19 18:25:01 +080012156 if (remain != 0) {
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012157 xmlParseTryOrFinish(ctxt, 0);
Daniel Veillard65686452012-07-19 18:25:01 +080012158 } else {
12159 if ((ctxt->input != NULL) && (ctxt->input->buf != NULL))
12160 avail = xmlBufUse(ctxt->input->buf->buffer);
12161 /*
12162 * Depending on the current state it may not be such
12163 * a good idea to try parsing if there is nothing in the chunk
12164 * which would be worth doing a parser state transition and we
12165 * need to wait for more data
12166 */
12167 if ((terminate) || (avail > XML_MAX_TEXT_LENGTH) ||
12168 (old_avail == 0) || (avail == 0) ||
12169 (xmlParseCheckTransition(ctxt,
12170 (const char *)&ctxt->input->base[old_avail],
12171 avail - old_avail)))
12172 xmlParseTryOrFinish(ctxt, terminate);
12173 }
Daniel Veillard2b52aa02012-07-31 10:53:47 +080012174 if ((ctxt->input != NULL) &&
12175 (((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12176 ((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12177 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
12178 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12179 ctxt->instate = XML_PARSER_EOF;
12180 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020012181 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12182 return(ctxt->errNo);
12183
12184 if (remain != 0) {
12185 chunk += size;
12186 size = remain;
12187 remain = 0;
12188 goto xmldecl_done;
12189 }
Daniel Veillarda617e242006-01-09 14:38:44 +000012190 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12191 (ctxt->input->buf != NULL)) {
12192 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12193 }
Owen Taylor3473f882001-02-23 17:55:21 +000012194 if (terminate) {
12195 /*
12196 * Check for termination
12197 */
Daniel Veillard65686452012-07-19 18:25:01 +080012198 int cur_avail = 0;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012199
12200 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012201 if (ctxt->input->buf == NULL)
Daniel Veillard65686452012-07-19 18:25:01 +080012202 cur_avail = ctxt->input->length -
12203 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012204 else
Daniel Veillard65686452012-07-19 18:25:01 +080012205 cur_avail = xmlBufUse(ctxt->input->buf->buffer) -
12206 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard36e5cd52004-11-02 14:52:23 +000012207 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012208
Owen Taylor3473f882001-02-23 17:55:21 +000012209 if ((ctxt->instate != XML_PARSER_EOF) &&
12210 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012211 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012212 }
Daniel Veillard65686452012-07-19 18:25:01 +080012213 if ((ctxt->instate == XML_PARSER_EPILOG) && (cur_avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012214 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000012215 }
Owen Taylor3473f882001-02-23 17:55:21 +000012216 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000012217 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000012218 ctxt->sax->endDocument(ctxt->userData);
12219 }
12220 ctxt->instate = XML_PARSER_EOF;
12221 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012222 return((xmlParserErrors) ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000012223}
12224
12225/************************************************************************
12226 * *
12227 * I/O front end functions to the parser *
12228 * *
12229 ************************************************************************/
12230
12231/**
Owen Taylor3473f882001-02-23 17:55:21 +000012232 * xmlCreatePushParserCtxt:
12233 * @sax: a SAX handler
12234 * @user_data: The user data returned on SAX callbacks
12235 * @chunk: a pointer to an array of chars
12236 * @size: number of chars in the array
12237 * @filename: an optional file name or URI
12238 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000012239 * Create a parser context for using the XML parser in push mode.
12240 * If @buffer and @size are non-NULL, the data is used to detect
12241 * the encoding. The remaining characters will be parsed so they
12242 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000012243 * To allow content encoding detection, @size should be >= 4
12244 * The value of @filename is used for fetching external entities
12245 * and error/warning reports.
12246 *
12247 * Returns the new parser context or NULL
12248 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000012249
Owen Taylor3473f882001-02-23 17:55:21 +000012250xmlParserCtxtPtr
12251xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12252 const char *chunk, int size, const char *filename) {
12253 xmlParserCtxtPtr ctxt;
12254 xmlParserInputPtr inputStream;
12255 xmlParserInputBufferPtr buf;
12256 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
12257
12258 /*
12259 * plug some encoding conversion routines
12260 */
12261 if ((chunk != NULL) && (size >= 4))
12262 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
12263
12264 buf = xmlAllocParserInputBuffer(enc);
12265 if (buf == NULL) return(NULL);
12266
12267 ctxt = xmlNewParserCtxt();
12268 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000012269 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012270 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012271 return(NULL);
12272 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000012273 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012274 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
12275 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012276 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000012277 xmlFreeParserInputBuffer(buf);
12278 xmlFreeParserCtxt(ctxt);
12279 return(NULL);
12280 }
Owen Taylor3473f882001-02-23 17:55:21 +000012281 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012282#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012283 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012284#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012285 xmlFree(ctxt->sax);
12286 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12287 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012288 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012289 xmlFreeParserInputBuffer(buf);
12290 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012291 return(NULL);
12292 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012293 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12294 if (sax->initialized == XML_SAX2_MAGIC)
12295 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12296 else
12297 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012298 if (user_data != NULL)
12299 ctxt->userData = user_data;
12300 }
12301 if (filename == NULL) {
12302 ctxt->directory = NULL;
12303 } else {
12304 ctxt->directory = xmlParserGetDirectory(filename);
12305 }
12306
12307 inputStream = xmlNewInputStream(ctxt);
12308 if (inputStream == NULL) {
12309 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000012310 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012311 return(NULL);
12312 }
12313
12314 if (filename == NULL)
12315 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000012316 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000012317 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000012318 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000012319 if (inputStream->filename == NULL) {
12320 xmlFreeParserCtxt(ctxt);
12321 xmlFreeParserInputBuffer(buf);
12322 return(NULL);
12323 }
12324 }
Owen Taylor3473f882001-02-23 17:55:21 +000012325 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080012326 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000012327 inputPush(ctxt, inputStream);
12328
William M. Brack3a1cd212005-02-11 14:35:54 +000012329 /*
12330 * If the caller didn't provide an initial 'chunk' for determining
12331 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
12332 * that it can be automatically determined later
12333 */
12334 if ((size == 0) || (chunk == NULL)) {
12335 ctxt->charset = XML_CHAR_ENCODING_NONE;
12336 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012337 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
12338 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012339
Daniel Veillard768eb3b2012-07-16 14:19:49 +080012340 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000012341
Daniel Veillard00ac0d32012-07-16 18:03:01 +080012342 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000012343#ifdef DEBUG_PUSH
12344 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
12345#endif
12346 }
12347
Daniel Veillard0e4cd172001-06-28 12:13:56 +000012348 if (enc != XML_CHAR_ENCODING_NONE) {
12349 xmlSwitchEncoding(ctxt, enc);
12350 }
12351
Owen Taylor3473f882001-02-23 17:55:21 +000012352 return(ctxt);
12353}
Daniel Veillard73b013f2003-09-30 12:36:01 +000012354#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012355
12356/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000012357 * xmlStopParser:
12358 * @ctxt: an XML parser context
12359 *
12360 * Blocks further parser processing
12361 */
12362void
12363xmlStopParser(xmlParserCtxtPtr ctxt) {
12364 if (ctxt == NULL)
12365 return;
12366 ctxt->instate = XML_PARSER_EOF;
12367 ctxt->disableSAX = 1;
12368 if (ctxt->input != NULL) {
12369 ctxt->input->cur = BAD_CAST"";
12370 ctxt->input->base = ctxt->input->cur;
12371 }
12372}
12373
12374/**
Owen Taylor3473f882001-02-23 17:55:21 +000012375 * xmlCreateIOParserCtxt:
12376 * @sax: a SAX handler
12377 * @user_data: The user data returned on SAX callbacks
12378 * @ioread: an I/O read function
12379 * @ioclose: an I/O close function
12380 * @ioctx: an I/O handler
12381 * @enc: the charset encoding if known
12382 *
12383 * Create a parser context for using the XML parser with an existing
12384 * I/O stream
12385 *
12386 * Returns the new parser context or NULL
12387 */
12388xmlParserCtxtPtr
12389xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12390 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12391 void *ioctx, xmlCharEncoding enc) {
12392 xmlParserCtxtPtr ctxt;
12393 xmlParserInputPtr inputStream;
12394 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012395
Daniel Veillard42595322004-11-08 10:52:06 +000012396 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012397
12398 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012399 if (buf == NULL) {
12400 if (ioclose != NULL)
12401 ioclose(ioctx);
12402 return (NULL);
12403 }
Owen Taylor3473f882001-02-23 17:55:21 +000012404
12405 ctxt = xmlNewParserCtxt();
12406 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012407 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012408 return(NULL);
12409 }
12410 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012411#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012412 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012413#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012414 xmlFree(ctxt->sax);
12415 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12416 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012417 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012418 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012419 return(NULL);
12420 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012421 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12422 if (sax->initialized == XML_SAX2_MAGIC)
12423 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12424 else
12425 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012426 if (user_data != NULL)
12427 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012428 }
Owen Taylor3473f882001-02-23 17:55:21 +000012429
12430 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12431 if (inputStream == NULL) {
12432 xmlFreeParserCtxt(ctxt);
12433 return(NULL);
12434 }
12435 inputPush(ctxt, inputStream);
12436
12437 return(ctxt);
12438}
12439
Daniel Veillard4432df22003-09-28 18:58:27 +000012440#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012441/************************************************************************
12442 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012443 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012444 * *
12445 ************************************************************************/
12446
12447/**
12448 * xmlIOParseDTD:
12449 * @sax: the SAX handler block or NULL
12450 * @input: an Input Buffer
12451 * @enc: the charset encoding if known
12452 *
12453 * Load and parse a DTD
12454 *
12455 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012456 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012457 */
12458
12459xmlDtdPtr
12460xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12461 xmlCharEncoding enc) {
12462 xmlDtdPtr ret = NULL;
12463 xmlParserCtxtPtr ctxt;
12464 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012465 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012466
12467 if (input == NULL)
12468 return(NULL);
12469
12470 ctxt = xmlNewParserCtxt();
12471 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012472 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012473 return(NULL);
12474 }
12475
12476 /*
12477 * Set-up the SAX context
12478 */
12479 if (sax != NULL) {
12480 if (ctxt->sax != NULL)
12481 xmlFree(ctxt->sax);
12482 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012483 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012484 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012485 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012486
12487 /*
12488 * generate a parser input from the I/O handler
12489 */
12490
Daniel Veillard43caefb2003-12-07 19:32:22 +000012491 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012492 if (pinput == NULL) {
12493 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012494 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012495 xmlFreeParserCtxt(ctxt);
12496 return(NULL);
12497 }
12498
12499 /*
12500 * plug some encoding conversion routines here.
12501 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012502 if (xmlPushInput(ctxt, pinput) < 0) {
12503 if (sax != NULL) ctxt->sax = NULL;
12504 xmlFreeParserCtxt(ctxt);
12505 return(NULL);
12506 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012507 if (enc != XML_CHAR_ENCODING_NONE) {
12508 xmlSwitchEncoding(ctxt, enc);
12509 }
Owen Taylor3473f882001-02-23 17:55:21 +000012510
12511 pinput->filename = NULL;
12512 pinput->line = 1;
12513 pinput->col = 1;
12514 pinput->base = ctxt->input->cur;
12515 pinput->cur = ctxt->input->cur;
12516 pinput->free = NULL;
12517
12518 /*
12519 * let's parse that entity knowing it's an external subset.
12520 */
12521 ctxt->inSubset = 2;
12522 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012523 if (ctxt->myDoc == NULL) {
12524 xmlErrMemory(ctxt, "New Doc failed");
12525 return(NULL);
12526 }
12527 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012528 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12529 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012530
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012531 if ((enc == XML_CHAR_ENCODING_NONE) &&
12532 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000012533 /*
12534 * Get the 4 first bytes and decode the charset
12535 * if enc != XML_CHAR_ENCODING_NONE
12536 * plug some encoding conversion routines.
12537 */
12538 start[0] = RAW;
12539 start[1] = NXT(1);
12540 start[2] = NXT(2);
12541 start[3] = NXT(3);
12542 enc = xmlDetectCharEncoding(start, 4);
12543 if (enc != XML_CHAR_ENCODING_NONE) {
12544 xmlSwitchEncoding(ctxt, enc);
12545 }
12546 }
12547
Owen Taylor3473f882001-02-23 17:55:21 +000012548 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12549
12550 if (ctxt->myDoc != NULL) {
12551 if (ctxt->wellFormed) {
12552 ret = ctxt->myDoc->extSubset;
12553 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012554 if (ret != NULL) {
12555 xmlNodePtr tmp;
12556
12557 ret->doc = NULL;
12558 tmp = ret->children;
12559 while (tmp != NULL) {
12560 tmp->doc = NULL;
12561 tmp = tmp->next;
12562 }
12563 }
Owen Taylor3473f882001-02-23 17:55:21 +000012564 } else {
12565 ret = NULL;
12566 }
12567 xmlFreeDoc(ctxt->myDoc);
12568 ctxt->myDoc = NULL;
12569 }
12570 if (sax != NULL) ctxt->sax = NULL;
12571 xmlFreeParserCtxt(ctxt);
12572
12573 return(ret);
12574}
12575
12576/**
12577 * xmlSAXParseDTD:
12578 * @sax: the SAX handler block
12579 * @ExternalID: a NAME* containing the External ID of the DTD
12580 * @SystemID: a NAME* containing the URL to the DTD
12581 *
12582 * Load and parse an external subset.
12583 *
12584 * Returns the resulting xmlDtdPtr or NULL in case of error.
12585 */
12586
12587xmlDtdPtr
12588xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12589 const xmlChar *SystemID) {
12590 xmlDtdPtr ret = NULL;
12591 xmlParserCtxtPtr ctxt;
12592 xmlParserInputPtr input = NULL;
12593 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012594 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012595
12596 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12597
12598 ctxt = xmlNewParserCtxt();
12599 if (ctxt == NULL) {
12600 return(NULL);
12601 }
12602
12603 /*
12604 * Set-up the SAX context
12605 */
12606 if (sax != NULL) {
12607 if (ctxt->sax != NULL)
12608 xmlFree(ctxt->sax);
12609 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012610 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012611 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012612
12613 /*
12614 * Canonicalise the system ID
12615 */
12616 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012617 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012618 xmlFreeParserCtxt(ctxt);
12619 return(NULL);
12620 }
Owen Taylor3473f882001-02-23 17:55:21 +000012621
12622 /*
12623 * Ask the Entity resolver to load the damn thing
12624 */
12625
12626 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012627 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12628 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012629 if (input == NULL) {
12630 if (sax != NULL) ctxt->sax = NULL;
12631 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012632 if (systemIdCanonic != NULL)
12633 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012634 return(NULL);
12635 }
12636
12637 /*
12638 * plug some encoding conversion routines here.
12639 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012640 if (xmlPushInput(ctxt, input) < 0) {
12641 if (sax != NULL) ctxt->sax = NULL;
12642 xmlFreeParserCtxt(ctxt);
12643 if (systemIdCanonic != NULL)
12644 xmlFree(systemIdCanonic);
12645 return(NULL);
12646 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012647 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12648 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12649 xmlSwitchEncoding(ctxt, enc);
12650 }
Owen Taylor3473f882001-02-23 17:55:21 +000012651
12652 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012653 input->filename = (char *) systemIdCanonic;
12654 else
12655 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012656 input->line = 1;
12657 input->col = 1;
12658 input->base = ctxt->input->cur;
12659 input->cur = ctxt->input->cur;
12660 input->free = NULL;
12661
12662 /*
12663 * let's parse that entity knowing it's an external subset.
12664 */
12665 ctxt->inSubset = 2;
12666 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012667 if (ctxt->myDoc == NULL) {
12668 xmlErrMemory(ctxt, "New Doc failed");
12669 if (sax != NULL) ctxt->sax = NULL;
12670 xmlFreeParserCtxt(ctxt);
12671 return(NULL);
12672 }
12673 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012674 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12675 ExternalID, SystemID);
12676 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12677
12678 if (ctxt->myDoc != NULL) {
12679 if (ctxt->wellFormed) {
12680 ret = ctxt->myDoc->extSubset;
12681 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012682 if (ret != NULL) {
12683 xmlNodePtr tmp;
12684
12685 ret->doc = NULL;
12686 tmp = ret->children;
12687 while (tmp != NULL) {
12688 tmp->doc = NULL;
12689 tmp = tmp->next;
12690 }
12691 }
Owen Taylor3473f882001-02-23 17:55:21 +000012692 } else {
12693 ret = NULL;
12694 }
12695 xmlFreeDoc(ctxt->myDoc);
12696 ctxt->myDoc = NULL;
12697 }
12698 if (sax != NULL) ctxt->sax = NULL;
12699 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012700
Owen Taylor3473f882001-02-23 17:55:21 +000012701 return(ret);
12702}
12703
Daniel Veillard4432df22003-09-28 18:58:27 +000012704
Owen Taylor3473f882001-02-23 17:55:21 +000012705/**
12706 * xmlParseDTD:
12707 * @ExternalID: a NAME* containing the External ID of the DTD
12708 * @SystemID: a NAME* containing the URL to the DTD
12709 *
12710 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012711 *
Owen Taylor3473f882001-02-23 17:55:21 +000012712 * Returns the resulting xmlDtdPtr or NULL in case of error.
12713 */
12714
12715xmlDtdPtr
12716xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12717 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12718}
Daniel Veillard4432df22003-09-28 18:58:27 +000012719#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012720
12721/************************************************************************
12722 * *
12723 * Front ends when parsing an Entity *
12724 * *
12725 ************************************************************************/
12726
12727/**
Owen Taylor3473f882001-02-23 17:55:21 +000012728 * xmlParseCtxtExternalEntity:
12729 * @ctx: the existing parsing context
12730 * @URL: the URL for the entity to load
12731 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012732 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012733 *
12734 * Parse an external general entity within an existing parsing context
12735 * An external general parsed entity is well-formed if it matches the
12736 * production labeled extParsedEnt.
12737 *
12738 * [78] extParsedEnt ::= TextDecl? content
12739 *
12740 * Returns 0 if the entity is well formed, -1 in case of args problem and
12741 * the parser error code otherwise
12742 */
12743
12744int
12745xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012746 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012747 xmlParserCtxtPtr ctxt;
12748 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012749 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012750 xmlSAXHandlerPtr oldsax = NULL;
12751 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012752 xmlChar start[4];
12753 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012754
Daniel Veillardce682bc2004-11-05 17:22:25 +000012755 if (ctx == NULL) return(-1);
12756
Daniel Veillard0161e632008-08-28 15:36:32 +000012757 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12758 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012759 return(XML_ERR_ENTITY_LOOP);
12760 }
12761
Daniel Veillardcda96922001-08-21 10:56:31 +000012762 if (lst != NULL)
12763 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012764 if ((URL == NULL) && (ID == NULL))
12765 return(-1);
12766 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12767 return(-1);
12768
Rob Richards798743a2009-06-19 13:54:25 -040012769 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012770 if (ctxt == NULL) {
12771 return(-1);
12772 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012773
Owen Taylor3473f882001-02-23 17:55:21 +000012774 oldsax = ctxt->sax;
12775 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012776 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012777 newDoc = xmlNewDoc(BAD_CAST "1.0");
12778 if (newDoc == NULL) {
12779 xmlFreeParserCtxt(ctxt);
12780 return(-1);
12781 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012782 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012783 if (ctx->myDoc->dict) {
12784 newDoc->dict = ctx->myDoc->dict;
12785 xmlDictReference(newDoc->dict);
12786 }
Owen Taylor3473f882001-02-23 17:55:21 +000012787 if (ctx->myDoc != NULL) {
12788 newDoc->intSubset = ctx->myDoc->intSubset;
12789 newDoc->extSubset = ctx->myDoc->extSubset;
12790 }
12791 if (ctx->myDoc->URL != NULL) {
12792 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12793 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012794 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12795 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012796 ctxt->sax = oldsax;
12797 xmlFreeParserCtxt(ctxt);
12798 newDoc->intSubset = NULL;
12799 newDoc->extSubset = NULL;
12800 xmlFreeDoc(newDoc);
12801 return(-1);
12802 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012803 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012804 nodePush(ctxt, newDoc->children);
12805 if (ctx->myDoc == NULL) {
12806 ctxt->myDoc = newDoc;
12807 } else {
12808 ctxt->myDoc = ctx->myDoc;
12809 newDoc->children->doc = ctx->myDoc;
12810 }
12811
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012812 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012813 * Get the 4 first bytes and decode the charset
12814 * if enc != XML_CHAR_ENCODING_NONE
12815 * plug some encoding conversion routines.
12816 */
12817 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012818 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12819 start[0] = RAW;
12820 start[1] = NXT(1);
12821 start[2] = NXT(2);
12822 start[3] = NXT(3);
12823 enc = xmlDetectCharEncoding(start, 4);
12824 if (enc != XML_CHAR_ENCODING_NONE) {
12825 xmlSwitchEncoding(ctxt, enc);
12826 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012827 }
12828
Owen Taylor3473f882001-02-23 17:55:21 +000012829 /*
12830 * Parse a possible text declaration first
12831 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012832 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012833 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012834 /*
12835 * An XML-1.0 document can't reference an entity not XML-1.0
12836 */
12837 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12838 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12839 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12840 "Version mismatch between document and entity\n");
12841 }
Owen Taylor3473f882001-02-23 17:55:21 +000012842 }
12843
12844 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012845 * If the user provided its own SAX callbacks then reuse the
12846 * useData callback field, otherwise the expected setup in a
12847 * DOM builder is to have userData == ctxt
12848 */
12849 if (ctx->userData == ctx)
12850 ctxt->userData = ctxt;
12851 else
12852 ctxt->userData = ctx->userData;
12853
12854 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012855 * Doing validity checking on chunk doesn't make sense
12856 */
12857 ctxt->instate = XML_PARSER_CONTENT;
12858 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012859 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012860 ctxt->loadsubset = ctx->loadsubset;
12861 ctxt->depth = ctx->depth + 1;
12862 ctxt->replaceEntities = ctx->replaceEntities;
12863 if (ctxt->validate) {
12864 ctxt->vctxt.error = ctx->vctxt.error;
12865 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012866 } else {
12867 ctxt->vctxt.error = NULL;
12868 ctxt->vctxt.warning = NULL;
12869 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012870 ctxt->vctxt.nodeTab = NULL;
12871 ctxt->vctxt.nodeNr = 0;
12872 ctxt->vctxt.nodeMax = 0;
12873 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012874 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12875 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012876 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12877 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12878 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012879 ctxt->dictNames = ctx->dictNames;
12880 ctxt->attsDefault = ctx->attsDefault;
12881 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012882 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012883
12884 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012885
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012886 ctx->validate = ctxt->validate;
12887 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012888 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012889 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012890 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012891 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012892 }
12893 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012894 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012895 }
12896
12897 if (!ctxt->wellFormed) {
12898 if (ctxt->errNo == 0)
12899 ret = 1;
12900 else
12901 ret = ctxt->errNo;
12902 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012903 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012904 xmlNodePtr cur;
12905
12906 /*
12907 * Return the newly created nodeset after unlinking it from
12908 * they pseudo parent.
12909 */
12910 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012911 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012912 while (cur != NULL) {
12913 cur->parent = NULL;
12914 cur = cur->next;
12915 }
12916 newDoc->children->children = NULL;
12917 }
12918 ret = 0;
12919 }
12920 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012921 ctxt->dict = NULL;
12922 ctxt->attsDefault = NULL;
12923 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012924 xmlFreeParserCtxt(ctxt);
12925 newDoc->intSubset = NULL;
12926 newDoc->extSubset = NULL;
12927 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012928
Owen Taylor3473f882001-02-23 17:55:21 +000012929 return(ret);
12930}
12931
12932/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012933 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012934 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012935 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012936 * @sax: the SAX handler bloc (possibly NULL)
12937 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12938 * @depth: Used for loop detection, use 0
12939 * @URL: the URL for the entity to load
12940 * @ID: the System ID for the entity to load
12941 * @list: the return value for the set of parsed nodes
12942 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012943 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012944 *
12945 * Returns 0 if the entity is well formed, -1 in case of args problem and
12946 * the parser error code otherwise
12947 */
12948
Daniel Veillard7d515752003-09-26 19:12:37 +000012949static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012950xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12951 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012952 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012953 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012954 xmlParserCtxtPtr ctxt;
12955 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012956 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012957 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012958 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012959 xmlChar start[4];
12960 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012961
Daniel Veillard0161e632008-08-28 15:36:32 +000012962 if (((depth > 40) &&
12963 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12964 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012965 return(XML_ERR_ENTITY_LOOP);
12966 }
12967
Owen Taylor3473f882001-02-23 17:55:21 +000012968 if (list != NULL)
12969 *list = NULL;
12970 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012971 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012972 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012973 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012974
12975
Rob Richards9c0aa472009-03-26 18:10:19 +000012976 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012977 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012978 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012979 if (oldctxt != NULL) {
12980 ctxt->_private = oldctxt->_private;
12981 ctxt->loadsubset = oldctxt->loadsubset;
12982 ctxt->validate = oldctxt->validate;
12983 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012984 ctxt->record_info = oldctxt->record_info;
12985 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12986 ctxt->node_seq.length = oldctxt->node_seq.length;
12987 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012988 } else {
12989 /*
12990 * Doing validity checking on chunk without context
12991 * doesn't make sense
12992 */
12993 ctxt->_private = NULL;
12994 ctxt->validate = 0;
12995 ctxt->external = 2;
12996 ctxt->loadsubset = 0;
12997 }
Owen Taylor3473f882001-02-23 17:55:21 +000012998 if (sax != NULL) {
12999 oldsax = ctxt->sax;
13000 ctxt->sax = sax;
13001 if (user_data != NULL)
13002 ctxt->userData = user_data;
13003 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013004 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013005 newDoc = xmlNewDoc(BAD_CAST "1.0");
13006 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013007 ctxt->node_seq.maximum = 0;
13008 ctxt->node_seq.length = 0;
13009 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013010 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000013011 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013012 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013013 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000013014 newDoc->intSubset = doc->intSubset;
13015 newDoc->extSubset = doc->extSubset;
13016 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013017 xmlDictReference(newDoc->dict);
13018
Owen Taylor3473f882001-02-23 17:55:21 +000013019 if (doc->URL != NULL) {
13020 newDoc->URL = xmlStrdup(doc->URL);
13021 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013022 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13023 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013024 if (sax != NULL)
13025 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013026 ctxt->node_seq.maximum = 0;
13027 ctxt->node_seq.length = 0;
13028 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013029 xmlFreeParserCtxt(ctxt);
13030 newDoc->intSubset = NULL;
13031 newDoc->extSubset = NULL;
13032 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000013033 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000013034 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013035 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013036 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000013037 ctxt->myDoc = doc;
13038 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000013039
Daniel Veillard0161e632008-08-28 15:36:32 +000013040 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000013041 * Get the 4 first bytes and decode the charset
13042 * if enc != XML_CHAR_ENCODING_NONE
13043 * plug some encoding conversion routines.
13044 */
13045 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000013046 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
13047 start[0] = RAW;
13048 start[1] = NXT(1);
13049 start[2] = NXT(2);
13050 start[3] = NXT(3);
13051 enc = xmlDetectCharEncoding(start, 4);
13052 if (enc != XML_CHAR_ENCODING_NONE) {
13053 xmlSwitchEncoding(ctxt, enc);
13054 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000013055 }
13056
Owen Taylor3473f882001-02-23 17:55:21 +000013057 /*
13058 * Parse a possible text declaration first
13059 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000013060 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000013061 xmlParseTextDecl(ctxt);
13062 }
13063
Owen Taylor3473f882001-02-23 17:55:21 +000013064 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000013065 ctxt->depth = depth;
13066
13067 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013068
Daniel Veillard561b7f82002-03-20 21:55:57 +000013069 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013070 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000013071 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013072 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013073 }
13074 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013075 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013076 }
13077
13078 if (!ctxt->wellFormed) {
13079 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013080 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000013081 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013082 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000013083 } else {
13084 if (list != NULL) {
13085 xmlNodePtr cur;
13086
13087 /*
13088 * Return the newly created nodeset after unlinking it from
13089 * they pseudo parent.
13090 */
13091 cur = newDoc->children->children;
13092 *list = cur;
13093 while (cur != NULL) {
13094 cur->parent = NULL;
13095 cur = cur->next;
13096 }
13097 newDoc->children->children = NULL;
13098 }
Daniel Veillard7d515752003-09-26 19:12:37 +000013099 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000013100 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013101
13102 /*
13103 * Record in the parent context the number of entities replacement
13104 * done when parsing that reference.
13105 */
Daniel Veillard76d36452009-09-07 11:19:33 +020013106 if (oldctxt != NULL)
13107 oldctxt->nbentities += ctxt->nbentities;
13108
Daniel Veillard0161e632008-08-28 15:36:32 +000013109 /*
13110 * Also record the size of the entity parsed
13111 */
13112 if (ctxt->input != NULL) {
13113 oldctxt->sizeentities += ctxt->input->consumed;
13114 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
13115 }
13116 /*
13117 * And record the last error if any
13118 */
13119 if (ctxt->lastError.code != XML_ERR_OK)
13120 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13121
Owen Taylor3473f882001-02-23 17:55:21 +000013122 if (sax != NULL)
13123 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000013124 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
13125 oldctxt->node_seq.length = ctxt->node_seq.length;
13126 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000013127 ctxt->node_seq.maximum = 0;
13128 ctxt->node_seq.length = 0;
13129 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013130 xmlFreeParserCtxt(ctxt);
13131 newDoc->intSubset = NULL;
13132 newDoc->extSubset = NULL;
13133 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013134
Owen Taylor3473f882001-02-23 17:55:21 +000013135 return(ret);
13136}
13137
Daniel Veillard81273902003-09-30 00:43:48 +000013138#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013139/**
Daniel Veillard257d9102001-05-08 10:41:44 +000013140 * xmlParseExternalEntity:
13141 * @doc: the document the chunk pertains to
13142 * @sax: the SAX handler bloc (possibly NULL)
13143 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13144 * @depth: Used for loop detection, use 0
13145 * @URL: the URL for the entity to load
13146 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000013147 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000013148 *
13149 * Parse an external general entity
13150 * An external general parsed entity is well-formed if it matches the
13151 * production labeled extParsedEnt.
13152 *
13153 * [78] extParsedEnt ::= TextDecl? content
13154 *
13155 * Returns 0 if the entity is well formed, -1 in case of args problem and
13156 * the parser error code otherwise
13157 */
13158
13159int
13160xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000013161 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000013162 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000013163 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000013164}
13165
13166/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000013167 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000013168 * @doc: the document the chunk pertains to
13169 * @sax: the SAX handler bloc (possibly NULL)
13170 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13171 * @depth: Used for loop detection, use 0
13172 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000013173 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000013174 *
13175 * Parse a well-balanced chunk of an XML document
13176 * called by the parser
13177 * The allowed sequence for the Well Balanced Chunk is the one defined by
13178 * the content production in the XML grammar:
13179 *
13180 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13181 *
13182 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13183 * the parser error code otherwise
13184 */
13185
13186int
13187xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000013188 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013189 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
13190 depth, string, lst, 0 );
13191}
Daniel Veillard81273902003-09-30 00:43:48 +000013192#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000013193
13194/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000013195 * xmlParseBalancedChunkMemoryInternal:
13196 * @oldctxt: the existing parsing context
13197 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13198 * @user_data: the user data field for the parser context
13199 * @lst: the return value for the set of parsed nodes
13200 *
13201 *
13202 * Parse a well-balanced chunk of an XML document
13203 * called by the parser
13204 * The allowed sequence for the Well Balanced Chunk is the one defined by
13205 * the content production in the XML grammar:
13206 *
13207 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13208 *
Daniel Veillard7d515752003-09-26 19:12:37 +000013209 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13210 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000013211 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000013212 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000013213 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000013214 */
Daniel Veillard7d515752003-09-26 19:12:37 +000013215static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000013216xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13217 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13218 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013219 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013220 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013221 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013222 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013223 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013224 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000013225 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020013226#ifdef SAX2
13227 int i;
13228#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000013229
Daniel Veillard0161e632008-08-28 15:36:32 +000013230 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13231 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013232 return(XML_ERR_ENTITY_LOOP);
13233 }
13234
13235
13236 if (lst != NULL)
13237 *lst = NULL;
13238 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000013239 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013240
13241 size = xmlStrlen(string);
13242
13243 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000013244 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013245 if (user_data != NULL)
13246 ctxt->userData = user_data;
13247 else
13248 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013249 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13250 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000013251 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13252 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13253 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013254
Daniel Veillard74eaec12009-08-26 15:57:20 +020013255#ifdef SAX2
13256 /* propagate namespaces down the entity */
13257 for (i = 0;i < oldctxt->nsNr;i += 2) {
13258 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
13259 }
13260#endif
13261
Daniel Veillard328f48c2002-11-15 15:24:34 +000013262 oldsax = ctxt->sax;
13263 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013264 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013265 ctxt->replaceEntities = oldctxt->replaceEntities;
13266 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000013267
Daniel Veillarde1ca5032002-12-09 14:13:43 +000013268 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013269 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013270 newDoc = xmlNewDoc(BAD_CAST "1.0");
13271 if (newDoc == NULL) {
13272 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013273 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013274 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000013275 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013276 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013277 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000013278 newDoc->dict = ctxt->dict;
13279 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013280 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013281 } else {
13282 ctxt->myDoc = oldctxt->myDoc;
13283 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013284 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013285 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013286 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13287 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013288 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013289 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000013290 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013291 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013292 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013293 }
William M. Brack7b9154b2003-09-27 19:23:50 +000013294 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013295 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013296 ctxt->myDoc->children = NULL;
13297 ctxt->myDoc->last = NULL;
13298 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000013299 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013300 ctxt->instate = XML_PARSER_CONTENT;
13301 ctxt->depth = oldctxt->depth + 1;
13302
Daniel Veillard328f48c2002-11-15 15:24:34 +000013303 ctxt->validate = 0;
13304 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000013305 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13306 /*
13307 * ID/IDREF registration will be done in xmlValidateElement below
13308 */
13309 ctxt->loadsubset |= XML_SKIP_IDS;
13310 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013311 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013312 ctxt->attsDefault = oldctxt->attsDefault;
13313 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013314
Daniel Veillard68e9e742002-11-16 15:35:11 +000013315 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013316 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013317 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013318 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013319 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013320 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013321 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013322 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000013323 }
13324
13325 if (!ctxt->wellFormed) {
13326 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000013327 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013328 else
William M. Brack7b9154b2003-09-27 19:23:50 +000013329 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013330 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000013331 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013332 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013333
William M. Brack7b9154b2003-09-27 19:23:50 +000013334 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000013335 xmlNodePtr cur;
13336
13337 /*
13338 * Return the newly created nodeset after unlinking it from
13339 * they pseudo parent.
13340 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000013341 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013342 *lst = cur;
13343 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000013344#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000013345 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13346 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13347 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000013348 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13349 oldctxt->myDoc, cur);
13350 }
Daniel Veillard4432df22003-09-28 18:58:27 +000013351#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000013352 cur->parent = NULL;
13353 cur = cur->next;
13354 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000013355 ctxt->myDoc->children->children = NULL;
13356 }
13357 if (ctxt->myDoc != NULL) {
13358 xmlFreeNode(ctxt->myDoc->children);
13359 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000013360 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013361 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013362
13363 /*
13364 * Record in the parent context the number of entities replacement
13365 * done when parsing that reference.
13366 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020013367 if (oldctxt != NULL)
13368 oldctxt->nbentities += ctxt->nbentities;
13369
Daniel Veillard0161e632008-08-28 15:36:32 +000013370 /*
13371 * Also record the last error if any
13372 */
13373 if (ctxt->lastError.code != XML_ERR_OK)
13374 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13375
Daniel Veillard328f48c2002-11-15 15:24:34 +000013376 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000013377 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000013378 ctxt->attsDefault = NULL;
13379 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000013380 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013381 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000013382 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000013383 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013384
Daniel Veillard328f48c2002-11-15 15:24:34 +000013385 return(ret);
13386}
13387
Daniel Veillard29b17482004-08-16 00:39:03 +000013388/**
13389 * xmlParseInNodeContext:
13390 * @node: the context node
13391 * @data: the input string
13392 * @datalen: the input string length in bytes
13393 * @options: a combination of xmlParserOption
13394 * @lst: the return value for the set of parsed nodes
13395 *
13396 * Parse a well-balanced chunk of an XML document
13397 * within the context (DTD, namespaces, etc ...) of the given node.
13398 *
13399 * The allowed sequence for the data is a Well Balanced Chunk defined by
13400 * the content production in the XML grammar:
13401 *
13402 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13403 *
13404 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13405 * error code otherwise
13406 */
13407xmlParserErrors
13408xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13409 int options, xmlNodePtr *lst) {
13410#ifdef SAX2
13411 xmlParserCtxtPtr ctxt;
13412 xmlDocPtr doc = NULL;
13413 xmlNodePtr fake, cur;
13414 int nsnr = 0;
13415
13416 xmlParserErrors ret = XML_ERR_OK;
13417
13418 /*
13419 * check all input parameters, grab the document
13420 */
13421 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13422 return(XML_ERR_INTERNAL_ERROR);
13423 switch (node->type) {
13424 case XML_ELEMENT_NODE:
13425 case XML_ATTRIBUTE_NODE:
13426 case XML_TEXT_NODE:
13427 case XML_CDATA_SECTION_NODE:
13428 case XML_ENTITY_REF_NODE:
13429 case XML_PI_NODE:
13430 case XML_COMMENT_NODE:
13431 case XML_DOCUMENT_NODE:
13432 case XML_HTML_DOCUMENT_NODE:
13433 break;
13434 default:
13435 return(XML_ERR_INTERNAL_ERROR);
13436
13437 }
13438 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13439 (node->type != XML_DOCUMENT_NODE) &&
13440 (node->type != XML_HTML_DOCUMENT_NODE))
13441 node = node->parent;
13442 if (node == NULL)
13443 return(XML_ERR_INTERNAL_ERROR);
13444 if (node->type == XML_ELEMENT_NODE)
13445 doc = node->doc;
13446 else
13447 doc = (xmlDocPtr) node;
13448 if (doc == NULL)
13449 return(XML_ERR_INTERNAL_ERROR);
13450
13451 /*
13452 * allocate a context and set-up everything not related to the
13453 * node position in the tree
13454 */
13455 if (doc->type == XML_DOCUMENT_NODE)
13456 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13457#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013458 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013459 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013460 /*
13461 * When parsing in context, it makes no sense to add implied
13462 * elements like html/body/etc...
13463 */
13464 options |= HTML_PARSE_NOIMPLIED;
13465 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013466#endif
13467 else
13468 return(XML_ERR_INTERNAL_ERROR);
13469
13470 if (ctxt == NULL)
13471 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013472
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013473 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013474 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13475 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13476 * we must wait until the last moment to free the original one.
13477 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013478 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013479 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013480 xmlDictFree(ctxt->dict);
13481 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013482 } else
13483 options |= XML_PARSE_NODICT;
13484
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013485 if (doc->encoding != NULL) {
13486 xmlCharEncodingHandlerPtr hdlr;
13487
13488 if (ctxt->encoding != NULL)
13489 xmlFree((xmlChar *) ctxt->encoding);
13490 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13491
13492 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13493 if (hdlr != NULL) {
13494 xmlSwitchToEncoding(ctxt, hdlr);
13495 } else {
13496 return(XML_ERR_UNSUPPORTED_ENCODING);
13497 }
13498 }
13499
Daniel Veillard37334572008-07-31 08:20:02 +000013500 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013501 xmlDetectSAX2(ctxt);
13502 ctxt->myDoc = doc;
13503
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013504 fake = xmlNewComment(NULL);
13505 if (fake == NULL) {
13506 xmlFreeParserCtxt(ctxt);
13507 return(XML_ERR_NO_MEMORY);
13508 }
13509 xmlAddChild(node, fake);
13510
Daniel Veillard29b17482004-08-16 00:39:03 +000013511 if (node->type == XML_ELEMENT_NODE) {
13512 nodePush(ctxt, node);
13513 /*
13514 * initialize the SAX2 namespaces stack
13515 */
13516 cur = node;
13517 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13518 xmlNsPtr ns = cur->nsDef;
13519 const xmlChar *iprefix, *ihref;
13520
13521 while (ns != NULL) {
13522 if (ctxt->dict) {
13523 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13524 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13525 } else {
13526 iprefix = ns->prefix;
13527 ihref = ns->href;
13528 }
13529
13530 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13531 nsPush(ctxt, iprefix, ihref);
13532 nsnr++;
13533 }
13534 ns = ns->next;
13535 }
13536 cur = cur->parent;
13537 }
13538 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000013539 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013540
13541 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13542 /*
13543 * ID/IDREF registration will be done in xmlValidateElement below
13544 */
13545 ctxt->loadsubset |= XML_SKIP_IDS;
13546 }
13547
Daniel Veillard499cc922006-01-18 17:22:35 +000013548#ifdef LIBXML_HTML_ENABLED
13549 if (doc->type == XML_HTML_DOCUMENT_NODE)
13550 __htmlParseContent(ctxt);
13551 else
13552#endif
13553 xmlParseContent(ctxt);
13554
Daniel Veillard29b17482004-08-16 00:39:03 +000013555 nsPop(ctxt, nsnr);
13556 if ((RAW == '<') && (NXT(1) == '/')) {
13557 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13558 } else if (RAW != 0) {
13559 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13560 }
13561 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13562 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13563 ctxt->wellFormed = 0;
13564 }
13565
13566 if (!ctxt->wellFormed) {
13567 if (ctxt->errNo == 0)
13568 ret = XML_ERR_INTERNAL_ERROR;
13569 else
13570 ret = (xmlParserErrors)ctxt->errNo;
13571 } else {
13572 ret = XML_ERR_OK;
13573 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013574
Daniel Veillard29b17482004-08-16 00:39:03 +000013575 /*
13576 * Return the newly created nodeset after unlinking it from
13577 * the pseudo sibling.
13578 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013579
Daniel Veillard29b17482004-08-16 00:39:03 +000013580 cur = fake->next;
13581 fake->next = NULL;
13582 node->last = fake;
13583
13584 if (cur != NULL) {
13585 cur->prev = NULL;
13586 }
13587
13588 *lst = cur;
13589
13590 while (cur != NULL) {
13591 cur->parent = NULL;
13592 cur = cur->next;
13593 }
13594
13595 xmlUnlinkNode(fake);
13596 xmlFreeNode(fake);
13597
13598
13599 if (ret != XML_ERR_OK) {
13600 xmlFreeNodeList(*lst);
13601 *lst = NULL;
13602 }
William M. Brackc3f81342004-10-03 01:22:44 +000013603
William M. Brackb7b54de2004-10-06 16:38:01 +000013604 if (doc->dict != NULL)
13605 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013606 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013607
Daniel Veillard29b17482004-08-16 00:39:03 +000013608 return(ret);
13609#else /* !SAX2 */
13610 return(XML_ERR_INTERNAL_ERROR);
13611#endif
13612}
13613
Daniel Veillard81273902003-09-30 00:43:48 +000013614#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013615/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013616 * xmlParseBalancedChunkMemoryRecover:
13617 * @doc: the document the chunk pertains to
13618 * @sax: the SAX handler bloc (possibly NULL)
13619 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13620 * @depth: Used for loop detection, use 0
13621 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13622 * @lst: the return value for the set of parsed nodes
13623 * @recover: return nodes even if the data is broken (use 0)
13624 *
13625 *
13626 * Parse a well-balanced chunk of an XML document
13627 * called by the parser
13628 * The allowed sequence for the Well Balanced Chunk is the one defined by
13629 * the content production in the XML grammar:
13630 *
13631 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13632 *
13633 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13634 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013635 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013636 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013637 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13638 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013639 */
13640int
13641xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013642 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013643 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013644 xmlParserCtxtPtr ctxt;
13645 xmlDocPtr newDoc;
13646 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013647 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013648 int size;
13649 int ret = 0;
13650
Daniel Veillard0161e632008-08-28 15:36:32 +000013651 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013652 return(XML_ERR_ENTITY_LOOP);
13653 }
13654
13655
Daniel Veillardcda96922001-08-21 10:56:31 +000013656 if (lst != NULL)
13657 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013658 if (string == NULL)
13659 return(-1);
13660
13661 size = xmlStrlen(string);
13662
13663 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13664 if (ctxt == NULL) return(-1);
13665 ctxt->userData = ctxt;
13666 if (sax != NULL) {
13667 oldsax = ctxt->sax;
13668 ctxt->sax = sax;
13669 if (user_data != NULL)
13670 ctxt->userData = user_data;
13671 }
13672 newDoc = xmlNewDoc(BAD_CAST "1.0");
13673 if (newDoc == NULL) {
13674 xmlFreeParserCtxt(ctxt);
13675 return(-1);
13676 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013677 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013678 if ((doc != NULL) && (doc->dict != NULL)) {
13679 xmlDictFree(ctxt->dict);
13680 ctxt->dict = doc->dict;
13681 xmlDictReference(ctxt->dict);
13682 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13683 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13684 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13685 ctxt->dictNames = 1;
13686 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013687 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013688 }
Owen Taylor3473f882001-02-23 17:55:21 +000013689 if (doc != NULL) {
13690 newDoc->intSubset = doc->intSubset;
13691 newDoc->extSubset = doc->extSubset;
13692 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013693 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13694 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013695 if (sax != NULL)
13696 ctxt->sax = oldsax;
13697 xmlFreeParserCtxt(ctxt);
13698 newDoc->intSubset = NULL;
13699 newDoc->extSubset = NULL;
13700 xmlFreeDoc(newDoc);
13701 return(-1);
13702 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013703 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13704 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013705 if (doc == NULL) {
13706 ctxt->myDoc = newDoc;
13707 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013708 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013709 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013710 /* Ensure that doc has XML spec namespace */
13711 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13712 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013713 }
13714 ctxt->instate = XML_PARSER_CONTENT;
13715 ctxt->depth = depth;
13716
13717 /*
13718 * Doing validity checking on chunk doesn't make sense
13719 */
13720 ctxt->validate = 0;
13721 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013722 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013723
Daniel Veillardb39bc392002-10-26 19:29:51 +000013724 if ( doc != NULL ){
13725 content = doc->children;
13726 doc->children = NULL;
13727 xmlParseContent(ctxt);
13728 doc->children = content;
13729 }
13730 else {
13731 xmlParseContent(ctxt);
13732 }
Owen Taylor3473f882001-02-23 17:55:21 +000013733 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013734 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013735 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013736 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013737 }
13738 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013739 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013740 }
13741
13742 if (!ctxt->wellFormed) {
13743 if (ctxt->errNo == 0)
13744 ret = 1;
13745 else
13746 ret = ctxt->errNo;
13747 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013748 ret = 0;
13749 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013750
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013751 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13752 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013753
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013754 /*
13755 * Return the newly created nodeset after unlinking it from
13756 * they pseudo parent.
13757 */
13758 cur = newDoc->children->children;
13759 *lst = cur;
13760 while (cur != NULL) {
13761 xmlSetTreeDoc(cur, doc);
13762 cur->parent = NULL;
13763 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013764 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013765 newDoc->children->children = NULL;
13766 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013767
13768 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013769 ctxt->sax = oldsax;
13770 xmlFreeParserCtxt(ctxt);
13771 newDoc->intSubset = NULL;
13772 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013773 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013774 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013775
Owen Taylor3473f882001-02-23 17:55:21 +000013776 return(ret);
13777}
13778
13779/**
13780 * xmlSAXParseEntity:
13781 * @sax: the SAX handler block
13782 * @filename: the filename
13783 *
13784 * parse an XML external entity out of context and build a tree.
13785 * It use the given SAX function block to handle the parsing callback.
13786 * If sax is NULL, fallback to the default DOM tree building routines.
13787 *
13788 * [78] extParsedEnt ::= TextDecl? content
13789 *
13790 * This correspond to a "Well Balanced" chunk
13791 *
13792 * Returns the resulting document tree
13793 */
13794
13795xmlDocPtr
13796xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13797 xmlDocPtr ret;
13798 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013799
13800 ctxt = xmlCreateFileParserCtxt(filename);
13801 if (ctxt == NULL) {
13802 return(NULL);
13803 }
13804 if (sax != NULL) {
13805 if (ctxt->sax != NULL)
13806 xmlFree(ctxt->sax);
13807 ctxt->sax = sax;
13808 ctxt->userData = NULL;
13809 }
13810
Owen Taylor3473f882001-02-23 17:55:21 +000013811 xmlParseExtParsedEnt(ctxt);
13812
13813 if (ctxt->wellFormed)
13814 ret = ctxt->myDoc;
13815 else {
13816 ret = NULL;
13817 xmlFreeDoc(ctxt->myDoc);
13818 ctxt->myDoc = NULL;
13819 }
13820 if (sax != NULL)
13821 ctxt->sax = NULL;
13822 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013823
Owen Taylor3473f882001-02-23 17:55:21 +000013824 return(ret);
13825}
13826
13827/**
13828 * xmlParseEntity:
13829 * @filename: the filename
13830 *
13831 * parse an XML external entity out of context and build a tree.
13832 *
13833 * [78] extParsedEnt ::= TextDecl? content
13834 *
13835 * This correspond to a "Well Balanced" chunk
13836 *
13837 * Returns the resulting document tree
13838 */
13839
13840xmlDocPtr
13841xmlParseEntity(const char *filename) {
13842 return(xmlSAXParseEntity(NULL, filename));
13843}
Daniel Veillard81273902003-09-30 00:43:48 +000013844#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013845
13846/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013847 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013848 * @URL: the entity URL
13849 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013850 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013851 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013852 *
13853 * Create a parser context for an external entity
13854 * Automatic support for ZLIB/Compress compressed document is provided
13855 * by default if found at compile-time.
13856 *
13857 * Returns the new parser context or NULL
13858 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013859static xmlParserCtxtPtr
13860xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13861 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013862 xmlParserCtxtPtr ctxt;
13863 xmlParserInputPtr inputStream;
13864 char *directory = NULL;
13865 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013866
Owen Taylor3473f882001-02-23 17:55:21 +000013867 ctxt = xmlNewParserCtxt();
13868 if (ctxt == NULL) {
13869 return(NULL);
13870 }
13871
Daniel Veillard48247b42009-07-10 16:12:46 +020013872 if (pctx != NULL) {
13873 ctxt->options = pctx->options;
13874 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013875 }
13876
Owen Taylor3473f882001-02-23 17:55:21 +000013877 uri = xmlBuildURI(URL, base);
13878
13879 if (uri == NULL) {
13880 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13881 if (inputStream == NULL) {
13882 xmlFreeParserCtxt(ctxt);
13883 return(NULL);
13884 }
13885
13886 inputPush(ctxt, inputStream);
13887
13888 if ((ctxt->directory == NULL) && (directory == NULL))
13889 directory = xmlParserGetDirectory((char *)URL);
13890 if ((ctxt->directory == NULL) && (directory != NULL))
13891 ctxt->directory = directory;
13892 } else {
13893 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13894 if (inputStream == NULL) {
13895 xmlFree(uri);
13896 xmlFreeParserCtxt(ctxt);
13897 return(NULL);
13898 }
13899
13900 inputPush(ctxt, inputStream);
13901
13902 if ((ctxt->directory == NULL) && (directory == NULL))
13903 directory = xmlParserGetDirectory((char *)uri);
13904 if ((ctxt->directory == NULL) && (directory != NULL))
13905 ctxt->directory = directory;
13906 xmlFree(uri);
13907 }
Owen Taylor3473f882001-02-23 17:55:21 +000013908 return(ctxt);
13909}
13910
Rob Richards9c0aa472009-03-26 18:10:19 +000013911/**
13912 * xmlCreateEntityParserCtxt:
13913 * @URL: the entity URL
13914 * @ID: the entity PUBLIC ID
13915 * @base: a possible base for the target URI
13916 *
13917 * Create a parser context for an external entity
13918 * Automatic support for ZLIB/Compress compressed document is provided
13919 * by default if found at compile-time.
13920 *
13921 * Returns the new parser context or NULL
13922 */
13923xmlParserCtxtPtr
13924xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13925 const xmlChar *base) {
13926 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13927
13928}
13929
Owen Taylor3473f882001-02-23 17:55:21 +000013930/************************************************************************
13931 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013932 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013933 * *
13934 ************************************************************************/
13935
13936/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013937 * xmlCreateURLParserCtxt:
13938 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013939 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013940 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013941 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013942 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013943 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013944 *
13945 * Returns the new parser context or NULL
13946 */
13947xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013948xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013949{
13950 xmlParserCtxtPtr ctxt;
13951 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013952 char *directory = NULL;
13953
Owen Taylor3473f882001-02-23 17:55:21 +000013954 ctxt = xmlNewParserCtxt();
13955 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013956 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013957 return(NULL);
13958 }
13959
Daniel Veillarddf292f72005-01-16 19:00:15 +000013960 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013961 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013962 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013963
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013964 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013965 if (inputStream == NULL) {
13966 xmlFreeParserCtxt(ctxt);
13967 return(NULL);
13968 }
13969
Owen Taylor3473f882001-02-23 17:55:21 +000013970 inputPush(ctxt, inputStream);
13971 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013972 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013973 if ((ctxt->directory == NULL) && (directory != NULL))
13974 ctxt->directory = directory;
13975
13976 return(ctxt);
13977}
13978
Daniel Veillard61b93382003-11-03 14:28:31 +000013979/**
13980 * xmlCreateFileParserCtxt:
13981 * @filename: the filename
13982 *
13983 * Create a parser context for a file content.
13984 * Automatic support for ZLIB/Compress compressed document is provided
13985 * by default if found at compile-time.
13986 *
13987 * Returns the new parser context or NULL
13988 */
13989xmlParserCtxtPtr
13990xmlCreateFileParserCtxt(const char *filename)
13991{
13992 return(xmlCreateURLParserCtxt(filename, 0));
13993}
13994
Daniel Veillard81273902003-09-30 00:43:48 +000013995#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013996/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013997 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013998 * @sax: the SAX handler block
13999 * @filename: the filename
14000 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14001 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000014002 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000014003 *
14004 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14005 * compressed document is provided by default if found at compile-time.
14006 * It use the given SAX function block to handle the parsing callback.
14007 * If sax is NULL, fallback to the default DOM tree building routines.
14008 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000014009 * User data (void *) is stored within the parser context in the
14010 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000014011 *
Owen Taylor3473f882001-02-23 17:55:21 +000014012 * Returns the resulting document tree
14013 */
14014
14015xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000014016xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
14017 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000014018 xmlDocPtr ret;
14019 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000014020
Daniel Veillard635ef722001-10-29 11:48:19 +000014021 xmlInitParser();
14022
Owen Taylor3473f882001-02-23 17:55:21 +000014023 ctxt = xmlCreateFileParserCtxt(filename);
14024 if (ctxt == NULL) {
14025 return(NULL);
14026 }
14027 if (sax != NULL) {
14028 if (ctxt->sax != NULL)
14029 xmlFree(ctxt->sax);
14030 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014031 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014032 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000014033 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000014034 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000014035 }
Owen Taylor3473f882001-02-23 17:55:21 +000014036
Daniel Veillard37d2d162008-03-14 10:54:00 +000014037 if (ctxt->directory == NULL)
14038 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014039
Daniel Veillarddad3f682002-11-17 16:47:27 +000014040 ctxt->recovery = recovery;
14041
Owen Taylor3473f882001-02-23 17:55:21 +000014042 xmlParseDocument(ctxt);
14043
William M. Brackc07329e2003-09-08 01:57:30 +000014044 if ((ctxt->wellFormed) || recovery) {
14045 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000014046 if (ret != NULL) {
14047 if (ctxt->input->buf->compressed > 0)
14048 ret->compression = 9;
14049 else
14050 ret->compression = ctxt->input->buf->compressed;
14051 }
William M. Brackc07329e2003-09-08 01:57:30 +000014052 }
Owen Taylor3473f882001-02-23 17:55:21 +000014053 else {
14054 ret = NULL;
14055 xmlFreeDoc(ctxt->myDoc);
14056 ctxt->myDoc = NULL;
14057 }
14058 if (sax != NULL)
14059 ctxt->sax = NULL;
14060 xmlFreeParserCtxt(ctxt);
14061
14062 return(ret);
14063}
14064
14065/**
Daniel Veillarda293c322001-10-02 13:54:14 +000014066 * xmlSAXParseFile:
14067 * @sax: the SAX handler block
14068 * @filename: the filename
14069 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14070 * documents
14071 *
14072 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14073 * compressed document is provided by default if found at compile-time.
14074 * It use the given SAX function block to handle the parsing callback.
14075 * If sax is NULL, fallback to the default DOM tree building routines.
14076 *
14077 * Returns the resulting document tree
14078 */
14079
14080xmlDocPtr
14081xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
14082 int recovery) {
14083 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
14084}
14085
14086/**
Owen Taylor3473f882001-02-23 17:55:21 +000014087 * xmlRecoverDoc:
14088 * @cur: a pointer to an array of xmlChar
14089 *
14090 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014091 * In the case the document is not Well Formed, a attempt to build a
14092 * tree is tried anyway
14093 *
14094 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014095 */
14096
14097xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020014098xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014099 return(xmlSAXParseDoc(NULL, cur, 1));
14100}
14101
14102/**
14103 * xmlParseFile:
14104 * @filename: the filename
14105 *
14106 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14107 * compressed document is provided by default if found at compile-time.
14108 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000014109 * Returns the resulting document tree if the file was wellformed,
14110 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000014111 */
14112
14113xmlDocPtr
14114xmlParseFile(const char *filename) {
14115 return(xmlSAXParseFile(NULL, filename, 0));
14116}
14117
14118/**
14119 * xmlRecoverFile:
14120 * @filename: the filename
14121 *
14122 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
14123 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014124 * In the case the document is not Well Formed, it attempts to build
14125 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000014126 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000014127 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000014128 */
14129
14130xmlDocPtr
14131xmlRecoverFile(const char *filename) {
14132 return(xmlSAXParseFile(NULL, filename, 1));
14133}
14134
14135
14136/**
14137 * xmlSetupParserForBuffer:
14138 * @ctxt: an XML parser context
14139 * @buffer: a xmlChar * buffer
14140 * @filename: a file name
14141 *
14142 * Setup the parser context to parse a new buffer; Clears any prior
14143 * contents from the parser context. The buffer parameter must not be
14144 * NULL, but the filename parameter can be
14145 */
14146void
14147xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
14148 const char* filename)
14149{
14150 xmlParserInputPtr input;
14151
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014152 if ((ctxt == NULL) || (buffer == NULL))
14153 return;
14154
Owen Taylor3473f882001-02-23 17:55:21 +000014155 input = xmlNewInputStream(ctxt);
14156 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000014157 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014158 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014159 return;
14160 }
14161
14162 xmlClearParserCtxt(ctxt);
14163 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000014164 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000014165 input->base = buffer;
14166 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000014167 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000014168 inputPush(ctxt, input);
14169}
14170
14171/**
14172 * xmlSAXUserParseFile:
14173 * @sax: a SAX handler
14174 * @user_data: The user data returned on SAX callbacks
14175 * @filename: a file name
14176 *
14177 * parse an XML file and call the given SAX handler routines.
14178 * Automatic support for ZLIB/Compress compressed document is provided
14179 *
14180 * Returns 0 in case of success or a error number otherwise
14181 */
14182int
14183xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14184 const char *filename) {
14185 int ret = 0;
14186 xmlParserCtxtPtr ctxt;
14187
14188 ctxt = xmlCreateFileParserCtxt(filename);
14189 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000014190 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000014191 xmlFree(ctxt->sax);
14192 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014193 xmlDetectSAX2(ctxt);
14194
Owen Taylor3473f882001-02-23 17:55:21 +000014195 if (user_data != NULL)
14196 ctxt->userData = user_data;
14197
14198 xmlParseDocument(ctxt);
14199
14200 if (ctxt->wellFormed)
14201 ret = 0;
14202 else {
14203 if (ctxt->errNo != 0)
14204 ret = ctxt->errNo;
14205 else
14206 ret = -1;
14207 }
14208 if (sax != NULL)
14209 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014210 if (ctxt->myDoc != NULL) {
14211 xmlFreeDoc(ctxt->myDoc);
14212 ctxt->myDoc = NULL;
14213 }
Owen Taylor3473f882001-02-23 17:55:21 +000014214 xmlFreeParserCtxt(ctxt);
14215
14216 return ret;
14217}
Daniel Veillard81273902003-09-30 00:43:48 +000014218#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014219
14220/************************************************************************
14221 * *
14222 * Front ends when parsing from memory *
14223 * *
14224 ************************************************************************/
14225
14226/**
14227 * xmlCreateMemoryParserCtxt:
14228 * @buffer: a pointer to a char array
14229 * @size: the size of the array
14230 *
14231 * Create a parser context for an XML in-memory document.
14232 *
14233 * Returns the new parser context or NULL
14234 */
14235xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014236xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014237 xmlParserCtxtPtr ctxt;
14238 xmlParserInputPtr input;
14239 xmlParserInputBufferPtr buf;
14240
14241 if (buffer == NULL)
14242 return(NULL);
14243 if (size <= 0)
14244 return(NULL);
14245
14246 ctxt = xmlNewParserCtxt();
14247 if (ctxt == NULL)
14248 return(NULL);
14249
Daniel Veillard53350552003-09-18 13:35:51 +000014250 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000014251 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014252 if (buf == NULL) {
14253 xmlFreeParserCtxt(ctxt);
14254 return(NULL);
14255 }
Owen Taylor3473f882001-02-23 17:55:21 +000014256
14257 input = xmlNewInputStream(ctxt);
14258 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000014259 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000014260 xmlFreeParserCtxt(ctxt);
14261 return(NULL);
14262 }
14263
14264 input->filename = NULL;
14265 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014266 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000014267
14268 inputPush(ctxt, input);
14269 return(ctxt);
14270}
14271
Daniel Veillard81273902003-09-30 00:43:48 +000014272#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014273/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014274 * xmlSAXParseMemoryWithData:
14275 * @sax: the SAX handler block
14276 * @buffer: an pointer to a char array
14277 * @size: the size of the array
14278 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14279 * documents
14280 * @data: the userdata
14281 *
14282 * parse an XML in-memory block and use the given SAX function block
14283 * to handle the parsing callback. If sax is NULL, fallback to the default
14284 * DOM tree building routines.
14285 *
14286 * User data (void *) is stored within the parser context in the
14287 * context's _private member, so it is available nearly everywhere in libxml
14288 *
14289 * Returns the resulting document tree
14290 */
14291
14292xmlDocPtr
14293xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14294 int size, int recovery, void *data) {
14295 xmlDocPtr ret;
14296 xmlParserCtxtPtr ctxt;
14297
Daniel Veillardab2a7632009-07-09 08:45:03 +020014298 xmlInitParser();
14299
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014300 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14301 if (ctxt == NULL) return(NULL);
14302 if (sax != NULL) {
14303 if (ctxt->sax != NULL)
14304 xmlFree(ctxt->sax);
14305 ctxt->sax = sax;
14306 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014307 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014308 if (data!=NULL) {
14309 ctxt->_private=data;
14310 }
14311
Daniel Veillardadba5f12003-04-04 16:09:01 +000014312 ctxt->recovery = recovery;
14313
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014314 xmlParseDocument(ctxt);
14315
14316 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14317 else {
14318 ret = NULL;
14319 xmlFreeDoc(ctxt->myDoc);
14320 ctxt->myDoc = NULL;
14321 }
14322 if (sax != NULL)
14323 ctxt->sax = NULL;
14324 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020014325
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014326 return(ret);
14327}
14328
14329/**
Owen Taylor3473f882001-02-23 17:55:21 +000014330 * xmlSAXParseMemory:
14331 * @sax: the SAX handler block
14332 * @buffer: an pointer to a char array
14333 * @size: the size of the array
14334 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
14335 * documents
14336 *
14337 * parse an XML in-memory block and use the given SAX function block
14338 * to handle the parsing callback. If sax is NULL, fallback to the default
14339 * DOM tree building routines.
14340 *
14341 * Returns the resulting document tree
14342 */
14343xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000014344xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14345 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000014346 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014347}
14348
14349/**
14350 * xmlParseMemory:
14351 * @buffer: an pointer to a char array
14352 * @size: the size of the array
14353 *
14354 * parse an XML in-memory block and build a tree.
14355 *
14356 * Returns the resulting document tree
14357 */
14358
Daniel Veillard50822cb2001-07-26 20:05:51 +000014359xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014360 return(xmlSAXParseMemory(NULL, buffer, size, 0));
14361}
14362
14363/**
14364 * xmlRecoverMemory:
14365 * @buffer: an pointer to a char array
14366 * @size: the size of the array
14367 *
14368 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000014369 * In the case the document is not Well Formed, an attempt to
14370 * build a tree is tried anyway
14371 *
14372 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000014373 */
14374
Daniel Veillard50822cb2001-07-26 20:05:51 +000014375xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014376 return(xmlSAXParseMemory(NULL, buffer, size, 1));
14377}
14378
14379/**
14380 * xmlSAXUserParseMemory:
14381 * @sax: a SAX handler
14382 * @user_data: The user data returned on SAX callbacks
14383 * @buffer: an in-memory XML document input
14384 * @size: the length of the XML document in bytes
14385 *
14386 * A better SAX parsing routine.
14387 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014388 *
Owen Taylor3473f882001-02-23 17:55:21 +000014389 * Returns 0 in case of success or a error number otherwise
14390 */
14391int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014392 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014393 int ret = 0;
14394 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014395
14396 xmlInitParser();
14397
Owen Taylor3473f882001-02-23 17:55:21 +000014398 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14399 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014400 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14401 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014402 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014403 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014404
Daniel Veillard30211a02001-04-26 09:33:18 +000014405 if (user_data != NULL)
14406 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014407
Owen Taylor3473f882001-02-23 17:55:21 +000014408 xmlParseDocument(ctxt);
14409
14410 if (ctxt->wellFormed)
14411 ret = 0;
14412 else {
14413 if (ctxt->errNo != 0)
14414 ret = ctxt->errNo;
14415 else
14416 ret = -1;
14417 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014418 if (sax != NULL)
14419 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014420 if (ctxt->myDoc != NULL) {
14421 xmlFreeDoc(ctxt->myDoc);
14422 ctxt->myDoc = NULL;
14423 }
Owen Taylor3473f882001-02-23 17:55:21 +000014424 xmlFreeParserCtxt(ctxt);
14425
14426 return ret;
14427}
Daniel Veillard81273902003-09-30 00:43:48 +000014428#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014429
14430/**
14431 * xmlCreateDocParserCtxt:
14432 * @cur: a pointer to an array of xmlChar
14433 *
14434 * Creates a parser context for an XML in-memory document.
14435 *
14436 * Returns the new parser context or NULL
14437 */
14438xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014439xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014440 int len;
14441
14442 if (cur == NULL)
14443 return(NULL);
14444 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014445 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014446}
14447
Daniel Veillard81273902003-09-30 00:43:48 +000014448#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014449/**
14450 * xmlSAXParseDoc:
14451 * @sax: the SAX handler block
14452 * @cur: a pointer to an array of xmlChar
14453 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14454 * documents
14455 *
14456 * parse an XML in-memory document and build a tree.
14457 * It use the given SAX function block to handle the parsing callback.
14458 * If sax is NULL, fallback to the default DOM tree building routines.
14459 *
14460 * Returns the resulting document tree
14461 */
14462
14463xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014464xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014465 xmlDocPtr ret;
14466 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014467 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014468
Daniel Veillard38936062004-11-04 17:45:11 +000014469 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014470
14471
14472 ctxt = xmlCreateDocParserCtxt(cur);
14473 if (ctxt == NULL) return(NULL);
14474 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014475 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014476 ctxt->sax = sax;
14477 ctxt->userData = NULL;
14478 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014479 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014480
14481 xmlParseDocument(ctxt);
14482 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14483 else {
14484 ret = NULL;
14485 xmlFreeDoc(ctxt->myDoc);
14486 ctxt->myDoc = NULL;
14487 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014488 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014489 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014490 xmlFreeParserCtxt(ctxt);
14491
14492 return(ret);
14493}
14494
14495/**
14496 * xmlParseDoc:
14497 * @cur: a pointer to an array of xmlChar
14498 *
14499 * parse an XML in-memory document and build a tree.
14500 *
14501 * Returns the resulting document tree
14502 */
14503
14504xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014505xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014506 return(xmlSAXParseDoc(NULL, cur, 0));
14507}
Daniel Veillard81273902003-09-30 00:43:48 +000014508#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014509
Daniel Veillard81273902003-09-30 00:43:48 +000014510#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014511/************************************************************************
14512 * *
14513 * Specific function to keep track of entities references *
14514 * and used by the XSLT debugger *
14515 * *
14516 ************************************************************************/
14517
14518static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14519
14520/**
14521 * xmlAddEntityReference:
14522 * @ent : A valid entity
14523 * @firstNode : A valid first node for children of entity
14524 * @lastNode : A valid last node of children entity
14525 *
14526 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14527 */
14528static void
14529xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14530 xmlNodePtr lastNode)
14531{
14532 if (xmlEntityRefFunc != NULL) {
14533 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14534 }
14535}
14536
14537
14538/**
14539 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014540 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014541 *
14542 * Set the function to call call back when a xml reference has been made
14543 */
14544void
14545xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14546{
14547 xmlEntityRefFunc = func;
14548}
Daniel Veillard81273902003-09-30 00:43:48 +000014549#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014550
14551/************************************************************************
14552 * *
14553 * Miscellaneous *
14554 * *
14555 ************************************************************************/
14556
14557#ifdef LIBXML_XPATH_ENABLED
14558#include <libxml/xpath.h>
14559#endif
14560
Daniel Veillardffa3c742005-07-21 13:24:09 +000014561extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014562static int xmlParserInitialized = 0;
14563
14564/**
14565 * xmlInitParser:
14566 *
14567 * Initialization function for the XML parser.
14568 * This is not reentrant. Call once before processing in case of
14569 * use in multithreaded programs.
14570 */
14571
14572void
14573xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014574 if (xmlParserInitialized != 0)
14575 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014576
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014577#ifdef LIBXML_THREAD_ENABLED
14578 __xmlGlobalInitMutexLock();
14579 if (xmlParserInitialized == 0) {
14580#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014581 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014582 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014583 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14584 (xmlGenericError == NULL))
14585 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014586 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014587 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014588 xmlInitCharEncodingHandlers();
14589 xmlDefaultSAXHandlerInit();
14590 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014591#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014592 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014593#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014594#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014595 htmlInitAutoClose();
14596 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014597#endif
14598#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014599 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014600#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014601 xmlParserInitialized = 1;
14602#ifdef LIBXML_THREAD_ENABLED
14603 }
14604 __xmlGlobalInitMutexUnlock();
14605#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014606}
14607
14608/**
14609 * xmlCleanupParser:
14610 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014611 * This function name is somewhat misleading. It does not clean up
14612 * parser state, it cleans up memory allocated by the library itself.
14613 * It is a cleanup function for the XML library. It tries to reclaim all
14614 * related global memory allocated for the library processing.
14615 * It doesn't deallocate any document related memory. One should
14616 * call xmlCleanupParser() only when the process has finished using
14617 * the library and all XML/HTML documents built with it.
14618 * See also xmlInitParser() which has the opposite function of preparing
14619 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014620 *
14621 * WARNING: if your application is multithreaded or has plugin support
14622 * calling this may crash the application if another thread or
14623 * a plugin is still using libxml2. It's sometimes very hard to
14624 * guess if libxml2 is in use in the application, some libraries
14625 * or plugins may use it without notice. In case of doubt abstain
14626 * from calling this function or do it just before calling exit()
14627 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014628 */
14629
14630void
14631xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014632 if (!xmlParserInitialized)
14633 return;
14634
Owen Taylor3473f882001-02-23 17:55:21 +000014635 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014636#ifdef LIBXML_CATALOG_ENABLED
14637 xmlCatalogCleanup();
14638#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014639 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014640 xmlCleanupInputCallbacks();
14641#ifdef LIBXML_OUTPUT_ENABLED
14642 xmlCleanupOutputCallbacks();
14643#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014644#ifdef LIBXML_SCHEMAS_ENABLED
14645 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014646 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014647#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000014648 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014649 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014650 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014651 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014652 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014653}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014654
14655/************************************************************************
14656 * *
14657 * New set (2.6.0) of simpler and more flexible APIs *
14658 * *
14659 ************************************************************************/
14660
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL string is ignored. A variable named "dict"
 * must be visible where the macro is used.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement, including inside an unbraced if/else. It must
 * be followed by a semicolon at the point of use.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14672
14673/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014674 * xmlCtxtReset:
14675 * @ctxt: an XML parser context
14676 *
14677 * Reset a parser context
14678 */
14679void
14680xmlCtxtReset(xmlParserCtxtPtr ctxt)
14681{
14682 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014683 xmlDictPtr dict;
14684
14685 if (ctxt == NULL)
14686 return;
14687
14688 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014689
14690 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14691 xmlFreeInputStream(input);
14692 }
14693 ctxt->inputNr = 0;
14694 ctxt->input = NULL;
14695
14696 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014697 if (ctxt->spaceTab != NULL) {
14698 ctxt->spaceTab[0] = -1;
14699 ctxt->space = &ctxt->spaceTab[0];
14700 } else {
14701 ctxt->space = NULL;
14702 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014703
14704
14705 ctxt->nodeNr = 0;
14706 ctxt->node = NULL;
14707
14708 ctxt->nameNr = 0;
14709 ctxt->name = NULL;
14710
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014711 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014712 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014713 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014714 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014715 DICT_FREE(ctxt->directory);
14716 ctxt->directory = NULL;
14717 DICT_FREE(ctxt->extSubURI);
14718 ctxt->extSubURI = NULL;
14719 DICT_FREE(ctxt->extSubSystem);
14720 ctxt->extSubSystem = NULL;
14721 if (ctxt->myDoc != NULL)
14722 xmlFreeDoc(ctxt->myDoc);
14723 ctxt->myDoc = NULL;
14724
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014725 ctxt->standalone = -1;
14726 ctxt->hasExternalSubset = 0;
14727 ctxt->hasPErefs = 0;
14728 ctxt->html = 0;
14729 ctxt->external = 0;
14730 ctxt->instate = XML_PARSER_START;
14731 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014732
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014733 ctxt->wellFormed = 1;
14734 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014735 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014736 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014737#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014738 ctxt->vctxt.userData = ctxt;
14739 ctxt->vctxt.error = xmlParserValidityError;
14740 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014741#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014742 ctxt->record_info = 0;
14743 ctxt->nbChars = 0;
14744 ctxt->checkIndex = 0;
14745 ctxt->inSubset = 0;
14746 ctxt->errNo = XML_ERR_OK;
14747 ctxt->depth = 0;
14748 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14749 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014750 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014751 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014752 xmlInitNodeInfoSeq(&ctxt->node_seq);
14753
14754 if (ctxt->attsDefault != NULL) {
14755 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14756 ctxt->attsDefault = NULL;
14757 }
14758 if (ctxt->attsSpecial != NULL) {
14759 xmlHashFree(ctxt->attsSpecial, NULL);
14760 ctxt->attsSpecial = NULL;
14761 }
14762
Daniel Veillard4432df22003-09-28 18:58:27 +000014763#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014764 if (ctxt->catalogs != NULL)
14765 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014766#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014767 if (ctxt->lastError.code != XML_ERR_OK)
14768 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014769}
14770
14771/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014772 * xmlCtxtResetPush:
14773 * @ctxt: an XML parser context
14774 * @chunk: a pointer to an array of chars
14775 * @size: number of chars in the array
14776 * @filename: an optional file name or URI
14777 * @encoding: the document encoding, or NULL
14778 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014779 * Reset a push parser context
14780 *
14781 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014782 */
14783int
14784xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14785 int size, const char *filename, const char *encoding)
14786{
14787 xmlParserInputPtr inputStream;
14788 xmlParserInputBufferPtr buf;
14789 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14790
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014791 if (ctxt == NULL)
14792 return(1);
14793
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014794 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14795 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14796
14797 buf = xmlAllocParserInputBuffer(enc);
14798 if (buf == NULL)
14799 return(1);
14800
14801 if (ctxt == NULL) {
14802 xmlFreeParserInputBuffer(buf);
14803 return(1);
14804 }
14805
14806 xmlCtxtReset(ctxt);
14807
14808 if (ctxt->pushTab == NULL) {
14809 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14810 sizeof(xmlChar *));
14811 if (ctxt->pushTab == NULL) {
14812 xmlErrMemory(ctxt, NULL);
14813 xmlFreeParserInputBuffer(buf);
14814 return(1);
14815 }
14816 }
14817
14818 if (filename == NULL) {
14819 ctxt->directory = NULL;
14820 } else {
14821 ctxt->directory = xmlParserGetDirectory(filename);
14822 }
14823
14824 inputStream = xmlNewInputStream(ctxt);
14825 if (inputStream == NULL) {
14826 xmlFreeParserInputBuffer(buf);
14827 return(1);
14828 }
14829
14830 if (filename == NULL)
14831 inputStream->filename = NULL;
14832 else
14833 inputStream->filename = (char *)
14834 xmlCanonicPath((const xmlChar *) filename);
14835 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014836 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014837
14838 inputPush(ctxt, inputStream);
14839
14840 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14841 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014842 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14843 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014844
14845 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14846
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014847 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014848#ifdef DEBUG_PUSH
14849 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14850#endif
14851 }
14852
14853 if (encoding != NULL) {
14854 xmlCharEncodingHandlerPtr hdlr;
14855
Daniel Veillard37334572008-07-31 08:20:02 +000014856 if (ctxt->encoding != NULL)
14857 xmlFree((xmlChar *) ctxt->encoding);
14858 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14859
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014860 hdlr = xmlFindCharEncodingHandler(encoding);
14861 if (hdlr != NULL) {
14862 xmlSwitchToEncoding(ctxt, hdlr);
14863 } else {
14864 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14865 "Unsupported encoding %s\n", BAD_CAST encoding);
14866 }
14867 } else if (enc != XML_CHAR_ENCODING_NONE) {
14868 xmlSwitchEncoding(ctxt, enc);
14869 }
14870
14871 return(0);
14872}
14873
Daniel Veillard37334572008-07-31 08:20:02 +000014874
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014875/**
Daniel Veillard37334572008-07-31 08:20:02 +000014876 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014877 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014878 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014879 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014880 *
14881 * Applies the options to the parser context
14882 *
14883 * Returns 0 in case of success, the set of unknown or unimplemented options
14884 * in case of error.
14885 */
Daniel Veillard37334572008-07-31 08:20:02 +000014886static int
14887xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014888{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014889 if (ctxt == NULL)
14890 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014891 if (encoding != NULL) {
14892 if (ctxt->encoding != NULL)
14893 xmlFree((xmlChar *) ctxt->encoding);
14894 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14895 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014896 if (options & XML_PARSE_RECOVER) {
14897 ctxt->recovery = 1;
14898 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014899 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014900 } else
14901 ctxt->recovery = 0;
14902 if (options & XML_PARSE_DTDLOAD) {
14903 ctxt->loadsubset = XML_DETECT_IDS;
14904 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014905 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014906 } else
14907 ctxt->loadsubset = 0;
14908 if (options & XML_PARSE_DTDATTR) {
14909 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14910 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014911 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014912 }
14913 if (options & XML_PARSE_NOENT) {
14914 ctxt->replaceEntities = 1;
14915 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14916 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014917 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014918 } else
14919 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014920 if (options & XML_PARSE_PEDANTIC) {
14921 ctxt->pedantic = 1;
14922 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014923 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014924 } else
14925 ctxt->pedantic = 0;
14926 if (options & XML_PARSE_NOBLANKS) {
14927 ctxt->keepBlanks = 0;
14928 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14929 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014930 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014931 } else
14932 ctxt->keepBlanks = 1;
14933 if (options & XML_PARSE_DTDVALID) {
14934 ctxt->validate = 1;
14935 if (options & XML_PARSE_NOWARNING)
14936 ctxt->vctxt.warning = NULL;
14937 if (options & XML_PARSE_NOERROR)
14938 ctxt->vctxt.error = NULL;
14939 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014940 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014941 } else
14942 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014943 if (options & XML_PARSE_NOWARNING) {
14944 ctxt->sax->warning = NULL;
14945 options -= XML_PARSE_NOWARNING;
14946 }
14947 if (options & XML_PARSE_NOERROR) {
14948 ctxt->sax->error = NULL;
14949 ctxt->sax->fatalError = NULL;
14950 options -= XML_PARSE_NOERROR;
14951 }
Daniel Veillard81273902003-09-30 00:43:48 +000014952#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014953 if (options & XML_PARSE_SAX1) {
14954 ctxt->sax->startElement = xmlSAX2StartElement;
14955 ctxt->sax->endElement = xmlSAX2EndElement;
14956 ctxt->sax->startElementNs = NULL;
14957 ctxt->sax->endElementNs = NULL;
14958 ctxt->sax->initialized = 1;
14959 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014960 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014961 }
Daniel Veillard81273902003-09-30 00:43:48 +000014962#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014963 if (options & XML_PARSE_NODICT) {
14964 ctxt->dictNames = 0;
14965 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014966 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014967 } else {
14968 ctxt->dictNames = 1;
14969 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014970 if (options & XML_PARSE_NOCDATA) {
14971 ctxt->sax->cdataBlock = NULL;
14972 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014973 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014974 }
14975 if (options & XML_PARSE_NSCLEAN) {
14976 ctxt->options |= XML_PARSE_NSCLEAN;
14977 options -= XML_PARSE_NSCLEAN;
14978 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014979 if (options & XML_PARSE_NONET) {
14980 ctxt->options |= XML_PARSE_NONET;
14981 options -= XML_PARSE_NONET;
14982 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014983 if (options & XML_PARSE_COMPACT) {
14984 ctxt->options |= XML_PARSE_COMPACT;
14985 options -= XML_PARSE_COMPACT;
14986 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014987 if (options & XML_PARSE_OLD10) {
14988 ctxt->options |= XML_PARSE_OLD10;
14989 options -= XML_PARSE_OLD10;
14990 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014991 if (options & XML_PARSE_NOBASEFIX) {
14992 ctxt->options |= XML_PARSE_NOBASEFIX;
14993 options -= XML_PARSE_NOBASEFIX;
14994 }
14995 if (options & XML_PARSE_HUGE) {
14996 ctxt->options |= XML_PARSE_HUGE;
14997 options -= XML_PARSE_HUGE;
Daniel Veillard52d8ade2012-07-30 10:08:45 +080014998 if (ctxt->dict != NULL)
14999 xmlDictSetLimit(ctxt->dict, 0);
Daniel Veillard8915c152008-08-26 13:05:34 +000015000 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000015001 if (options & XML_PARSE_OLDSAX) {
15002 ctxt->options |= XML_PARSE_OLDSAX;
15003 options -= XML_PARSE_OLDSAX;
15004 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080015005 if (options & XML_PARSE_IGNORE_ENC) {
15006 ctxt->options |= XML_PARSE_IGNORE_ENC;
15007 options -= XML_PARSE_IGNORE_ENC;
15008 }
Daniel Veillard968a03a2012-08-13 12:41:33 +080015009 if (options & XML_PARSE_BIG_LINES) {
15010 ctxt->options |= XML_PARSE_BIG_LINES;
15011 options -= XML_PARSE_BIG_LINES;
15012 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000015013 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015014 return (options);
15015}
15016
15017/**
Daniel Veillard37334572008-07-31 08:20:02 +000015018 * xmlCtxtUseOptions:
15019 * @ctxt: an XML parser context
15020 * @options: a combination of xmlParserOption
15021 *
15022 * Applies the options to the parser context
15023 *
15024 * Returns 0 in case of success, the set of unknown or unimplemented options
15025 * in case of error.
15026 */
15027int
15028xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
15029{
15030 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
15031}
15032
15033/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015034 * xmlDoRead:
15035 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000015036 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015037 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015038 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015039 * @reuse: keep the context for reuse
15040 *
15041 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000015042 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015043 * Returns the resulting document tree or NULL
15044 */
15045static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015046xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
15047 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015048{
15049 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000015050
15051 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015052 if (encoding != NULL) {
15053 xmlCharEncodingHandlerPtr hdlr;
15054
15055 hdlr = xmlFindCharEncodingHandler(encoding);
15056 if (hdlr != NULL)
15057 xmlSwitchToEncoding(ctxt, hdlr);
15058 }
Daniel Veillard60942de2003-09-25 21:05:58 +000015059 if ((URL != NULL) && (ctxt->input != NULL) &&
15060 (ctxt->input->filename == NULL))
15061 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015062 xmlParseDocument(ctxt);
15063 if ((ctxt->wellFormed) || ctxt->recovery)
15064 ret = ctxt->myDoc;
15065 else {
15066 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015067 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015068 xmlFreeDoc(ctxt->myDoc);
15069 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015070 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015071 ctxt->myDoc = NULL;
15072 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015073 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000015074 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015075
15076 return (ret);
15077}
15078
15079/**
15080 * xmlReadDoc:
15081 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015082 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015083 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015084 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015085 *
15086 * parse an XML in-memory document and build a tree.
15087 *
15088 * Returns the resulting document tree
15089 */
15090xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015091xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015092{
15093 xmlParserCtxtPtr ctxt;
15094
15095 if (cur == NULL)
15096 return (NULL);
15097
15098 ctxt = xmlCreateDocParserCtxt(cur);
15099 if (ctxt == NULL)
15100 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015101 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015102}
15103
15104/**
15105 * xmlReadFile:
15106 * @filename: a file or URL
15107 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015108 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015109 *
15110 * parse an XML file from the filesystem or the network.
15111 *
15112 * Returns the resulting document tree
15113 */
15114xmlDocPtr
15115xmlReadFile(const char *filename, const char *encoding, int options)
15116{
15117 xmlParserCtxtPtr ctxt;
15118
Daniel Veillard61b93382003-11-03 14:28:31 +000015119 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015120 if (ctxt == NULL)
15121 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015122 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015123}
15124
15125/**
15126 * xmlReadMemory:
15127 * @buffer: a pointer to a char array
15128 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015129 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015130 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015131 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015132 *
15133 * parse an XML in-memory document and build a tree.
15134 *
15135 * Returns the resulting document tree
15136 */
15137xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015138xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015139{
15140 xmlParserCtxtPtr ctxt;
15141
15142 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
15143 if (ctxt == NULL)
15144 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000015145 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015146}
15147
15148/**
15149 * xmlReadFd:
15150 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015151 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015152 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015153 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015154 *
15155 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015156 * NOTE that the file descriptor will not be closed when the
15157 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015158 *
15159 * Returns the resulting document tree
15160 */
15161xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015162xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015163{
15164 xmlParserCtxtPtr ctxt;
15165 xmlParserInputBufferPtr input;
15166 xmlParserInputPtr stream;
15167
15168 if (fd < 0)
15169 return (NULL);
15170
15171 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15172 if (input == NULL)
15173 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015174 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015175 ctxt = xmlNewParserCtxt();
15176 if (ctxt == NULL) {
15177 xmlFreeParserInputBuffer(input);
15178 return (NULL);
15179 }
15180 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15181 if (stream == NULL) {
15182 xmlFreeParserInputBuffer(input);
15183 xmlFreeParserCtxt(ctxt);
15184 return (NULL);
15185 }
15186 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015187 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015188}
15189
15190/**
15191 * xmlReadIO:
15192 * @ioread: an I/O read function
15193 * @ioclose: an I/O close function
15194 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015195 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015196 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015197 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015198 *
15199 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080015200 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015201 * Returns the resulting document tree
15202 */
15203xmlDocPtr
15204xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000015205 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015206{
15207 xmlParserCtxtPtr ctxt;
15208 xmlParserInputBufferPtr input;
15209 xmlParserInputPtr stream;
15210
15211 if (ioread == NULL)
15212 return (NULL);
15213
15214 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15215 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015216 if (input == NULL) {
15217 if (ioclose != NULL)
15218 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015219 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015220 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015221 ctxt = xmlNewParserCtxt();
15222 if (ctxt == NULL) {
15223 xmlFreeParserInputBuffer(input);
15224 return (NULL);
15225 }
15226 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15227 if (stream == NULL) {
15228 xmlFreeParserInputBuffer(input);
15229 xmlFreeParserCtxt(ctxt);
15230 return (NULL);
15231 }
15232 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015233 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015234}
15235
15236/**
15237 * xmlCtxtReadDoc:
15238 * @ctxt: an XML parser context
15239 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000015240 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015241 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015242 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015243 *
15244 * parse an XML in-memory document and build a tree.
15245 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015246 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015247 * Returns the resulting document tree
15248 */
15249xmlDocPtr
15250xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000015251 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015252{
15253 xmlParserInputPtr stream;
15254
15255 if (cur == NULL)
15256 return (NULL);
15257 if (ctxt == NULL)
15258 return (NULL);
15259
15260 xmlCtxtReset(ctxt);
15261
15262 stream = xmlNewStringInputStream(ctxt, cur);
15263 if (stream == NULL) {
15264 return (NULL);
15265 }
15266 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015267 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015268}
15269
15270/**
15271 * xmlCtxtReadFile:
15272 * @ctxt: an XML parser context
15273 * @filename: a file or URL
15274 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015275 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015276 *
15277 * parse an XML file from the filesystem or the network.
15278 * This reuses the existing @ctxt parser context
15279 *
15280 * Returns the resulting document tree
15281 */
15282xmlDocPtr
15283xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15284 const char *encoding, int options)
15285{
15286 xmlParserInputPtr stream;
15287
15288 if (filename == NULL)
15289 return (NULL);
15290 if (ctxt == NULL)
15291 return (NULL);
15292
15293 xmlCtxtReset(ctxt);
15294
Daniel Veillard29614c72004-11-26 10:47:26 +000015295 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015296 if (stream == NULL) {
15297 return (NULL);
15298 }
15299 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015300 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015301}
15302
15303/**
15304 * xmlCtxtReadMemory:
15305 * @ctxt: an XML parser context
15306 * @buffer: a pointer to a char array
15307 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000015308 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015309 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015310 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015311 *
15312 * parse an XML in-memory document and build a tree.
15313 * This reuses the existing @ctxt parser context
15314 *
15315 * Returns the resulting document tree
15316 */
15317xmlDocPtr
15318xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000015319 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015320{
15321 xmlParserInputBufferPtr input;
15322 xmlParserInputPtr stream;
15323
15324 if (ctxt == NULL)
15325 return (NULL);
15326 if (buffer == NULL)
15327 return (NULL);
15328
15329 xmlCtxtReset(ctxt);
15330
15331 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
15332 if (input == NULL) {
15333 return(NULL);
15334 }
15335
15336 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15337 if (stream == NULL) {
15338 xmlFreeParserInputBuffer(input);
15339 return(NULL);
15340 }
15341
15342 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015343 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015344}
15345
15346/**
15347 * xmlCtxtReadFd:
15348 * @ctxt: an XML parser context
15349 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000015350 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015351 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015352 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015353 *
15354 * parse an XML from a file descriptor and build a tree.
15355 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015356 * NOTE that the file descriptor will not be closed when the
15357 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015358 *
15359 * Returns the resulting document tree
15360 */
15361xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000015362xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15363 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015364{
15365 xmlParserInputBufferPtr input;
15366 xmlParserInputPtr stream;
15367
15368 if (fd < 0)
15369 return (NULL);
15370 if (ctxt == NULL)
15371 return (NULL);
15372
15373 xmlCtxtReset(ctxt);
15374
15375
15376 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15377 if (input == NULL)
15378 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000015379 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015380 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15381 if (stream == NULL) {
15382 xmlFreeParserInputBuffer(input);
15383 return (NULL);
15384 }
15385 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015386 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015387}
15388
15389/**
15390 * xmlCtxtReadIO:
15391 * @ctxt: an XML parser context
15392 * @ioread: an I/O read function
15393 * @ioclose: an I/O close function
15394 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015395 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015396 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015397 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015398 *
15399 * parse an XML document from I/O functions and source and build a tree.
15400 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015401 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015402 * Returns the resulting document tree
15403 */
15404xmlDocPtr
15405xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15406 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015407 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015408 const char *encoding, int options)
15409{
15410 xmlParserInputBufferPtr input;
15411 xmlParserInputPtr stream;
15412
15413 if (ioread == NULL)
15414 return (NULL);
15415 if (ctxt == NULL)
15416 return (NULL);
15417
15418 xmlCtxtReset(ctxt);
15419
15420 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15421 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015422 if (input == NULL) {
15423 if (ioclose != NULL)
15424 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015425 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015426 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015427 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15428 if (stream == NULL) {
15429 xmlFreeParserInputBuffer(input);
15430 return (NULL);
15431 }
15432 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015433 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015434}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015435
15436#define bottom_parser
15437#include "elfgcchack.h"