blob: f36e84b129d2e8987fd7dd39bc72708c1296a51a [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard0161e632008-08-28 15:36:32 +000097/************************************************************************
98 * *
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
100 * *
101 ************************************************************************/
102
103#define XML_PARSER_BIG_ENTITY 1000
104#define XML_PARSER_LOT_ENTITY 5000
105
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
112#define XML_PARSER_NON_LINEAR 10
113
114/*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard0161e632008-08-28 15:36:32 +0000125 xmlEntityPtr ent)
126{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
133 if (size != 0) {
134 /*
135 * Do the check based on the replacement size of the entity
136 */
137 if (size < XML_PARSER_BIG_ENTITY)
138 return(0);
139
140 /*
141 * A limit on the amount of text data reasonably used
142 */
143 if (ctxt->input != NULL) {
144 consumed = ctxt->input->consumed +
145 (ctxt->input->cur - ctxt->input->base);
146 }
147 consumed += ctxt->sizeentities;
148
149 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
150 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
151 return (0);
152 } else if (ent != NULL) {
153 /*
154 * use the number of parsed entities in the replacement
155 */
156 size = ent->checked;
157
158 /*
159 * The amount of data parsed counting entities size only once
160 */
161 if (ctxt->input != NULL) {
162 consumed = ctxt->input->consumed +
163 (ctxt->input->cur - ctxt->input->base);
164 }
165 consumed += ctxt->sizeentities;
166
167 /*
168 * Check the density of entities for the amount of data
169 * knowing an entity reference will take at least 3 bytes
170 */
171 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
172 return (0);
173 } else {
174 /*
175 * strange we got no data for checking just return
176 */
177 return (0);
178 }
179
180 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
181 return (1);
182}
183
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process (default value: 256). This is not a limitation of the
 * parser but a safety boundary feature. It can be disabled with the
 * XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000193
Daniel Veillard0fb18932003-09-07 09:14:37 +0000194
Daniel Veillard0161e632008-08-28 15:36:32 +0000195
196#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000197#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000198#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000199#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
200
/*
 * List of XML prefixed PI targets allowed by W3C specs.
 * The table is NULL-terminated and never modified, so both the strings
 * and the pointers themselves are const-qualified.
 */

static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
210
Daniel Veillarda07050d2003-10-19 14:46:32 +0000211
Owen Taylor3473f882001-02-23 17:55:21 +0000212/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200213static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
214 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000215
Daniel Veillard7d515752003-09-26 19:12:37 +0000216static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000217xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
218 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000219 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000220 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000221
Daniel Veillard37334572008-07-31 08:20:02 +0000222static int
223xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
224 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000225#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000226static void
227xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
228 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000229#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000230
Daniel Veillard7d515752003-09-26 19:12:37 +0000231static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000232xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
233 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000234
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000235static int
236xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
237
Daniel Veillarde57ec792003-09-10 10:50:59 +0000238/************************************************************************
239 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000240 * Some factorized error routines *
241 * *
242 ************************************************************************/
243
244/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000245 * xmlErrAttributeDup:
246 * @ctxt: an XML parser context
247 * @prefix: the attribute prefix
248 * @localname: the attribute localname
249 *
250 * Handle a redefinition of attribute error
251 */
252static void
253xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
254 const xmlChar * localname)
255{
Daniel Veillard157fee02003-10-31 10:36:03 +0000256 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
257 (ctxt->instate == XML_PARSER_EOF))
258 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000259 if (ctxt != NULL)
260 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200261
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000262 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000263 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200264 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000265 (const char *) localname, NULL, NULL, 0, 0,
266 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000267 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000268 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200269 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000270 (const char *) prefix, (const char *) localname,
271 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
272 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000273 if (ctxt != NULL) {
274 ctxt->wellFormed = 0;
275 if (ctxt->recovery == 0)
276 ctxt->disableSAX = 1;
277 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000278}
279
280/**
281 * xmlFatalErr:
282 * @ctxt: an XML parser context
283 * @error: the error number
284 * @extra: extra information string
285 *
286 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
287 */
288static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000289xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290{
291 const char *errmsg;
292
Daniel Veillard157fee02003-10-31 10:36:03 +0000293 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
294 (ctxt->instate == XML_PARSER_EOF))
295 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 switch (error) {
297 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000298 errmsg = "CharRef: invalid hexadecimal value\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "CharRef: invalid decimal value\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "CharRef: invalid value\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "internal error";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "PEReference at end of document\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "PEReference in prolog\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "PEReference in epilog\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "PEReference: no name\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "PEReference: expecting ';'\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "Detected an entity reference loop\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "EntityValue: \" or ' expected\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "PEReferences forbidden in internal subset\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "EntityValue: \" or ' expected\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "AttValue: \" or ' expected\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Unescaped '<' not allowed in attributes values\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "SystemLiteral \" or ' expected\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Unfinished System or Public ID \" or ' expected\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Sequence ']]>' not allowed in content\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "PUBLIC, the Public Identifier is missing\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "Comment must not contain '--' (double-hyphen)\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 errmsg = "xmlParsePI : no target name\n";
362 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 errmsg = "Invalid PI name\n";
365 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 errmsg = "NOTATION: Name expected here\n";
368 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000369 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370 errmsg = "'>' required to close NOTATION declaration\n";
371 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000372 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 errmsg = "Entity value required\n";
374 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 errmsg = "Fragment not allowed";
377 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 errmsg = "'(' required to start ATTLIST enumeration\n";
380 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000381 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 errmsg = "NmToken expected in ATTLIST enumeration\n";
383 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000384 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 errmsg = "')' required to finish ATTLIST enumeration\n";
386 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000387 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
392 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000393 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000394 errmsg = "ContentDecl : Name or '(' expected\n";
395 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000396 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000397 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
398 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000399 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000400 errmsg =
401 "PEReference: forbidden within markup decl in internal subset\n";
402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "expected '>'\n";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000407 errmsg = "XML conditional section '[' expected\n";
408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000410 errmsg = "Content error in the external subset\n";
411 break;
412 case XML_ERR_CONDSEC_INVALID_KEYWORD:
413 errmsg =
414 "conditional section INCLUDE or IGNORE keyword expected\n";
415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 errmsg = "XML conditional section not closed\n";
418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 errmsg = "Text declaration '<?xml' required\n";
421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 errmsg = "parsing XML declaration: '?>' expected\n";
424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 errmsg = "external parsed entities cannot be standalone\n";
427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 errmsg = "EntityRef: expecting ';'\n";
430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 errmsg = "DOCTYPE improperly terminated\n";
433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 errmsg = "EndTag: '</' not found\n";
436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 errmsg = "expected '='\n";
439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 errmsg = "String not closed expecting \" or '\n";
442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 errmsg = "String not started expecting ' or \"\n";
445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 errmsg = "Invalid XML encoding name\n";
448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 errmsg = "standalone accepts only 'yes' or 'no'\n";
451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 errmsg = "Document is empty\n";
454 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000455 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 errmsg = "Extra content at the end of the document\n";
457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 errmsg = "chunk is not well balanced\n";
460 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000461 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000462 errmsg = "extra content at the end of well balanced chunk\n";
463 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 errmsg = "Malformed declaration expecting version\n";
466 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000467#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000468 case:
469 errmsg = "\n";
470 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 default:
473 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000474 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000475 if (ctxt != NULL)
476 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000477 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000478 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
479 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000480 if (ctxt != NULL) {
481 ctxt->wellFormed = 0;
482 if (ctxt->recovery == 0)
483 ctxt->disableSAX = 1;
484 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000485}
486
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000487/**
488 * xmlFatalErrMsg:
489 * @ctxt: an XML parser context
490 * @error: the error number
491 * @msg: the error message
492 *
493 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
494 */
495static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000496xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
497 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000498{
Daniel Veillard157fee02003-10-31 10:36:03 +0000499 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
500 (ctxt->instate == XML_PARSER_EOF))
501 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000502 if (ctxt != NULL)
503 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000504 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200505 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000506 if (ctxt != NULL) {
507 ctxt->wellFormed = 0;
508 if (ctxt->recovery == 0)
509 ctxt->disableSAX = 1;
510 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000511}
512
513/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000514 * xmlWarningMsg:
515 * @ctxt: an XML parser context
516 * @error: the error number
517 * @msg: the error message
518 * @str1: extra data
519 * @str2: extra data
520 *
521 * Handle a warning.
522 */
523static void
524xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
525 const char *msg, const xmlChar *str1, const xmlChar *str2)
526{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000527 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000528
Daniel Veillard157fee02003-10-31 10:36:03 +0000529 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
530 (ctxt->instate == XML_PARSER_EOF))
531 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000532 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
533 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000534 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200535 if (ctxt != NULL) {
536 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000537 (ctxt->sax) ? ctxt->sax->warning : NULL,
538 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000539 ctxt, NULL, XML_FROM_PARSER, error,
540 XML_ERR_WARNING, NULL, 0,
541 (const char *) str1, (const char *) str2, NULL, 0, 0,
542 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200543 } else {
544 __xmlRaiseError(schannel, NULL, NULL,
545 ctxt, NULL, XML_FROM_PARSER, error,
546 XML_ERR_WARNING, NULL, 0,
547 (const char *) str1, (const char *) str2, NULL, 0, 0,
548 msg, (const char *) str1, (const char *) str2);
549 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000550}
551
552/**
553 * xmlValidityError:
554 * @ctxt: an XML parser context
555 * @error: the error number
556 * @msg: the error message
557 * @str1: extra data
558 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000559 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000560 */
561static void
562xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000563 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000564{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000565 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000566
567 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
568 (ctxt->instate == XML_PARSER_EOF))
569 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000570 if (ctxt != NULL) {
571 ctxt->errNo = error;
572 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
573 schannel = ctxt->sax->serror;
574 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200575 if (ctxt != NULL) {
576 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000577 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000578 ctxt, NULL, XML_FROM_DTD, error,
579 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000580 (const char *) str2, NULL, 0, 0,
581 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000582 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200583 } else {
584 __xmlRaiseError(schannel, NULL, NULL,
585 ctxt, NULL, XML_FROM_DTD, error,
586 XML_ERR_ERROR, NULL, 0, (const char *) str1,
587 (const char *) str2, NULL, 0, 0,
588 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000589 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000590}
591
592/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000593 * xmlFatalErrMsgInt:
594 * @ctxt: an XML parser context
595 * @error: the error number
596 * @msg: the error message
597 * @val: an integer value
598 *
599 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
600 */
601static void
602xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000604{
Daniel Veillard157fee02003-10-31 10:36:03 +0000605 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
606 (ctxt->instate == XML_PARSER_EOF))
607 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000608 if (ctxt != NULL)
609 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000610 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000611 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
612 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000613 if (ctxt != NULL) {
614 ctxt->wellFormed = 0;
615 if (ctxt->recovery == 0)
616 ctxt->disableSAX = 1;
617 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000618}
619
620/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000621 * xmlFatalErrMsgStrIntStr:
622 * @ctxt: an XML parser context
623 * @error: the error number
624 * @msg: the error message
625 * @str1: an string info
626 * @val: an integer value
627 * @str2: an string info
628 *
629 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
630 */
631static void
632xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633 const char *msg, const xmlChar *str1, int val,
634 const xmlChar *str2)
635{
Daniel Veillard157fee02003-10-31 10:36:03 +0000636 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
637 (ctxt->instate == XML_PARSER_EOF))
638 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000639 if (ctxt != NULL)
640 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000641 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000642 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
643 NULL, 0, (const char *) str1, (const char *) str2,
644 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000645 if (ctxt != NULL) {
646 ctxt->wellFormed = 0;
647 if (ctxt->recovery == 0)
648 ctxt->disableSAX = 1;
649 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000650}
651
652/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000653 * xmlFatalErrMsgStr:
654 * @ctxt: an XML parser context
655 * @error: the error number
656 * @msg: the error message
657 * @val: a string value
658 *
659 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
660 */
661static void
662xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000663 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000664{
Daniel Veillard157fee02003-10-31 10:36:03 +0000665 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
666 (ctxt->instate == XML_PARSER_EOF))
667 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000668 if (ctxt != NULL)
669 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000670 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000671 XML_FROM_PARSER, error, XML_ERR_FATAL,
672 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
673 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL) {
675 ctxt->wellFormed = 0;
676 if (ctxt->recovery == 0)
677 ctxt->disableSAX = 1;
678 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000679}
680
681/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000682 * xmlErrMsgStr:
683 * @ctxt: an XML parser context
684 * @error: the error number
685 * @msg: the error message
686 * @val: a string value
687 *
688 * Handle a non fatal parser error
689 */
690static void
691xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
692 const char *msg, const xmlChar * val)
693{
Daniel Veillard157fee02003-10-31 10:36:03 +0000694 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
695 (ctxt->instate == XML_PARSER_EOF))
696 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000697 if (ctxt != NULL)
698 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000699 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000700 XML_FROM_PARSER, error, XML_ERR_ERROR,
701 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
702 val);
703}
704
705/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000706 * xmlNsErr:
707 * @ctxt: an XML parser context
708 * @error: the error number
709 * @msg: the message
710 * @info1: extra information string
711 * @info2: extra information string
712 *
713 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
714 */
715static void
716xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
717 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000718 const xmlChar * info1, const xmlChar * info2,
719 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000720{
Daniel Veillard157fee02003-10-31 10:36:03 +0000721 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
722 (ctxt->instate == XML_PARSER_EOF))
723 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000724 if (ctxt != NULL)
725 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000726 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000727 XML_ERR_ERROR, NULL, 0, (const char *) info1,
728 (const char *) info2, (const char *) info3, 0, 0, msg,
729 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000730 if (ctxt != NULL)
731 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000732}
733
Daniel Veillard37334572008-07-31 08:20:02 +0000734/**
735 * xmlNsWarn
736 * @ctxt: an XML parser context
737 * @error: the error number
738 * @msg: the message
739 * @info1: extra information string
740 * @info2: extra information string
741 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800742 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000743 */
744static void
745xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
746 const char *msg,
747 const xmlChar * info1, const xmlChar * info2,
748 const xmlChar * info3)
749{
750 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
751 (ctxt->instate == XML_PARSER_EOF))
752 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000753 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
754 XML_ERR_WARNING, NULL, 0, (const char *) info1,
755 (const char *) info2, (const char *) info3, 0, 0, msg,
756 info1, info2, info3);
757}
758
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000759/************************************************************************
760 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761 * Library wide options *
762 * *
763 ************************************************************************/
764
765/**
766 * xmlHasFeature:
767 * @feature: the feature to be examined
768 *
769 * Examines if the library has been compiled with a given feature.
770 *
771 * Returns a non-zero value if the feature exist, otherwise zero.
772 * Returns zero (0) if the feature does not exist or an unknown
773 * unknown feature is requested, non-zero otherwise.
774 */
775int
776xmlHasFeature(xmlFeature feature)
777{
778 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000779 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000780#ifdef LIBXML_THREAD_ENABLED
781 return(1);
782#else
783 return(0);
784#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000785 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000786#ifdef LIBXML_TREE_ENABLED
787 return(1);
788#else
789 return(0);
790#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000791 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000792#ifdef LIBXML_OUTPUT_ENABLED
793 return(1);
794#else
795 return(0);
796#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000797 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000798#ifdef LIBXML_PUSH_ENABLED
799 return(1);
800#else
801 return(0);
802#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000803 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000804#ifdef LIBXML_READER_ENABLED
805 return(1);
806#else
807 return(0);
808#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000809 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000810#ifdef LIBXML_PATTERN_ENABLED
811 return(1);
812#else
813 return(0);
814#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000815 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000816#ifdef LIBXML_WRITER_ENABLED
817 return(1);
818#else
819 return(0);
820#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000821 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000822#ifdef LIBXML_SAX1_ENABLED
823 return(1);
824#else
825 return(0);
826#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000827 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000828#ifdef LIBXML_FTP_ENABLED
829 return(1);
830#else
831 return(0);
832#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000833 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000834#ifdef LIBXML_HTTP_ENABLED
835 return(1);
836#else
837 return(0);
838#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000839 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000840#ifdef LIBXML_VALID_ENABLED
841 return(1);
842#else
843 return(0);
844#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000845 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000846#ifdef LIBXML_HTML_ENABLED
847 return(1);
848#else
849 return(0);
850#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000851 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000852#ifdef LIBXML_LEGACY_ENABLED
853 return(1);
854#else
855 return(0);
856#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000857 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000858#ifdef LIBXML_C14N_ENABLED
859 return(1);
860#else
861 return(0);
862#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000863 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000864#ifdef LIBXML_CATALOG_ENABLED
865 return(1);
866#else
867 return(0);
868#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000869 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000870#ifdef LIBXML_XPATH_ENABLED
871 return(1);
872#else
873 return(0);
874#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000875 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000876#ifdef LIBXML_XPTR_ENABLED
877 return(1);
878#else
879 return(0);
880#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000881 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000882#ifdef LIBXML_XINCLUDE_ENABLED
883 return(1);
884#else
885 return(0);
886#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000887 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000888#ifdef LIBXML_ICONV_ENABLED
889 return(1);
890#else
891 return(0);
892#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000893 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000894#ifdef LIBXML_ISO8859X_ENABLED
895 return(1);
896#else
897 return(0);
898#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000899 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000900#ifdef LIBXML_UNICODE_ENABLED
901 return(1);
902#else
903 return(0);
904#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000905 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000906#ifdef LIBXML_REGEXP_ENABLED
907 return(1);
908#else
909 return(0);
910#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000911 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000912#ifdef LIBXML_AUTOMATA_ENABLED
913 return(1);
914#else
915 return(0);
916#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000917 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000918#ifdef LIBXML_EXPR_ENABLED
919 return(1);
920#else
921 return(0);
922#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000923 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000924#ifdef LIBXML_SCHEMAS_ENABLED
925 return(1);
926#else
927 return(0);
928#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000929 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000930#ifdef LIBXML_SCHEMATRON_ENABLED
931 return(1);
932#else
933 return(0);
934#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000935 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000936#ifdef LIBXML_MODULES_ENABLED
937 return(1);
938#else
939 return(0);
940#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000941 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000942#ifdef LIBXML_DEBUG_ENABLED
943 return(1);
944#else
945 return(0);
946#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000947 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000948#ifdef DEBUG_MEMORY_LOCATION
949 return(1);
950#else
951 return(0);
952#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000953 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000954#ifdef LIBXML_DEBUG_RUNTIME
955 return(1);
956#else
957 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000958#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000959 case XML_WITH_ZLIB:
960#ifdef LIBXML_ZLIB_ENABLED
961 return(1);
962#else
963 return(0);
964#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +0200965 case XML_WITH_LZMA:
966#ifdef LIBXML_LZMA_ENABLED
967 return(1);
968#else
969 return(0);
970#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100971 case XML_WITH_ICU:
972#ifdef LIBXML_ICU_ENABLED
973 return(1);
974#else
975 return(0);
976#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000977 default:
978 break;
979 }
980 return(0);
981}
982
983/************************************************************************
984 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000985 * SAX2 defaulted attributes handling *
986 * *
987 ************************************************************************/
988
989/**
990 * xmlDetectSAX2:
991 * @ctxt: an XML parser context
992 *
993 * Do the SAX2 detection and specific intialization
994 */
995static void
996xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
997 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000998#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000999 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1000 ((ctxt->sax->startElementNs != NULL) ||
1001 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001002#else
1003 ctxt->sax2 = 1;
1004#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001005
1006 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1007 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1008 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +00001009 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1010 (ctxt->str_xml_ns == NULL)) {
1011 xmlErrMemory(ctxt, NULL);
1012 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001013}
1014
Daniel Veillarde57ec792003-09-10 10:50:59 +00001015typedef struct _xmlDefAttrs xmlDefAttrs;
1016typedef xmlDefAttrs *xmlDefAttrsPtr;
1017struct _xmlDefAttrs {
1018 int nbAttrs; /* number of defaulted attributes on that element */
1019 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001020 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001021};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001022
1023/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001024 * xmlAttrNormalizeSpace:
1025 * @src: the source string
1026 * @dst: the target string
1027 *
1028 * Normalize the space in non CDATA attribute values:
1029 * If the attribute type is not CDATA, then the XML processor MUST further
1030 * process the normalized attribute value by discarding any leading and
1031 * trailing space (#x20) characters, and by replacing sequences of space
1032 * (#x20) characters by a single space (#x20) character.
1033 * Note that the size of dst need to be at least src, and if one doesn't need
1034 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1035 * passing src as dst is just fine.
1036 *
1037 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1038 * is needed.
1039 */
1040static xmlChar *
1041xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1042{
1043 if ((src == NULL) || (dst == NULL))
1044 return(NULL);
1045
1046 while (*src == 0x20) src++;
1047 while (*src != 0) {
1048 if (*src == 0x20) {
1049 while (*src == 0x20) src++;
1050 if (*src != 0)
1051 *dst++ = 0x20;
1052 } else {
1053 *dst++ = *src++;
1054 }
1055 }
1056 *dst = 0;
1057 if (dst == src)
1058 return(NULL);
1059 return(dst);
1060}
1061
1062/**
1063 * xmlAttrNormalizeSpace2:
1064 * @src: the source string
1065 *
1066 * Normalize the space in non CDATA attribute values, a slightly more complex
1067 * front end to avoid allocation problems when running on attribute values
1068 * coming from the input.
1069 *
1070 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1071 * is needed.
1072 */
1073static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001074xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001075{
1076 int i;
1077 int remove_head = 0;
1078 int need_realloc = 0;
1079 const xmlChar *cur;
1080
1081 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1082 return(NULL);
1083 i = *len;
1084 if (i <= 0)
1085 return(NULL);
1086
1087 cur = src;
1088 while (*cur == 0x20) {
1089 cur++;
1090 remove_head++;
1091 }
1092 while (*cur != 0) {
1093 if (*cur == 0x20) {
1094 cur++;
1095 if ((*cur == 0x20) || (*cur == 0)) {
1096 need_realloc = 1;
1097 break;
1098 }
1099 } else
1100 cur++;
1101 }
1102 if (need_realloc) {
1103 xmlChar *ret;
1104
1105 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1106 if (ret == NULL) {
1107 xmlErrMemory(ctxt, NULL);
1108 return(NULL);
1109 }
1110 xmlAttrNormalizeSpace(ret, ret);
1111 *len = (int) strlen((const char *)ret);
1112 return(ret);
1113 } else if (remove_head) {
1114 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001115 memmove(src, src + remove_head, 1 + *len);
1116 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001117 }
1118 return(NULL);
1119}
1120
1121/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001122 * xmlAddDefAttrs:
1123 * @ctxt: an XML parser context
1124 * @fullname: the element fullname
1125 * @fullattr: the attribute fullname
1126 * @value: the attribute value
1127 *
1128 * Add a defaulted attribute for an element
1129 */
1130static void
1131xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1132 const xmlChar *fullname,
1133 const xmlChar *fullattr,
1134 const xmlChar *value) {
1135 xmlDefAttrsPtr defaults;
1136 int len;
1137 const xmlChar *name;
1138 const xmlChar *prefix;
1139
Daniel Veillard6a31b832008-03-26 14:06:44 +00001140 /*
1141 * Allows to detect attribute redefinitions
1142 */
1143 if (ctxt->attsSpecial != NULL) {
1144 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1145 return;
1146 }
1147
Daniel Veillarde57ec792003-09-10 10:50:59 +00001148 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001149 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001150 if (ctxt->attsDefault == NULL)
1151 goto mem_error;
1152 }
1153
1154 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001155 * split the element name into prefix:localname , the string found
1156 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 */
1158 name = xmlSplitQName3(fullname, &len);
1159 if (name == NULL) {
1160 name = xmlDictLookup(ctxt->dict, fullname, -1);
1161 prefix = NULL;
1162 } else {
1163 name = xmlDictLookup(ctxt->dict, name, -1);
1164 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1165 }
1166
1167 /*
1168 * make sure there is some storage
1169 */
1170 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1171 if (defaults == NULL) {
1172 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001173 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001174 if (defaults == NULL)
1175 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001176 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001177 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001178 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1179 defaults, NULL) < 0) {
1180 xmlFree(defaults);
1181 goto mem_error;
1182 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001183 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001184 xmlDefAttrsPtr temp;
1185
1186 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001187 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001188 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001189 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001190 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001191 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001192 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1193 defaults, NULL) < 0) {
1194 xmlFree(defaults);
1195 goto mem_error;
1196 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001197 }
1198
1199 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001200 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001201 * are within the DTD and hen not associated to namespace names.
1202 */
1203 name = xmlSplitQName3(fullattr, &len);
1204 if (name == NULL) {
1205 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1206 prefix = NULL;
1207 } else {
1208 name = xmlDictLookup(ctxt->dict, name, -1);
1209 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1210 }
1211
Daniel Veillardae0765b2008-07-31 19:54:59 +00001212 defaults->values[5 * defaults->nbAttrs] = name;
1213 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001214 /* intern the string and precompute the end */
1215 len = xmlStrlen(value);
1216 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001217 defaults->values[5 * defaults->nbAttrs + 2] = value;
1218 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1219 if (ctxt->external)
1220 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1221 else
1222 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001223 defaults->nbAttrs++;
1224
1225 return;
1226
1227mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001228 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001229 return;
1230}
1231
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001232/**
1233 * xmlAddSpecialAttr:
1234 * @ctxt: an XML parser context
1235 * @fullname: the element fullname
1236 * @fullattr: the attribute fullname
1237 * @type: the attribute type
1238 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001239 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001240 */
1241static void
1242xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1243 const xmlChar *fullname,
1244 const xmlChar *fullattr,
1245 int type)
1246{
1247 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001248 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001249 if (ctxt->attsSpecial == NULL)
1250 goto mem_error;
1251 }
1252
Daniel Veillardac4118d2008-01-11 05:27:32 +00001253 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1254 return;
1255
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001256 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1257 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001258 return;
1259
1260mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001261 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001262 return;
1263}
1264
Daniel Veillard4432df22003-09-28 18:58:27 +00001265/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001266 * xmlCleanSpecialAttrCallback:
1267 *
1268 * Removes CDATA attributes from the special attribute table
1269 */
1270static void
1271xmlCleanSpecialAttrCallback(void *payload, void *data,
1272 const xmlChar *fullname, const xmlChar *fullattr,
1273 const xmlChar *unused ATTRIBUTE_UNUSED) {
1274 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1275
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001276 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001277 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1278 }
1279}
1280
1281/**
1282 * xmlCleanSpecialAttr:
1283 * @ctxt: an XML parser context
1284 *
1285 * Trim the list of attributes defined to remove all those of type
1286 * CDATA as they are not special. This call should be done when finishing
1287 * to parse the DTD and before starting to parse the document root.
1288 */
1289static void
1290xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1291{
1292 if (ctxt->attsSpecial == NULL)
1293 return;
1294
1295 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1296
1297 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1298 xmlHashFree(ctxt->attsSpecial, NULL);
1299 ctxt->attsSpecial = NULL;
1300 }
1301 return;
1302}
1303
1304/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001305 * xmlCheckLanguageID:
1306 * @lang: pointer to the string value
1307 *
1308 * Checks that the value conforms to the LanguageID production:
1309 *
1310 * NOTE: this is somewhat deprecated, those productions were removed from
1311 * the XML Second edition.
1312 *
1313 * [33] LanguageID ::= Langcode ('-' Subcode)*
1314 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1315 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1316 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1317 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1318 * [38] Subcode ::= ([a-z] | [A-Z])+
1319 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001320 * The current REC reference the sucessors of RFC 1766, currently 5646
1321 *
1322 * http://www.rfc-editor.org/rfc/rfc5646.txt
1323 * langtag = language
1324 * ["-" script]
1325 * ["-" region]
1326 * *("-" variant)
1327 * *("-" extension)
1328 * ["-" privateuse]
1329 * language = 2*3ALPHA ; shortest ISO 639 code
1330 * ["-" extlang] ; sometimes followed by
1331 * ; extended language subtags
1332 * / 4ALPHA ; or reserved for future use
1333 * / 5*8ALPHA ; or registered language subtag
1334 *
1335 * extlang = 3ALPHA ; selected ISO 639 codes
1336 * *2("-" 3ALPHA) ; permanently reserved
1337 *
1338 * script = 4ALPHA ; ISO 15924 code
1339 *
1340 * region = 2ALPHA ; ISO 3166-1 code
1341 * / 3DIGIT ; UN M.49 code
1342 *
1343 * variant = 5*8alphanum ; registered variants
1344 * / (DIGIT 3alphanum)
1345 *
1346 * extension = singleton 1*("-" (2*8alphanum))
1347 *
1348 * ; Single alphanumerics
1349 * ; "x" reserved for private use
1350 * singleton = DIGIT ; 0 - 9
1351 * / %x41-57 ; A - W
1352 * / %x59-5A ; Y - Z
1353 * / %x61-77 ; a - w
1354 * / %x79-7A ; y - z
1355 *
1356 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1357 * The parser below doesn't try to cope with extension or privateuse
1358 * that could be added but that's not interoperable anyway
1359 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001360 * Returns 1 if correct 0 otherwise
1361 **/
1362int
1363xmlCheckLanguageID(const xmlChar * lang)
1364{
Daniel Veillard60587d62010-11-04 15:16:27 +01001365 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001366
1367 if (cur == NULL)
1368 return (0);
1369 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001370 ((cur[0] == 'I') && (cur[1] == '-')) ||
1371 ((cur[0] == 'x') && (cur[1] == '-')) ||
1372 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001373 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001374 * Still allow IANA code and user code which were coming
1375 * from the previous version of the XML-1.0 specification
1376 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001377 */
1378 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001379 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001380 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1381 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001382 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001383 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001384 nxt = cur;
1385 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1386 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1387 nxt++;
1388 if (nxt - cur >= 4) {
1389 /*
1390 * Reserved
1391 */
1392 if ((nxt - cur > 8) || (nxt[0] != 0))
1393 return(0);
1394 return(1);
1395 }
1396 if (nxt - cur < 2)
1397 return(0);
1398 /* we got an ISO 639 code */
1399 if (nxt[0] == 0)
1400 return(1);
1401 if (nxt[0] != '-')
1402 return(0);
1403
1404 nxt++;
1405 cur = nxt;
1406 /* now we can have extlang or script or region or variant */
1407 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1408 goto region_m49;
1409
1410 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1411 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1412 nxt++;
1413 if (nxt - cur == 4)
1414 goto script;
1415 if (nxt - cur == 2)
1416 goto region;
1417 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1418 goto variant;
1419 if (nxt - cur != 3)
1420 return(0);
1421 /* we parsed an extlang */
1422 if (nxt[0] == 0)
1423 return(1);
1424 if (nxt[0] != '-')
1425 return(0);
1426
1427 nxt++;
1428 cur = nxt;
1429 /* now we can have script or region or variant */
1430 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1431 goto region_m49;
1432
1433 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1434 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1435 nxt++;
1436 if (nxt - cur == 2)
1437 goto region;
1438 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1439 goto variant;
1440 if (nxt - cur != 4)
1441 return(0);
1442 /* we parsed a script */
1443script:
1444 if (nxt[0] == 0)
1445 return(1);
1446 if (nxt[0] != '-')
1447 return(0);
1448
1449 nxt++;
1450 cur = nxt;
1451 /* now we can have region or variant */
1452 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1453 goto region_m49;
1454
1455 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1456 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1457 nxt++;
1458
1459 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1460 goto variant;
1461 if (nxt - cur != 2)
1462 return(0);
1463 /* we parsed a region */
1464region:
1465 if (nxt[0] == 0)
1466 return(1);
1467 if (nxt[0] != '-')
1468 return(0);
1469
1470 nxt++;
1471 cur = nxt;
1472 /* now we can just have a variant */
1473 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1474 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1475 nxt++;
1476
1477 if ((nxt - cur < 5) || (nxt - cur > 8))
1478 return(0);
1479
1480 /* we parsed a variant */
1481variant:
1482 if (nxt[0] == 0)
1483 return(1);
1484 if (nxt[0] != '-')
1485 return(0);
1486 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001487 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001488
1489region_m49:
1490 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1491 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1492 nxt += 3;
1493 goto region;
1494 }
1495 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001496}
1497
Owen Taylor3473f882001-02-23 17:55:21 +00001498/************************************************************************
1499 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001500 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001501 * *
1502 ************************************************************************/
1503
Daniel Veillard8ed10722009-08-20 19:17:36 +02001504static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1505 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001506
Daniel Veillard0fb18932003-09-07 09:14:37 +00001507#ifdef SAX2
1508/**
1509 * nsPush:
1510 * @ctxt: an XML parser context
1511 * @prefix: the namespace prefix or NULL
1512 * @URL: the namespace name
1513 *
1514 * Pushes a new parser namespace on top of the ns stack
1515 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001516 * Returns -1 in case of error, -2 if the namespace should be discarded
1517 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001518 */
1519static int
1520nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1521{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001522 if (ctxt->options & XML_PARSE_NSCLEAN) {
1523 int i;
1524 for (i = 0;i < ctxt->nsNr;i += 2) {
1525 if (ctxt->nsTab[i] == prefix) {
1526 /* in scope */
1527 if (ctxt->nsTab[i + 1] == URL)
1528 return(-2);
1529 /* out of scope keep it */
1530 break;
1531 }
1532 }
1533 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001534 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1535 ctxt->nsMax = 10;
1536 ctxt->nsNr = 0;
1537 ctxt->nsTab = (const xmlChar **)
1538 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1539 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001540 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001541 ctxt->nsMax = 0;
1542 return (-1);
1543 }
1544 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001545 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001546 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001547 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1548 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1549 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001550 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001551 ctxt->nsMax /= 2;
1552 return (-1);
1553 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001554 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001555 }
1556 ctxt->nsTab[ctxt->nsNr++] = prefix;
1557 ctxt->nsTab[ctxt->nsNr++] = URL;
1558 return (ctxt->nsNr);
1559}
1560/**
1561 * nsPop:
1562 * @ctxt: an XML parser context
1563 * @nr: the number to pop
1564 *
1565 * Pops the top @nr parser prefix/namespace from the ns stack
1566 *
1567 * Returns the number of namespaces removed
1568 */
1569static int
1570nsPop(xmlParserCtxtPtr ctxt, int nr)
1571{
1572 int i;
1573
1574 if (ctxt->nsTab == NULL) return(0);
1575 if (ctxt->nsNr < nr) {
1576 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1577 nr = ctxt->nsNr;
1578 }
1579 if (ctxt->nsNr <= 0)
1580 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001581
Daniel Veillard0fb18932003-09-07 09:14:37 +00001582 for (i = 0;i < nr;i++) {
1583 ctxt->nsNr--;
1584 ctxt->nsTab[ctxt->nsNr] = NULL;
1585 }
1586 return(nr);
1587}
1588#endif
1589
1590static int
1591xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1592 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001593 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001594 int maxatts;
1595
1596 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001597 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001598 atts = (const xmlChar **)
1599 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001600 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001601 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001602 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1603 if (attallocs == NULL) goto mem_error;
1604 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001605 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001606 } else if (nr + 5 > ctxt->maxatts) {
1607 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001608 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1609 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001610 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001611 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001612 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1613 (maxatts / 5) * sizeof(int));
1614 if (attallocs == NULL) goto mem_error;
1615 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001616 ctxt->maxatts = maxatts;
1617 }
1618 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001619mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001620 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001621 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001622}
1623
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001624/**
1625 * inputPush:
1626 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001627 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001628 *
1629 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001630 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001631 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001632 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001633int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001634inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1635{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001636 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001637 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001638 if (ctxt->inputNr >= ctxt->inputMax) {
1639 ctxt->inputMax *= 2;
1640 ctxt->inputTab =
1641 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1642 ctxt->inputMax *
1643 sizeof(ctxt->inputTab[0]));
1644 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001645 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001646 xmlFreeInputStream(value);
1647 ctxt->inputMax /= 2;
1648 value = NULL;
1649 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001650 }
1651 }
1652 ctxt->inputTab[ctxt->inputNr] = value;
1653 ctxt->input = value;
1654 return (ctxt->inputNr++);
1655}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001656/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001657 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001658 * @ctxt: an XML parser context
1659 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001660 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001661 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001662 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001663 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001664xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001665inputPop(xmlParserCtxtPtr ctxt)
1666{
1667 xmlParserInputPtr ret;
1668
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001669 if (ctxt == NULL)
1670 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001671 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001672 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001673 ctxt->inputNr--;
1674 if (ctxt->inputNr > 0)
1675 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1676 else
1677 ctxt->input = NULL;
1678 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001679 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001680 return (ret);
1681}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001682/**
1683 * nodePush:
1684 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001685 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001686 *
1687 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001688 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001689 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001690 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001691int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1693{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001694 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001696 xmlNodePtr *tmp;
1697
1698 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1699 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001700 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001701 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001702 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001703 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001704 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001705 ctxt->nodeTab = tmp;
1706 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001707 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001708 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1709 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001710 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001711 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001712 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001713 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001714 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001715 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001716 ctxt->nodeTab[ctxt->nodeNr] = value;
1717 ctxt->node = value;
1718 return (ctxt->nodeNr++);
1719}
Daniel Veillard8915c152008-08-26 13:05:34 +00001720
Daniel Veillard1c732d22002-11-30 11:22:59 +00001721/**
1722 * nodePop:
1723 * @ctxt: an XML parser context
1724 *
1725 * Pops the top element node from the node stack
1726 *
1727 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001728 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001729xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001730nodePop(xmlParserCtxtPtr ctxt)
1731{
1732 xmlNodePtr ret;
1733
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001734 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001736 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001737 ctxt->nodeNr--;
1738 if (ctxt->nodeNr > 0)
1739 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1740 else
1741 ctxt->node = NULL;
1742 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001743 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001744 return (ret);
1745}
Daniel Veillarda2351322004-06-27 12:08:10 +00001746
1747#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001748/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001749 * nameNsPush:
1750 * @ctxt: an XML parser context
1751 * @value: the element name
1752 * @prefix: the element prefix
1753 * @URI: the element namespace name
1754 *
1755 * Pushes a new element name/prefix/URL on top of the name stack
1756 *
1757 * Returns -1 in case of error, the index in the stack otherwise
1758 */
1759static int
1760nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1761 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1762{
1763 if (ctxt->nameNr >= ctxt->nameMax) {
1764 const xmlChar * *tmp;
1765 void **tmp2;
1766 ctxt->nameMax *= 2;
1767 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1768 ctxt->nameMax *
1769 sizeof(ctxt->nameTab[0]));
1770 if (tmp == NULL) {
1771 ctxt->nameMax /= 2;
1772 goto mem_error;
1773 }
1774 ctxt->nameTab = tmp;
1775 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1776 ctxt->nameMax * 3 *
1777 sizeof(ctxt->pushTab[0]));
1778 if (tmp2 == NULL) {
1779 ctxt->nameMax /= 2;
1780 goto mem_error;
1781 }
1782 ctxt->pushTab = tmp2;
1783 }
1784 ctxt->nameTab[ctxt->nameNr] = value;
1785 ctxt->name = value;
1786 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1787 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001788 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001789 return (ctxt->nameNr++);
1790mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001791 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001792 return (-1);
1793}
1794/**
1795 * nameNsPop:
1796 * @ctxt: an XML parser context
1797 *
1798 * Pops the top element/prefix/URI name from the name stack
1799 *
1800 * Returns the name just removed
1801 */
1802static const xmlChar *
1803nameNsPop(xmlParserCtxtPtr ctxt)
1804{
1805 const xmlChar *ret;
1806
1807 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001808 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001809 ctxt->nameNr--;
1810 if (ctxt->nameNr > 0)
1811 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1812 else
1813 ctxt->name = NULL;
1814 ret = ctxt->nameTab[ctxt->nameNr];
1815 ctxt->nameTab[ctxt->nameNr] = NULL;
1816 return (ret);
1817}
Daniel Veillarda2351322004-06-27 12:08:10 +00001818#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001819
1820/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001821 * namePush:
1822 * @ctxt: an XML parser context
1823 * @value: the element name
1824 *
1825 * Pushes a new element name on top of the name stack
1826 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001827 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001828 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001829int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001830namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001831{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001832 if (ctxt == NULL) return (-1);
1833
Daniel Veillard1c732d22002-11-30 11:22:59 +00001834 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001835 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001836 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001837 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001838 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001839 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001840 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001841 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001842 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001843 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001844 }
1845 ctxt->nameTab[ctxt->nameNr] = value;
1846 ctxt->name = value;
1847 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001848mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001849 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001850 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001851}
1852/**
1853 * namePop:
1854 * @ctxt: an XML parser context
1855 *
1856 * Pops the top element name from the name stack
1857 *
1858 * Returns the name just removed
1859 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001860const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001861namePop(xmlParserCtxtPtr ctxt)
1862{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001863 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001864
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001865 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1866 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001867 ctxt->nameNr--;
1868 if (ctxt->nameNr > 0)
1869 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1870 else
1871 ctxt->name = NULL;
1872 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001873 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001874 return (ret);
1875}
Owen Taylor3473f882001-02-23 17:55:21 +00001876
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001877static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001878 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001879 int *tmp;
1880
Owen Taylor3473f882001-02-23 17:55:21 +00001881 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001882 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1883 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1884 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001885 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001886 ctxt->spaceMax /=2;
1887 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001888 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001889 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001890 }
1891 ctxt->spaceTab[ctxt->spaceNr] = val;
1892 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1893 return(ctxt->spaceNr++);
1894}
1895
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001896static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001897 int ret;
1898 if (ctxt->spaceNr <= 0) return(0);
1899 ctxt->spaceNr--;
1900 if (ctxt->spaceNr > 0)
1901 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1902 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001903 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001904 ret = ctxt->spaceTab[ctxt->spaceNr];
1905 ctxt->spaceTab[ctxt->spaceNr] = -1;
1906 return(ret);
1907}
1908
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
 *           to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1943
/* Direct accessors on the current input; all of them expect a local `ctxt`. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR  ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Bytes are compared as unsigned char; each CMPn builds on CMP(n-1) so
 * the comparison stops at the first mismatch. Every argument is
 * parenthesized so that arbitrary expressions can be passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1966
/*
 * SKIP(val): advance the current input by val xmlChars, counting each one
 * as a column (so it must not be used across newlines). Afterwards a '%'
 * triggers PE reference handling, and a 0 byte that cannot be resolved by
 * growing the input pops the input.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but steps one xmlChar at a time so that line
 * and column numbers stay correct when newlines are skipped. The count
 * expression is parenthesized; note it is re-evaluated on every step.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1989
Daniel Veillarda880b122003-04-21 21:36:41 +00001990#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001991 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1992 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001993 xmlSHRINK (ctxt);
1994
1995static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1996 xmlParserInputShrink(ctxt->input);
1997 if ((*ctxt->input->cur == 0) &&
1998 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1999 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002000 }
Owen Taylor3473f882001-02-23 17:55:21 +00002001
Daniel Veillarda880b122003-04-21 21:36:41 +00002002#define GROW if ((ctxt->progressive == 0) && \
2003 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002004 xmlGROW (ctxt);
2005
2006static void xmlGROW (xmlParserCtxtPtr ctxt) {
2007 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002008 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002009 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2010 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002011}
Owen Taylor3473f882001-02-23 17:55:21 +00002012
/* Shorthands; both expect a local `ctxt`. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, counting it as one column, and
 * ask for more input when a 0 byte is reached. No newline or PE reference
 * handling is done here.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * updating line/col, then handle a PE reference if a '%' follows.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: its address is passed to receive the length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a one-xmlChar character is stored directly, anything else
 * goes through xmlCopyCharMultiByte(). The expansion is an if/else
 * statement lacking its final ';', which the caller supplies.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002039
2040/**
2041 * xmlSkipBlankChars:
2042 * @ctxt: the XML parser context
2043 *
2044 * skip all blanks character found at that point in the input streams.
2045 * It pops up finished entities in the process if allowable at that point.
2046 *
2047 * Returns the number of space chars skipped
2048 */
2049
2050int
2051xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002052 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002053
2054 /*
2055 * It's Okay to use CUR/NEXT here since all the blanks are on
2056 * the ASCII range.
2057 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002058 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2059 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002060 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002061 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002062 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002063 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002064 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002065 if (*cur == '\n') {
2066 ctxt->input->line++; ctxt->input->col = 1;
2067 }
2068 cur++;
2069 res++;
2070 if (*cur == 0) {
2071 ctxt->input->cur = cur;
2072 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2073 cur = ctxt->input->cur;
2074 }
2075 }
2076 ctxt->input->cur = cur;
2077 } else {
2078 int cur;
2079 do {
2080 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002081 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002082 NEXT;
2083 cur = CUR;
2084 res++;
2085 }
2086 while ((cur == 0) && (ctxt->inputNr > 1) &&
2087 (ctxt->instate != XML_PARSER_COMMENT)) {
2088 xmlPopInput(ctxt);
2089 cur = CUR;
2090 }
2091 /*
2092 * Need to handle support of entities branching here
2093 */
2094 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2095 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2096 }
Owen Taylor3473f882001-02-23 17:55:21 +00002097 return(res);
2098}
2099
2100/************************************************************************
2101 * *
2102 * Commodity functions to handle entities *
2103 * *
2104 ************************************************************************/
2105
2106/**
2107 * xmlPopInput:
2108 * @ctxt: an XML parser context
2109 *
2110 * xmlPopInput: the current input pointed by ctxt->input came to an end
2111 * pop it and return the next char.
2112 *
2113 * Returns the current xmlChar in the parser context
2114 */
2115xmlChar
2116xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002117 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002118 if (xmlParserDebugEntities)
2119 xmlGenericError(xmlGenericErrorContext,
2120 "Popping input %d\n", ctxt->inputNr);
2121 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002122 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002123 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2124 return(xmlPopInput(ctxt));
2125 return(CUR);
2126}
2127
2128/**
2129 * xmlPushInput:
2130 * @ctxt: an XML parser context
2131 * @input: an XML parser input fragment (entity, XML fragment ...).
2132 *
2133 * xmlPushInput: switch to a new input stream which is stacked on top
2134 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002135 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002136 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002137int
Owen Taylor3473f882001-02-23 17:55:21 +00002138xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002139 int ret;
2140 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002141
2142 if (xmlParserDebugEntities) {
2143 if ((ctxt->input != NULL) && (ctxt->input->filename))
2144 xmlGenericError(xmlGenericErrorContext,
2145 "%s(%d): ", ctxt->input->filename,
2146 ctxt->input->line);
2147 xmlGenericError(xmlGenericErrorContext,
2148 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2149 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002150 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002151 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002152 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002153}
2154
2155/**
2156 * xmlParseCharRef:
2157 * @ctxt: an XML parser context
2158 *
2159 * parse Reference declarations
2160 *
2161 * [66] CharRef ::= '&#' [0-9]+ ';' |
2162 * '&#x' [0-9a-fA-F]+ ';'
2163 *
2164 * [ WFC: Legal Character ]
2165 * Characters referred to using character references must match the
2166 * production for Char.
2167 *
2168 * Returns the value parsed (as an int), 0 in case of error
2169 */
2170int
2171xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002172 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002173 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002174 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002175
Owen Taylor3473f882001-02-23 17:55:21 +00002176 /*
2177 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2178 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002179 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002180 (NXT(2) == 'x')) {
2181 SKIP(3);
2182 GROW;
2183 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002184 if (count++ > 20) {
2185 count = 0;
2186 GROW;
2187 }
2188 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002189 val = val * 16 + (CUR - '0');
2190 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2191 val = val * 16 + (CUR - 'a') + 10;
2192 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2193 val = val * 16 + (CUR - 'A') + 10;
2194 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002195 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002196 val = 0;
2197 break;
2198 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002199 if (val > 0x10FFFF)
2200 outofrange = val;
2201
Owen Taylor3473f882001-02-23 17:55:21 +00002202 NEXT;
2203 count++;
2204 }
2205 if (RAW == ';') {
2206 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002207 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002208 ctxt->nbChars ++;
2209 ctxt->input->cur++;
2210 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002211 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002212 SKIP(2);
2213 GROW;
2214 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002215 if (count++ > 20) {
2216 count = 0;
2217 GROW;
2218 }
2219 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002220 val = val * 10 + (CUR - '0');
2221 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002222 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002223 val = 0;
2224 break;
2225 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002226 if (val > 0x10FFFF)
2227 outofrange = val;
2228
Owen Taylor3473f882001-02-23 17:55:21 +00002229 NEXT;
2230 count++;
2231 }
2232 if (RAW == ';') {
2233 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002234 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002235 ctxt->nbChars ++;
2236 ctxt->input->cur++;
2237 }
2238 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002239 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002240 }
2241
2242 /*
2243 * [ WFC: Legal Character ]
2244 * Characters referred to using character references must match the
2245 * production for Char.
2246 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002247 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002248 return(val);
2249 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002250 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2251 "xmlParseCharRef: invalid xmlChar value %d\n",
2252 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002253 }
2254 return(0);
2255}
2256
2257/**
2258 * xmlParseStringCharRef:
2259 * @ctxt: an XML parser context
2260 * @str: a pointer to an index in the string
2261 *
2262 * parse Reference declarations, variant parsing from a string rather
2263 * than an an input flow.
2264 *
2265 * [66] CharRef ::= '&#' [0-9]+ ';' |
2266 * '&#x' [0-9a-fA-F]+ ';'
2267 *
2268 * [ WFC: Legal Character ]
2269 * Characters referred to using character references must match the
2270 * production for Char.
2271 *
2272 * Returns the value parsed (as an int), 0 in case of error, str will be
2273 * updated to the current value of the index
2274 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002275static int
Owen Taylor3473f882001-02-23 17:55:21 +00002276xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2277 const xmlChar *ptr;
2278 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002279 unsigned int val = 0;
2280 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002281
2282 if ((str == NULL) || (*str == NULL)) return(0);
2283 ptr = *str;
2284 cur = *ptr;
2285 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2286 ptr += 3;
2287 cur = *ptr;
2288 while (cur != ';') { /* Non input consuming loop */
2289 if ((cur >= '0') && (cur <= '9'))
2290 val = val * 16 + (cur - '0');
2291 else if ((cur >= 'a') && (cur <= 'f'))
2292 val = val * 16 + (cur - 'a') + 10;
2293 else if ((cur >= 'A') && (cur <= 'F'))
2294 val = val * 16 + (cur - 'A') + 10;
2295 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002296 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002297 val = 0;
2298 break;
2299 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002300 if (val > 0x10FFFF)
2301 outofrange = val;
2302
Owen Taylor3473f882001-02-23 17:55:21 +00002303 ptr++;
2304 cur = *ptr;
2305 }
2306 if (cur == ';')
2307 ptr++;
2308 } else if ((cur == '&') && (ptr[1] == '#')){
2309 ptr += 2;
2310 cur = *ptr;
2311 while (cur != ';') { /* Non input consuming loops */
2312 if ((cur >= '0') && (cur <= '9'))
2313 val = val * 10 + (cur - '0');
2314 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002315 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002316 val = 0;
2317 break;
2318 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002319 if (val > 0x10FFFF)
2320 outofrange = val;
2321
Owen Taylor3473f882001-02-23 17:55:21 +00002322 ptr++;
2323 cur = *ptr;
2324 }
2325 if (cur == ';')
2326 ptr++;
2327 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002328 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002329 return(0);
2330 }
2331 *str = ptr;
2332
2333 /*
2334 * [ WFC: Legal Character ]
2335 * Characters referred to using character references must match the
2336 * production for Char.
2337 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002338 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002339 return(val);
2340 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002341 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2342 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2343 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002344 }
2345 return(0);
2346}
2347
2348/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002349 * xmlNewBlanksWrapperInputStream:
2350 * @ctxt: an XML parser context
2351 * @entity: an Entity pointer
2352 *
2353 * Create a new input stream for wrapping
2354 * blanks around a PEReference
2355 *
2356 * Returns the new input stream or NULL
2357 */
2358
2359static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2360
Daniel Veillardf4862f02002-09-10 11:13:43 +00002361static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002362xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2363 xmlParserInputPtr input;
2364 xmlChar *buffer;
2365 size_t length;
2366 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002367 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2368 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002369 return(NULL);
2370 }
2371 if (xmlParserDebugEntities)
2372 xmlGenericError(xmlGenericErrorContext,
2373 "new blanks wrapper for entity: %s\n", entity->name);
2374 input = xmlNewInputStream(ctxt);
2375 if (input == NULL) {
2376 return(NULL);
2377 }
2378 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002379 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002380 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002381 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002382 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002383 return(NULL);
2384 }
2385 buffer [0] = ' ';
2386 buffer [1] = '%';
2387 buffer [length-3] = ';';
2388 buffer [length-2] = ' ';
2389 buffer [length-1] = 0;
2390 memcpy(buffer + 2, entity->name, length - 5);
2391 input->free = deallocblankswrapper;
2392 input->base = buffer;
2393 input->cur = buffer;
2394 input->length = length;
2395 input->end = &buffer[length];
2396 return(input);
2397}
2398
2399/**
Owen Taylor3473f882001-02-23 17:55:21 +00002400 * xmlParserHandlePEReference:
2401 * @ctxt: the parser context
2402 *
2403 * [69] PEReference ::= '%' Name ';'
2404 *
2405 * [ WFC: No Recursion ]
2406 * A parsed entity must not contain a recursive
2407 * reference to itself, either directly or indirectly.
2408 *
2409 * [ WFC: Entity Declared ]
2410 * In a document without any DTD, a document with only an internal DTD
2411 * subset which contains no parameter entity references, or a document
2412 * with "standalone='yes'", ... ... The declaration of a parameter
2413 * entity must precede any reference to it...
2414 *
2415 * [ VC: Entity Declared ]
2416 * In a document with an external subset or external parameter entities
2417 * with "standalone='no'", ... ... The declaration of a parameter entity
2418 * must precede any reference to it...
2419 *
2420 * [ WFC: In DTD ]
2421 * Parameter-entity references may only appear in the DTD.
2422 * NOTE: misleading but this is handled.
2423 *
2424 * A PEReference may have been detected in the current input stream
2425 * the handling is done accordingly to
2426 * http://www.w3.org/TR/REC-xml#entproc
2427 * i.e.
2428 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002429 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002430 */
2431void
2432xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002433 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002434 xmlEntityPtr entity = NULL;
2435 xmlParserInputPtr input;
2436
Owen Taylor3473f882001-02-23 17:55:21 +00002437 if (RAW != '%') return;
2438 switch(ctxt->instate) {
2439 case XML_PARSER_CDATA_SECTION:
2440 return;
2441 case XML_PARSER_COMMENT:
2442 return;
2443 case XML_PARSER_START_TAG:
2444 return;
2445 case XML_PARSER_END_TAG:
2446 return;
2447 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002448 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002449 return;
2450 case XML_PARSER_PROLOG:
2451 case XML_PARSER_START:
2452 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002453 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002454 return;
2455 case XML_PARSER_ENTITY_DECL:
2456 case XML_PARSER_CONTENT:
2457 case XML_PARSER_ATTRIBUTE_VALUE:
2458 case XML_PARSER_PI:
2459 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002460 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002461 /* we just ignore it there */
2462 return;
2463 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002464 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002465 return;
2466 case XML_PARSER_ENTITY_VALUE:
2467 /*
2468 * NOTE: in the case of entity values, we don't do the
2469 * substitution here since we need the literal
2470 * entity value to be able to save the internal
2471 * subset of the document.
2472 * This will be handled by xmlStringDecodeEntities
2473 */
2474 return;
2475 case XML_PARSER_DTD:
2476 /*
2477 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2478 * In the internal DTD subset, parameter-entity references
2479 * can occur only where markup declarations can occur, not
2480 * within markup declarations.
2481 * In that case this is handled in xmlParseMarkupDecl
2482 */
2483 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2484 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002485 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002486 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002487 break;
2488 case XML_PARSER_IGNORE:
2489 return;
2490 }
2491
2492 NEXT;
2493 name = xmlParseName(ctxt);
2494 if (xmlParserDebugEntities)
2495 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002496 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002497 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002498 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002499 } else {
2500 if (RAW == ';') {
2501 NEXT;
2502 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2503 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2504 if (entity == NULL) {
2505
2506 /*
2507 * [ WFC: Entity Declared ]
2508 * In a document without any DTD, a document with only an
2509 * internal DTD subset which contains no parameter entity
2510 * references, or a document with "standalone='yes'", ...
2511 * ... The declaration of a parameter entity must precede
2512 * any reference to it...
2513 */
2514 if ((ctxt->standalone == 1) ||
2515 ((ctxt->hasExternalSubset == 0) &&
2516 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002517 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002518 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002519 } else {
2520 /*
2521 * [ VC: Entity Declared ]
2522 * In a document with an external subset or external
2523 * parameter entities with "standalone='no'", ...
2524 * ... The declaration of a parameter entity must precede
2525 * any reference to it...
2526 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002527 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2528 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2529 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002530 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002531 } else
2532 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2533 "PEReference: %%%s; not found\n",
2534 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002535 ctxt->valid = 0;
2536 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002537 } else if (ctxt->input->free != deallocblankswrapper) {
2538 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002539 if (xmlPushInput(ctxt, input) < 0)
2540 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002541 } else {
2542 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2543 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002544 xmlChar start[4];
2545 xmlCharEncoding enc;
2546
Owen Taylor3473f882001-02-23 17:55:21 +00002547 /*
2548 * handle the extra spaces added before and after
2549 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002550 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002551 */
2552 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002553 if (xmlPushInput(ctxt, input) < 0)
2554 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002555
2556 /*
2557 * Get the 4 first bytes and decode the charset
2558 * if enc != XML_CHAR_ENCODING_NONE
2559 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002560 * Note that, since we may have some non-UTF8
2561 * encoding (like UTF16, bug 135229), the 'length'
2562 * is not known, but we can calculate based upon
2563 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002564 */
2565 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002566 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002567 start[0] = RAW;
2568 start[1] = NXT(1);
2569 start[2] = NXT(2);
2570 start[3] = NXT(3);
2571 enc = xmlDetectCharEncoding(start, 4);
2572 if (enc != XML_CHAR_ENCODING_NONE) {
2573 xmlSwitchEncoding(ctxt, enc);
2574 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002575 }
2576
Owen Taylor3473f882001-02-23 17:55:21 +00002577 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002578 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2579 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002580 xmlParseTextDecl(ctxt);
2581 }
Owen Taylor3473f882001-02-23 17:55:21 +00002582 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002583 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2584 "PEReference: %s is not a parameter entity\n",
2585 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002586 }
2587 }
2588 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002589 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002590 }
Owen Taylor3473f882001-02-23 17:55:21 +00002591 }
2592}
2593
/*
 * Macro used to grow the current buffer.
 * The new size is twice the current size plus n bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is
 *            reached on size overflow or when the reallocation fails,
 *            in which case buffer and buffer##_size are left untouched.
 * n is parenthesized so that any expression (e.g. a conditional) can be
 * passed without being torn apart by operator precedence.
 * The expansion is a plain brace block, as before, so existing call
 * sites keep compiling unchanged.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2608
2609/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002610 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002611 * @ctxt: the parser context
2612 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002613 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002614 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2615 * @end: an end marker xmlChar, 0 if none
2616 * @end2: an end marker xmlChar, 0 if none
2617 * @end3: an end marker xmlChar, 0 if none
2618 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002619 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002620 *
2621 * [67] Reference ::= EntityRef | CharRef
2622 *
2623 * [69] PEReference ::= '%' Name ';'
2624 *
2625 * Returns A newly allocated string with the substitution done. The caller
2626 * must deallocate it !
2627 */
2628xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002629xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2630 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002631 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002632 size_t buffer_size = 0;
2633 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002634
2635 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002636 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002637 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002638 xmlEntityPtr ent;
2639 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002640
Daniel Veillarda82b1822004-11-08 16:24:57 +00002641 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002642 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002643 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002644
Daniel Veillard0161e632008-08-28 15:36:32 +00002645 if (((ctxt->depth > 40) &&
2646 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2647 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002648 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002649 return(NULL);
2650 }
2651
2652 /*
2653 * allocate a translation buffer.
2654 */
2655 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002656 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002657 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002658
2659 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002660 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002661 * we are operating on already parsed values.
2662 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002663 if (str < last)
2664 c = CUR_SCHAR(str, l);
2665 else
2666 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002667 while ((c != 0) && (c != end) && /* non input consuming loop */
2668 (c != end2) && (c != end3)) {
2669
2670 if (c == 0) break;
2671 if ((c == '&') && (str[1] == '#')) {
2672 int val = xmlParseStringCharRef(ctxt, &str);
2673 if (val != 0) {
2674 COPY_BUF(0,buffer,nbchars,val);
2675 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002676 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002677 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002678 }
Owen Taylor3473f882001-02-23 17:55:21 +00002679 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2680 if (xmlParserDebugEntities)
2681 xmlGenericError(xmlGenericErrorContext,
2682 "String decoding Entity Reference: %.30s\n",
2683 str);
2684 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002685 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2686 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002687 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002688 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002689 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002690 if ((ent != NULL) &&
2691 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2692 if (ent->content != NULL) {
2693 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002694 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002695 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002696 }
Owen Taylor3473f882001-02-23 17:55:21 +00002697 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002698 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2699 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002700 }
2701 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002702 ctxt->depth++;
2703 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2704 0, 0, 0);
2705 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002706
Owen Taylor3473f882001-02-23 17:55:21 +00002707 if (rep != NULL) {
2708 current = rep;
2709 while (*current != 0) { /* non input consuming loop */
2710 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002711 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002712 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2713 goto int_error;
2714 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002715 }
2716 }
2717 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002718 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002719 }
2720 } else if (ent != NULL) {
2721 int i = xmlStrlen(ent->name);
2722 const xmlChar *cur = ent->name;
2723
2724 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002725 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002726 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002727 }
2728 for (;i > 0;i--)
2729 buffer[nbchars++] = *cur++;
2730 buffer[nbchars++] = ';';
2731 }
2732 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2733 if (xmlParserDebugEntities)
2734 xmlGenericError(xmlGenericErrorContext,
2735 "String decoding PE Reference: %.30s\n", str);
2736 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002737 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2738 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002739 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002740 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002741 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002742 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002743 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002744 }
Owen Taylor3473f882001-02-23 17:55:21 +00002745 ctxt->depth++;
2746 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2747 0, 0, 0);
2748 ctxt->depth--;
2749 if (rep != NULL) {
2750 current = rep;
2751 while (*current != 0) { /* non input consuming loop */
2752 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002753 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002754 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2755 goto int_error;
2756 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002757 }
2758 }
2759 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002760 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002761 }
2762 }
2763 } else {
2764 COPY_BUF(l,buffer,nbchars,c);
2765 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002766 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2767 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002768 }
2769 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002770 if (str < last)
2771 c = CUR_SCHAR(str, l);
2772 else
2773 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002774 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002775 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002776 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002777
2778mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002779 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002780int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002781 if (rep != NULL)
2782 xmlFree(rep);
2783 if (buffer != NULL)
2784 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002785 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002786}
2787
Daniel Veillarde57ec792003-09-10 10:50:59 +00002788/**
2789 * xmlStringDecodeEntities:
2790 * @ctxt: the parser context
2791 * @str: the input string
2792 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2793 * @end: an end marker xmlChar, 0 if none
2794 * @end2: an end marker xmlChar, 0 if none
2795 * @end3: an end marker xmlChar, 0 if none
2796 *
2797 * Takes a entity string content and process to do the adequate substitutions.
2798 *
2799 * [67] Reference ::= EntityRef | CharRef
2800 *
2801 * [69] PEReference ::= '%' Name ';'
2802 *
2803 * Returns A newly allocated string with the substitution done. The caller
2804 * must deallocate it !
2805 */
2806xmlChar *
2807xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2808 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002809 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002810 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2811 end, end2, end3));
2812}
Owen Taylor3473f882001-02-23 17:55:21 +00002813
2814/************************************************************************
2815 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002816 * Commodity functions, cleanup needed ? *
2817 * *
2818 ************************************************************************/
2819
2820/**
2821 * areBlanks:
2822 * @ctxt: an XML parser context
2823 * @str: a xmlChar *
2824 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002825 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002826 *
2827 * Is this a sequence of blank chars that one can ignore ?
2828 *
2829 * Returns 1 if ignorable 0 otherwise.
2830 */
2831
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002832static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2833 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002834 int i, ret;
2835 xmlNodePtr lastChild;
2836
Daniel Veillard05c13a22001-09-09 08:38:09 +00002837 /*
2838 * Don't spend time trying to differentiate them, the same callback is
2839 * used !
2840 */
2841 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002842 return(0);
2843
Owen Taylor3473f882001-02-23 17:55:21 +00002844 /*
2845 * Check for xml:space value.
2846 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002847 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2848 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002849 return(0);
2850
2851 /*
2852 * Check that the string is made of blanks
2853 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002854 if (blank_chars == 0) {
2855 for (i = 0;i < len;i++)
2856 if (!(IS_BLANK_CH(str[i]))) return(0);
2857 }
Owen Taylor3473f882001-02-23 17:55:21 +00002858
2859 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002860 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002861 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002862 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002863 if (ctxt->myDoc != NULL) {
2864 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2865 if (ret == 0) return(1);
2866 if (ret == 1) return(0);
2867 }
2868
2869 /*
2870 * Otherwise, heuristic :-\
2871 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002872 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002873 if ((ctxt->node->children == NULL) &&
2874 (RAW == '<') && (NXT(1) == '/')) return(0);
2875
2876 lastChild = xmlGetLastChild(ctxt->node);
2877 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002878 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2879 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002880 } else if (xmlNodeIsText(lastChild))
2881 return(0);
2882 else if ((ctxt->node->children != NULL) &&
2883 (xmlNodeIsText(ctxt->node->children)))
2884 return(0);
2885 return(1);
2886}
2887
Owen Taylor3473f882001-02-23 17:55:21 +00002888/************************************************************************
2889 * *
2890 * Extra stuff for namespace support *
2891 * Relates to http://www.w3.org/TR/WD-xml-names *
2892 * *
2893 ************************************************************************/
2894
2895/**
2896 * xmlSplitQName:
2897 * @ctxt: an XML parser context
 * @name:  the qualified name string to split
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002899 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002900 *
2901 * parse an UTF8 encoded XML qualified name string
2902 *
2903 * [NS 5] QName ::= (Prefix ':')? LocalPart
2904 *
2905 * [NS 6] Prefix ::= NCName
2906 *
2907 * [NS 7] LocalPart ::= NCName
2908 *
2909 * Returns the local part, and prefix is updated
2910 * to get the Prefix if any.
2911 */
2912
2913xmlChar *
2914xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2915 xmlChar buf[XML_MAX_NAMELEN + 5];
2916 xmlChar *buffer = NULL;
2917 int len = 0;
2918 int max = XML_MAX_NAMELEN;
2919 xmlChar *ret = NULL;
2920 const xmlChar *cur = name;
2921 int c;
2922
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002923 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002924 *prefix = NULL;
2925
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002926 if (cur == NULL) return(NULL);
2927
Owen Taylor3473f882001-02-23 17:55:21 +00002928#ifndef XML_XML_NAMESPACE
2929 /* xml: prefix is not really a namespace */
2930 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2931 (cur[2] == 'l') && (cur[3] == ':'))
2932 return(xmlStrdup(name));
2933#endif
2934
Daniel Veillard597bc482003-07-24 16:08:28 +00002935 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002936 if (cur[0] == ':')
2937 return(xmlStrdup(name));
2938
2939 c = *cur++;
2940 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2941 buf[len++] = c;
2942 c = *cur++;
2943 }
2944 if (len >= max) {
2945 /*
2946 * Okay someone managed to make a huge name, so he's ready to pay
2947 * for the processing speed.
2948 */
2949 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002950
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002951 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002952 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002953 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002954 return(NULL);
2955 }
2956 memcpy(buffer, buf, len);
2957 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2958 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002959 xmlChar *tmp;
2960
Owen Taylor3473f882001-02-23 17:55:21 +00002961 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002962 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002963 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002964 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002965 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002966 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002967 return(NULL);
2968 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002969 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002970 }
2971 buffer[len++] = c;
2972 c = *cur++;
2973 }
2974 buffer[len] = 0;
2975 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002976
Daniel Veillard597bc482003-07-24 16:08:28 +00002977 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002978 if (buffer != NULL)
2979 xmlFree(buffer);
2980 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002981 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002982 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002983
Owen Taylor3473f882001-02-23 17:55:21 +00002984 if (buffer == NULL)
2985 ret = xmlStrndup(buf, len);
2986 else {
2987 ret = buffer;
2988 buffer = NULL;
2989 max = XML_MAX_NAMELEN;
2990 }
2991
2992
2993 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002994 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002995 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002996 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002997 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002998 }
Owen Taylor3473f882001-02-23 17:55:21 +00002999 len = 0;
3000
Daniel Veillardbb284f42002-10-16 18:02:47 +00003001 /*
3002 * Check that the first character is proper to start
3003 * a new name
3004 */
3005 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3006 ((c >= 0x41) && (c <= 0x5A)) ||
3007 (c == '_') || (c == ':'))) {
3008 int l;
3009 int first = CUR_SCHAR(cur, l);
3010
3011 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003012 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003013 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003014 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003015 }
3016 }
3017 cur++;
3018
Owen Taylor3473f882001-02-23 17:55:21 +00003019 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3020 buf[len++] = c;
3021 c = *cur++;
3022 }
3023 if (len >= max) {
3024 /*
3025 * Okay someone managed to make a huge name, so he's ready to pay
3026 * for the processing speed.
3027 */
3028 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003029
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003030 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003031 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003032 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003033 return(NULL);
3034 }
3035 memcpy(buffer, buf, len);
3036 while (c != 0) { /* tested bigname2.xml */
3037 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003038 xmlChar *tmp;
3039
Owen Taylor3473f882001-02-23 17:55:21 +00003040 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003041 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003042 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003043 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003044 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003045 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003046 return(NULL);
3047 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003048 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003049 }
3050 buffer[len++] = c;
3051 c = *cur++;
3052 }
3053 buffer[len] = 0;
3054 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003055
Owen Taylor3473f882001-02-23 17:55:21 +00003056 if (buffer == NULL)
3057 ret = xmlStrndup(buf, len);
3058 else {
3059 ret = buffer;
3060 }
3061 }
3062
3063 return(ret);
3064}
3065
/************************************************************************
 *                                                                      *
 *              The parser itself                                       *
 *      Relates to http://www.w3.org/TR/REC-xml                         *
 *                                                                      *
 ************************************************************************/

/************************************************************************
 *                                                                      *
 *      Routines to parse Name, NCName and NmToken                      *
 *                                                                      *
 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled into DEBUG
 * builds. Each routine increments its own counter on entry. Objects with
 * static storage duration start at zero, so no initializer is needed.
 */
static unsigned long nbParseName;
static unsigned long nbParseNmToken;
static unsigned long nbParseNCName;
static unsigned long nbParseNCNameComplex;
static unsigned long nbParseNameComplex;
static unsigned long nbParseStringName;
#endif
3086
Daniel Veillard34e3f642008-07-29 09:02:27 +00003087/*
3088 * The two following functions are related to the change of accepted
3089 * characters for Name and NmToken in the Revision 5 of XML-1.0
3090 * They correspond to the modified production [4] and the new production [4a]
3091 * changes in that revision. Also note that the macros used for the
3092 * productions Letter, Digit, CombiningChar and Extender are not needed
3093 * anymore.
3094 * We still keep compatibility to pre-revision5 parsing semantic if the
3095 * new XML_PARSE_OLD10 option is given to the parser.
3096 */
3097static int
3098xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3099 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3100 /*
3101 * Use the new checks of production [4] [4a] amd [5] of the
3102 * Update 5 of XML-1.0
3103 */
3104 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3105 (((c >= 'a') && (c <= 'z')) ||
3106 ((c >= 'A') && (c <= 'Z')) ||
3107 (c == '_') || (c == ':') ||
3108 ((c >= 0xC0) && (c <= 0xD6)) ||
3109 ((c >= 0xD8) && (c <= 0xF6)) ||
3110 ((c >= 0xF8) && (c <= 0x2FF)) ||
3111 ((c >= 0x370) && (c <= 0x37D)) ||
3112 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3113 ((c >= 0x200C) && (c <= 0x200D)) ||
3114 ((c >= 0x2070) && (c <= 0x218F)) ||
3115 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3116 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3117 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3118 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3119 ((c >= 0x10000) && (c <= 0xEFFFF))))
3120 return(1);
3121 } else {
3122 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3123 return(1);
3124 }
3125 return(0);
3126}
3127
3128static int
3129xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3130 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3131 /*
3132 * Use the new checks of production [4] [4a] amd [5] of the
3133 * Update 5 of XML-1.0
3134 */
3135 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3136 (((c >= 'a') && (c <= 'z')) ||
3137 ((c >= 'A') && (c <= 'Z')) ||
3138 ((c >= '0') && (c <= '9')) || /* !start */
3139 (c == '_') || (c == ':') ||
3140 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3141 ((c >= 0xC0) && (c <= 0xD6)) ||
3142 ((c >= 0xD8) && (c <= 0xF6)) ||
3143 ((c >= 0xF8) && (c <= 0x2FF)) ||
3144 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3145 ((c >= 0x370) && (c <= 0x37D)) ||
3146 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3147 ((c >= 0x200C) && (c <= 0x200D)) ||
3148 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3149 ((c >= 0x2070) && (c <= 0x218F)) ||
3150 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3151 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3152 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3153 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3154 ((c >= 0x10000) && (c <= 0xEFFFF))))
3155 return(1);
3156 } else {
3157 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3158 (c == '.') || (c == '-') ||
3159 (c == '_') || (c == ':') ||
3160 (IS_COMBINING(c)) ||
3161 (IS_EXTENDER(c)))
3162 return(1);
3163 }
3164 return(0);
3165}
3166
Daniel Veillarde57ec792003-09-10 10:50:59 +00003167static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003168 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003169
Daniel Veillard34e3f642008-07-29 09:02:27 +00003170static const xmlChar *
3171xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3172 int len = 0, l;
3173 int c;
3174 int count = 0;
3175
Daniel Veillardc6561462009-03-25 10:22:31 +00003176#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003177 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003178#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003179
3180 /*
3181 * Handler for more complex cases
3182 */
3183 GROW;
3184 c = CUR_CHAR(l);
3185 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3186 /*
3187 * Use the new checks of production [4] [4a] amd [5] of the
3188 * Update 5 of XML-1.0
3189 */
3190 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3191 (!(((c >= 'a') && (c <= 'z')) ||
3192 ((c >= 'A') && (c <= 'Z')) ||
3193 (c == '_') || (c == ':') ||
3194 ((c >= 0xC0) && (c <= 0xD6)) ||
3195 ((c >= 0xD8) && (c <= 0xF6)) ||
3196 ((c >= 0xF8) && (c <= 0x2FF)) ||
3197 ((c >= 0x370) && (c <= 0x37D)) ||
3198 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3199 ((c >= 0x200C) && (c <= 0x200D)) ||
3200 ((c >= 0x2070) && (c <= 0x218F)) ||
3201 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3202 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3203 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3204 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3205 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3206 return(NULL);
3207 }
3208 len += l;
3209 NEXTL(l);
3210 c = CUR_CHAR(l);
3211 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3212 (((c >= 'a') && (c <= 'z')) ||
3213 ((c >= 'A') && (c <= 'Z')) ||
3214 ((c >= '0') && (c <= '9')) || /* !start */
3215 (c == '_') || (c == ':') ||
3216 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3217 ((c >= 0xC0) && (c <= 0xD6)) ||
3218 ((c >= 0xD8) && (c <= 0xF6)) ||
3219 ((c >= 0xF8) && (c <= 0x2FF)) ||
3220 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3221 ((c >= 0x370) && (c <= 0x37D)) ||
3222 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3223 ((c >= 0x200C) && (c <= 0x200D)) ||
3224 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3225 ((c >= 0x2070) && (c <= 0x218F)) ||
3226 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3227 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3228 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3229 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3230 ((c >= 0x10000) && (c <= 0xEFFFF))
3231 )) {
3232 if (count++ > 100) {
3233 count = 0;
3234 GROW;
3235 }
3236 len += l;
3237 NEXTL(l);
3238 c = CUR_CHAR(l);
3239 }
3240 } else {
3241 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3242 (!IS_LETTER(c) && (c != '_') &&
3243 (c != ':'))) {
3244 return(NULL);
3245 }
3246 len += l;
3247 NEXTL(l);
3248 c = CUR_CHAR(l);
3249
3250 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3251 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3252 (c == '.') || (c == '-') ||
3253 (c == '_') || (c == ':') ||
3254 (IS_COMBINING(c)) ||
3255 (IS_EXTENDER(c)))) {
3256 if (count++ > 100) {
3257 count = 0;
3258 GROW;
3259 }
3260 len += l;
3261 NEXTL(l);
3262 c = CUR_CHAR(l);
3263 }
3264 }
3265 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3266 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3267 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3268}
3269
Owen Taylor3473f882001-02-23 17:55:21 +00003270/**
3271 * xmlParseName:
3272 * @ctxt: an XML parser context
3273 *
3274 * parse an XML name.
3275 *
3276 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3277 * CombiningChar | Extender
3278 *
3279 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3280 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003281 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003282 *
3283 * Returns the Name parsed or NULL
3284 */
3285
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003286const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003287xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003288 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003289 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003290 int count = 0;
3291
3292 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003293
Daniel Veillardc6561462009-03-25 10:22:31 +00003294#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003295 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003296#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003297
Daniel Veillard48b2f892001-02-25 16:11:03 +00003298 /*
3299 * Accelerator for simple ASCII names
3300 */
3301 in = ctxt->input->cur;
3302 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3303 ((*in >= 0x41) && (*in <= 0x5A)) ||
3304 (*in == '_') || (*in == ':')) {
3305 in++;
3306 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3307 ((*in >= 0x41) && (*in <= 0x5A)) ||
3308 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003309 (*in == '_') || (*in == '-') ||
3310 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003311 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003312 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003313 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003314 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003315 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003316 ctxt->nbChars += count;
3317 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003318 if (ret == NULL)
3319 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003320 return(ret);
3321 }
3322 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003323 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003324 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003325}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003326
Daniel Veillard34e3f642008-07-29 09:02:27 +00003327static const xmlChar *
3328xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3329 int len = 0, l;
3330 int c;
3331 int count = 0;
3332
Daniel Veillardc6561462009-03-25 10:22:31 +00003333#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003334 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003335#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003336
3337 /*
3338 * Handler for more complex cases
3339 */
3340 GROW;
3341 c = CUR_CHAR(l);
3342 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3343 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3344 return(NULL);
3345 }
3346
3347 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3348 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3349 if (count++ > 100) {
3350 count = 0;
3351 GROW;
3352 }
3353 len += l;
3354 NEXTL(l);
3355 c = CUR_CHAR(l);
3356 }
3357 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3358}
3359
3360/**
3361 * xmlParseNCName:
3362 * @ctxt: an XML parser context
3363 * @len: lenght of the string parsed
3364 *
3365 * parse an XML name.
3366 *
3367 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3368 * CombiningChar | Extender
3369 *
3370 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3371 *
3372 * Returns the Name parsed or NULL
3373 */
3374
3375static const xmlChar *
3376xmlParseNCName(xmlParserCtxtPtr ctxt) {
3377 const xmlChar *in;
3378 const xmlChar *ret;
3379 int count = 0;
3380
Daniel Veillardc6561462009-03-25 10:22:31 +00003381#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003382 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003383#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003384
3385 /*
3386 * Accelerator for simple ASCII names
3387 */
3388 in = ctxt->input->cur;
3389 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3390 ((*in >= 0x41) && (*in <= 0x5A)) ||
3391 (*in == '_')) {
3392 in++;
3393 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3394 ((*in >= 0x41) && (*in <= 0x5A)) ||
3395 ((*in >= 0x30) && (*in <= 0x39)) ||
3396 (*in == '_') || (*in == '-') ||
3397 (*in == '.'))
3398 in++;
3399 if ((*in > 0) && (*in < 0x80)) {
3400 count = in - ctxt->input->cur;
3401 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3402 ctxt->input->cur = in;
3403 ctxt->nbChars += count;
3404 ctxt->input->col += count;
3405 if (ret == NULL) {
3406 xmlErrMemory(ctxt, NULL);
3407 }
3408 return(ret);
3409 }
3410 }
3411 return(xmlParseNCNameComplex(ctxt));
3412}
3413
Daniel Veillard46de64e2002-05-29 08:21:33 +00003414/**
3415 * xmlParseNameAndCompare:
3416 * @ctxt: an XML parser context
3417 *
3418 * parse an XML name and compares for match
3419 * (specialized for endtag parsing)
3420 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003421 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3422 * and the name for mismatch
3423 */
3424
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003425static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003426xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003427 register const xmlChar *cmp = other;
3428 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003429 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003430
3431 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003432
Daniel Veillard46de64e2002-05-29 08:21:33 +00003433 in = ctxt->input->cur;
3434 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003435 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003436 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003437 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003438 }
William M. Brack76e95df2003-10-18 16:20:14 +00003439 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003440 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003441 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003442 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003443 }
3444 /* failure (or end of input buffer), check with full function */
3445 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003446 /* strings coming from the dictionnary direct compare possible */
3447 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003448 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003449 }
3450 return ret;
3451}
3452
Owen Taylor3473f882001-02-23 17:55:21 +00003453/**
3454 * xmlParseStringName:
3455 * @ctxt: an XML parser context
3456 * @str: a pointer to the string pointer (IN/OUT)
3457 *
3458 * parse an XML name.
3459 *
3460 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3461 * CombiningChar | Extender
3462 *
3463 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3464 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003465 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003466 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003467 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003468 * is updated to the current location in the string.
3469 */
3470
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003471static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003472xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3473 xmlChar buf[XML_MAX_NAMELEN + 5];
3474 const xmlChar *cur = *str;
3475 int len = 0, l;
3476 int c;
3477
Daniel Veillardc6561462009-03-25 10:22:31 +00003478#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003479 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003480#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003481
Owen Taylor3473f882001-02-23 17:55:21 +00003482 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003483 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003484 return(NULL);
3485 }
3486
Daniel Veillard34e3f642008-07-29 09:02:27 +00003487 COPY_BUF(l,buf,len,c);
3488 cur += l;
3489 c = CUR_SCHAR(cur, l);
3490 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003491 COPY_BUF(l,buf,len,c);
3492 cur += l;
3493 c = CUR_SCHAR(cur, l);
3494 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3495 /*
3496 * Okay someone managed to make a huge name, so he's ready to pay
3497 * for the processing speed.
3498 */
3499 xmlChar *buffer;
3500 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003501
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003502 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003503 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003504 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003505 return(NULL);
3506 }
3507 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003508 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003509 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003510 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003511 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003512 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003513 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003514 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003515 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003516 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003517 return(NULL);
3518 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003519 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003520 }
3521 COPY_BUF(l,buffer,len,c);
3522 cur += l;
3523 c = CUR_SCHAR(cur, l);
3524 }
3525 buffer[len] = 0;
3526 *str = cur;
3527 return(buffer);
3528 }
3529 }
3530 *str = cur;
3531 return(xmlStrndup(buf, len));
3532}
3533
3534/**
3535 * xmlParseNmtoken:
3536 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003537 *
Owen Taylor3473f882001-02-23 17:55:21 +00003538 * parse an XML Nmtoken.
3539 *
3540 * [7] Nmtoken ::= (NameChar)+
3541 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003542 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003543 *
3544 * Returns the Nmtoken parsed or NULL
3545 */
3546
3547xmlChar *
3548xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3549 xmlChar buf[XML_MAX_NAMELEN + 5];
3550 int len = 0, l;
3551 int c;
3552 int count = 0;
3553
Daniel Veillardc6561462009-03-25 10:22:31 +00003554#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003555 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003556#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003557
Owen Taylor3473f882001-02-23 17:55:21 +00003558 GROW;
3559 c = CUR_CHAR(l);
3560
Daniel Veillard34e3f642008-07-29 09:02:27 +00003561 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003562 if (count++ > 100) {
3563 count = 0;
3564 GROW;
3565 }
3566 COPY_BUF(l,buf,len,c);
3567 NEXTL(l);
3568 c = CUR_CHAR(l);
3569 if (len >= XML_MAX_NAMELEN) {
3570 /*
3571 * Okay someone managed to make a huge token, so he's ready to pay
3572 * for the processing speed.
3573 */
3574 xmlChar *buffer;
3575 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003576
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003577 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003578 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003579 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003580 return(NULL);
3581 }
3582 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003583 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003584 if (count++ > 100) {
3585 count = 0;
3586 GROW;
3587 }
3588 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003589 xmlChar *tmp;
3590
Owen Taylor3473f882001-02-23 17:55:21 +00003591 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003592 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003593 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003594 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003595 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003596 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003597 return(NULL);
3598 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003599 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003600 }
3601 COPY_BUF(l,buffer,len,c);
3602 NEXTL(l);
3603 c = CUR_CHAR(l);
3604 }
3605 buffer[len] = 0;
3606 return(buffer);
3607 }
3608 }
3609 if (len == 0)
3610 return(NULL);
3611 return(xmlStrndup(buf, len));
3612}
3613
3614/**
3615 * xmlParseEntityValue:
3616 * @ctxt: an XML parser context
3617 * @orig: if non-NULL store a copy of the original entity value
3618 *
3619 * parse a value for ENTITY declarations
3620 *
3621 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3622 * "'" ([^%&'] | PEReference | Reference)* "'"
3623 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003624 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003625 */
3626
3627xmlChar *
3628xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3629 xmlChar *buf = NULL;
3630 int len = 0;
3631 int size = XML_PARSER_BUFFER_SIZE;
3632 int c, l;
3633 xmlChar stop;
3634 xmlChar *ret = NULL;
3635 const xmlChar *cur = NULL;
3636 xmlParserInputPtr input;
3637
3638 if (RAW == '"') stop = '"';
3639 else if (RAW == '\'') stop = '\'';
3640 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003641 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003642 return(NULL);
3643 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003644 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003645 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003646 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003647 return(NULL);
3648 }
3649
3650 /*
3651 * The content of the entity definition is copied in a buffer.
3652 */
3653
3654 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3655 input = ctxt->input;
3656 GROW;
3657 NEXT;
3658 c = CUR_CHAR(l);
3659 /*
3660 * NOTE: 4.4.5 Included in Literal
3661 * When a parameter entity reference appears in a literal entity
3662 * value, ... a single or double quote character in the replacement
3663 * text is always treated as a normal data character and will not
3664 * terminate the literal.
3665 * In practice it means we stop the loop only when back at parsing
3666 * the initial entity and the quote is found
3667 */
William M. Brack871611b2003-10-18 04:53:14 +00003668 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003669 (ctxt->input != input))) {
3670 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003671 xmlChar *tmp;
3672
Owen Taylor3473f882001-02-23 17:55:21 +00003673 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003674 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3675 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003676 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003677 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003678 return(NULL);
3679 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003680 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003681 }
3682 COPY_BUF(l,buf,len,c);
3683 NEXTL(l);
3684 /*
3685 * Pop-up of finished entities.
3686 */
3687 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3688 xmlPopInput(ctxt);
3689
3690 GROW;
3691 c = CUR_CHAR(l);
3692 if (c == 0) {
3693 GROW;
3694 c = CUR_CHAR(l);
3695 }
3696 }
3697 buf[len] = 0;
3698
3699 /*
3700 * Raise problem w.r.t. '&' and '%' being used in non-entities
3701 * reference constructs. Note Charref will be handled in
3702 * xmlStringDecodeEntities()
3703 */
3704 cur = buf;
3705 while (*cur != 0) { /* non input consuming */
3706 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3707 xmlChar *name;
3708 xmlChar tmp = *cur;
3709
3710 cur++;
3711 name = xmlParseStringName(ctxt, &cur);
3712 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003713 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003714 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003715 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003716 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003717 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3718 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003719 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003720 }
3721 if (name != NULL)
3722 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003723 if (*cur == 0)
3724 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003725 }
3726 cur++;
3727 }
3728
3729 /*
3730 * Then PEReference entities are substituted.
3731 */
3732 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003733 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003734 xmlFree(buf);
3735 } else {
3736 NEXT;
3737 /*
3738 * NOTE: 4.4.7 Bypassed
3739 * When a general entity reference appears in the EntityValue in
3740 * an entity declaration, it is bypassed and left as is.
3741 * so XML_SUBSTITUTE_REF is not set here.
3742 */
3743 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3744 0, 0, 0);
3745 if (orig != NULL)
3746 *orig = buf;
3747 else
3748 xmlFree(buf);
3749 }
3750
3751 return(ret);
3752}
3753
3754/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003755 * xmlParseAttValueComplex:
3756 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003757 * @len: the resulting attribute len
 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003759 *
3760 * parse a value for an attribute, this is the fallback function
3761 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003762 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003763 *
3764 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3765 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003766static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003767xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003768 xmlChar limit = 0;
3769 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003770 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003771 size_t len = 0;
3772 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003773 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003774 xmlChar *current = NULL;
3775 xmlEntityPtr ent;
3776
Owen Taylor3473f882001-02-23 17:55:21 +00003777 if (NXT(0) == '"') {
3778 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3779 limit = '"';
3780 NEXT;
3781 } else if (NXT(0) == '\'') {
3782 limit = '\'';
3783 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3784 NEXT;
3785 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003786 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003787 return(NULL);
3788 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003789
Owen Taylor3473f882001-02-23 17:55:21 +00003790 /*
3791 * allocate a translation buffer.
3792 */
3793 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003794 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003795 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003796
3797 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003798 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003799 */
3800 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003801 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003802 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003803 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003804 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003805 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003806 if (NXT(1) == '#') {
3807 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003808
Owen Taylor3473f882001-02-23 17:55:21 +00003809 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003810 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003811 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003812 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003813 }
3814 buf[len++] = '&';
3815 } else {
3816 /*
3817 * The reparsing will be done in xmlStringGetNodeList()
3818 * called by the attribute() function in SAX.c
3819 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003820 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003821 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003822 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003823 buf[len++] = '&';
3824 buf[len++] = '#';
3825 buf[len++] = '3';
3826 buf[len++] = '8';
3827 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003828 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003829 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003830 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003831 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003832 }
Owen Taylor3473f882001-02-23 17:55:21 +00003833 len += xmlCopyChar(0, &buf[len], val);
3834 }
3835 } else {
3836 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003837 ctxt->nbentities++;
3838 if (ent != NULL)
3839 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003840 if ((ent != NULL) &&
3841 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003842 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003843 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003844 }
3845 if ((ctxt->replaceEntities == 0) &&
3846 (ent->content[0] == '&')) {
3847 buf[len++] = '&';
3848 buf[len++] = '#';
3849 buf[len++] = '3';
3850 buf[len++] = '8';
3851 buf[len++] = ';';
3852 } else {
3853 buf[len++] = ent->content[0];
3854 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003855 } else if ((ent != NULL) &&
3856 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003857 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3858 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003859 XML_SUBSTITUTE_REF,
3860 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003861 if (rep != NULL) {
3862 current = rep;
3863 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003864 if ((*current == 0xD) || (*current == 0xA) ||
3865 (*current == 0x9)) {
3866 buf[len++] = 0x20;
3867 current++;
3868 } else
3869 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003870 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003871 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003872 }
3873 }
3874 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003875 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003876 }
3877 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003878 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003879 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003880 }
Owen Taylor3473f882001-02-23 17:55:21 +00003881 if (ent->content != NULL)
3882 buf[len++] = ent->content[0];
3883 }
3884 } else if (ent != NULL) {
3885 int i = xmlStrlen(ent->name);
3886 const xmlChar *cur = ent->name;
3887
3888 /*
3889 * This may look absurd but is needed to detect
3890 * entities problems
3891 */
3892 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3893 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003894 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003895 XML_SUBSTITUTE_REF, 0, 0, 0);
3896 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003897 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003898 rep = NULL;
3899 }
Owen Taylor3473f882001-02-23 17:55:21 +00003900 }
3901
3902 /*
3903 * Just output the reference
3904 */
3905 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08003906 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003907 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003908 }
3909 for (;i > 0;i--)
3910 buf[len++] = *cur++;
3911 buf[len++] = ';';
3912 }
3913 }
3914 } else {
3915 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003916 if ((len != 0) || (!normalize)) {
3917 if ((!normalize) || (!in_space)) {
3918 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08003919 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003920 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003921 }
3922 }
3923 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003924 }
3925 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003926 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003927 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08003928 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003929 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003930 }
3931 }
3932 NEXTL(l);
3933 }
3934 GROW;
3935 c = CUR_CHAR(l);
3936 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003937 if ((in_space) && (normalize)) {
3938 while (buf[len - 1] == 0x20) len--;
3939 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003940 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003941 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003942 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003943 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003944 if ((c != 0) && (!IS_CHAR(c))) {
3945 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3946 "invalid character in attribute value\n");
3947 } else {
3948 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3949 "AttValue: ' expected\n");
3950 }
Owen Taylor3473f882001-02-23 17:55:21 +00003951 } else
3952 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003953
3954 /*
3955 * There we potentially risk an overflow, don't allow attribute value of
3956 * length more than INT_MAX it is a very reasonable assumption !
3957 */
3958 if (len >= INT_MAX) {
3959 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3960 "AttValue lenght too long\n");
3961 goto mem_error;
3962 }
3963
3964 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00003965 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003966
3967mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003968 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003969 if (buf != NULL)
3970 xmlFree(buf);
3971 if (rep != NULL)
3972 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003973 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003974}
3975
3976/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003977 * xmlParseAttValue:
3978 * @ctxt: an XML parser context
3979 *
3980 * parse a value for an attribute
3981 * Note: the parser won't do substitution of entities here, this
3982 * will be handled later in xmlStringGetNodeList
3983 *
3984 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3985 * "'" ([^<&'] | Reference)* "'"
3986 *
3987 * 3.3.3 Attribute-Value Normalization:
3988 * Before the value of an attribute is passed to the application or
3989 * checked for validity, the XML processor must normalize it as follows:
3990 * - a character reference is processed by appending the referenced
3991 * character to the attribute value
3992 * - an entity reference is processed by recursively processing the
3993 * replacement text of the entity
3994 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3995 * appending #x20 to the normalized value, except that only a single
3996 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3997 * parsed entity or the literal entity value of an internal parsed entity
3998 * - other characters are processed by appending them to the normalized value
3999 * If the declared value is not CDATA, then the XML processor must further
4000 * process the normalized attribute value by discarding any leading and
4001 * trailing space (#x20) characters, and by replacing sequences of space
4002 * (#x20) characters by a single space (#x20) character.
4003 * All attributes for which no declaration has been read should be treated
4004 * by a non-validating parser as if declared CDATA.
4005 *
4006 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4007 */
4008
4009
4010xmlChar *
4011xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004012 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004013 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004014}
4015
4016/**
Owen Taylor3473f882001-02-23 17:55:21 +00004017 * xmlParseSystemLiteral:
4018 * @ctxt: an XML parser context
4019 *
4020 * parse an XML Literal
4021 *
4022 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4023 *
4024 * Returns the SystemLiteral parsed or NULL
4025 */
4026
4027xmlChar *
4028xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4029 xmlChar *buf = NULL;
4030 int len = 0;
4031 int size = XML_PARSER_BUFFER_SIZE;
4032 int cur, l;
4033 xmlChar stop;
4034 int state = ctxt->instate;
4035 int count = 0;
4036
4037 SHRINK;
4038 if (RAW == '"') {
4039 NEXT;
4040 stop = '"';
4041 } else if (RAW == '\'') {
4042 NEXT;
4043 stop = '\'';
4044 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004045 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004046 return(NULL);
4047 }
4048
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004049 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004050 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004051 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004052 return(NULL);
4053 }
4054 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4055 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004056 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004057 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004058 xmlChar *tmp;
4059
Owen Taylor3473f882001-02-23 17:55:21 +00004060 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004061 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4062 if (tmp == NULL) {
4063 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004064 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004065 ctxt->instate = (xmlParserInputState) state;
4066 return(NULL);
4067 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004068 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004069 }
4070 count++;
4071 if (count > 50) {
4072 GROW;
4073 count = 0;
4074 }
4075 COPY_BUF(l,buf,len,cur);
4076 NEXTL(l);
4077 cur = CUR_CHAR(l);
4078 if (cur == 0) {
4079 GROW;
4080 SHRINK;
4081 cur = CUR_CHAR(l);
4082 }
4083 }
4084 buf[len] = 0;
4085 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004086 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004087 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004088 } else {
4089 NEXT;
4090 }
4091 return(buf);
4092}
4093
4094/**
4095 * xmlParsePubidLiteral:
4096 * @ctxt: an XML parser context
4097 *
4098 * parse an XML public literal
4099 *
4100 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4101 *
4102 * Returns the PubidLiteral parsed or NULL.
4103 */
4104
4105xmlChar *
4106xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4107 xmlChar *buf = NULL;
4108 int len = 0;
4109 int size = XML_PARSER_BUFFER_SIZE;
4110 xmlChar cur;
4111 xmlChar stop;
4112 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004113 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004114
4115 SHRINK;
4116 if (RAW == '"') {
4117 NEXT;
4118 stop = '"';
4119 } else if (RAW == '\'') {
4120 NEXT;
4121 stop = '\'';
4122 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004123 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004124 return(NULL);
4125 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004126 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004127 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004128 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004129 return(NULL);
4130 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004131 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004132 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004133 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004134 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004135 xmlChar *tmp;
4136
Owen Taylor3473f882001-02-23 17:55:21 +00004137 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004138 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4139 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004140 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004141 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004142 return(NULL);
4143 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004144 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004145 }
4146 buf[len++] = cur;
4147 count++;
4148 if (count > 50) {
4149 GROW;
4150 count = 0;
4151 }
4152 NEXT;
4153 cur = CUR;
4154 if (cur == 0) {
4155 GROW;
4156 SHRINK;
4157 cur = CUR;
4158 }
4159 }
4160 buf[len] = 0;
4161 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004162 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004163 } else {
4164 NEXT;
4165 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004166 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004167 return(buf);
4168}
4169
/* Forward declaration: xmlParseCharData() calls this fallback, which is defined after it. */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004170static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004171
4172/*
4173 * used for the test in the inner loop of the char data testing
 *
 * The table is indexed by one input byte. A non-zero entry (always equal
 * to the index itself) means the scanning loop in xmlParseCharData() may
 * step over that byte without further inspection. A zero entry stops the
 * loop so the caller can examine the byte. Zero entries are:
 *   - every byte below 0x20 except 0x09 (so 0x0A and 0x0D stop the loop)
 *   - '&' (0x26), '<' (0x3C) and ']' (0x5D)
 *   - every byte from 0x80 upward (non-ASCII)
4174 */
4175static const unsigned char test_char_data[256] = {
4176 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4177 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
4178 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4179 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4180 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
4181 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
4182 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
4183 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
4184 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
4185 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
4186 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
4187 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
4188 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
4189 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
4190 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
4191 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
4192 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
4193 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4194 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4195 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4196 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4197 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4198 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4199 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4200 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4201 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4202 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4203 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4204 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4205 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4206 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
4207 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
4208};
4209
Owen Taylor3473f882001-02-23 17:55:21 +00004210/**
4211 * xmlParseCharData:
4212 * @ctxt: an XML parser context
4213 * @cdata: int indicating whether we are within a CDATA section
4214 *
4215 * parse a CharData section.
4216 * if we are within a CDATA section ']]>' marks an end of section.
4217 *
4218 * The right angle bracket (>) may be represented using the string "&gt;",
4219 * and must, for compatibility, be escaped using "&gt;" or a character
4220 * reference when it appears in the string "]]>" in content, when that
4221 * string is not marking the end of a CDATA section.
4222 *
4223 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * Implementation outline: when @cdata is 0 a fast path scans the input
 * buffer in place and hands runs of bytes straight to the SAX callbacks
 * without copying them. As soon as a byte is met that the fast path does
 * not handle (or when @cdata is non-zero) the line/column counters are
 * put back to the last position that was delivered and the remaining
 * work is left to xmlParseCharDataComplex().
4224 */
4225
4226void
4227xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004228 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004229 int nbchar = 0;
 /* position of the last delivered data, restored before the fallback */
Daniel Veillard50582112001-03-26 22:52:16 +00004230 int line = ctxt->input->line;
4231 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004232 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004233
4234 SHRINK;
4235 GROW;
4236 /*
4237 * Accelerated common case where input don't need to be
4238 * modified before passing it to the handler.
4239 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004240 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004241 in = ctxt->input->cur;
4242 do {
 /* Step 1: skip a leading run made only of spaces (0x20) and LF (0xA) */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004243get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004244 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004245 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004246 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004247 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004248 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004249 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004250 goto get_more_space;
4251 }
 /*
  * The run ended on '<': everything since input->cur is spaces and
  * line feeds. Consume it, let areBlanks() choose between the
  * ignorableWhitespace and characters callbacks, then return.
  */
4252 if (*in == '<') {
4253 nbchar = in - ctxt->input->cur;
4254 if (nbchar > 0) {
4255 const xmlChar *tmp = ctxt->input->cur;
4256 ctxt->input->cur = in;
4257
Daniel Veillard34099b42004-11-04 17:34:35 +00004258 if ((ctxt->sax != NULL) &&
4259 (ctxt->sax->ignorableWhitespace !=
4260 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004261 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004262 if (ctxt->sax->ignorableWhitespace != NULL)
4263 ctxt->sax->ignorableWhitespace(ctxt->userData,
4264 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004265 } else {
4266 if (ctxt->sax->characters != NULL)
4267 ctxt->sax->characters(ctxt->userData,
4268 tmp, nbchar);
 /* the -1 marker on the space stack is turned into -2 here */
4269 if (*ctxt->space == -1)
4270 *ctxt->space = -2;
4271 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004272 } else if ((ctxt->sax != NULL) &&
4273 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004274 ctxt->sax->characters(ctxt->userData,
4275 tmp, nbchar);
4276 }
4277 }
4278 return;
4279 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004280
 /*
  * Step 2: advance over every byte flagged in test_char_data[], keeping
  * the column count in a local and storing it back once the run stops.
  */
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004281get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004282 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004283 while (test_char_data[*in]) {
4284 in++;
4285 ccol++;
4286 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004287 ctxt->input->col = ccol;
 /* line feeds stay part of the run, only line/col are updated */
Daniel Veillard561b7f82002-03-20 21:55:57 +00004288 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004289 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004290 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004291 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004292 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004293 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004294 }
 /*
  * A ']' is ordinary data unless it starts "]]>", which is an error
  * outside of a CDATA section: report it, move input->cur to the ']'
  * and return without delivering the pending run.
  */
4295 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004296 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004297 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004298 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004299 return;
4300 }
4301 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004302 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004303 goto get_more;
4304 }
 /* Step 3: deliver the bytes between input->cur and in, if any */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004305 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004306 if (nbchar > 0) {
 /* areBlanks() is only consulted when the run starts with a blank */
Daniel Veillard34099b42004-11-04 17:34:35 +00004307 if ((ctxt->sax != NULL) &&
4308 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004309 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004310 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004311 const xmlChar *tmp = ctxt->input->cur;
4312 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004313
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004314 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004315 if (ctxt->sax->ignorableWhitespace != NULL)
4316 ctxt->sax->ignorableWhitespace(ctxt->userData,
4317 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004318 } else {
4319 if (ctxt->sax->characters != NULL)
4320 ctxt->sax->characters(ctxt->userData,
4321 tmp, nbchar);
4322 if (*ctxt->space == -1)
4323 *ctxt->space = -2;
4324 }
 /* remember the position reached by the delivered data */
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004325 line = ctxt->input->line;
4326 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004327 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004328 if (ctxt->sax->characters != NULL)
4329 ctxt->sax->characters(ctxt->userData,
4330 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004331 line = ctxt->input->line;
4332 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004333 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004334 /* something really bad happened in the SAX callback */
4335 if (ctxt->instate != XML_PARSER_CONTENT)
4336 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004337 }
4338 ctxt->input->cur = in;
 /*
  * CR immediately followed by LF: input->cur is moved onto the LF so
  * the CR is dropped and the LF opens the next run; a lone CR is left
  * in place (in is stepped back onto it).
  */
Daniel Veillard561b7f82002-03-20 21:55:57 +00004339 if (*in == 0xD) {
4340 in++;
4341 if (*in == 0xA) {
4342 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004343 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004344 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004345 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004346 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004347 in--;
4348 }
 /* markup or a reference starts here: the character data is finished */
4349 if (*in == '<') {
4350 return;
4351 }
4352 if (*in == '&') {
4353 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004354 }
 /* refresh the input window and go on from the new current position */
4355 SHRINK;
4356 GROW;
4357 in = ctxt->input->cur;
 /* stay on the fast path only while the next byte is in 0x20..0x7F or a tab */
William M. Brackc07329e2003-09-08 01:57:30 +00004358 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004359 nbchar = 0;
4360 }
 /* rewind line/col to the last delivered position before falling back */
Daniel Veillard50582112001-03-26 22:52:16 +00004361 ctxt->input->line = line;
4362 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004363 xmlParseCharDataComplex(ctxt, cdata);
4364}
4365
Daniel Veillard01c13b52002-12-10 15:19:08 +00004366/**
4367 * xmlParseCharDataComplex:
4368 * @ctxt: an XML parser context
4369 * @cdata: int indicating whether we are within a CDATA section
4370 *
4371 * parse a CharData section.this is the fallback function
4372 * of xmlParseCharData() when the parsing requires handling
4373 * of non-ASCII characters.
4374 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004375static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004376xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004377 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4378 int nbchar = 0;
4379 int cur, l;
4380 int count = 0;
4381
4382 SHRINK;
4383 GROW;
4384 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004385 while ((cur != '<') && /* checked */
4386 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004387 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004388 if ((cur == ']') && (NXT(1) == ']') &&
4389 (NXT(2) == '>')) {
4390 if (cdata) break;
4391 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004392 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004393 }
4394 }
4395 COPY_BUF(l,buf,nbchar,cur);
4396 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004397 buf[nbchar] = 0;
4398
Owen Taylor3473f882001-02-23 17:55:21 +00004399 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004400 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004401 */
4402 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004403 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004404 if (ctxt->sax->ignorableWhitespace != NULL)
4405 ctxt->sax->ignorableWhitespace(ctxt->userData,
4406 buf, nbchar);
4407 } else {
4408 if (ctxt->sax->characters != NULL)
4409 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004410 if ((ctxt->sax->characters !=
4411 ctxt->sax->ignorableWhitespace) &&
4412 (*ctxt->space == -1))
4413 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004414 }
4415 }
4416 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004417 /* something really bad happened in the SAX callback */
4418 if (ctxt->instate != XML_PARSER_CONTENT)
4419 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004420 }
4421 count++;
4422 if (count > 50) {
4423 GROW;
4424 count = 0;
4425 }
4426 NEXTL(l);
4427 cur = CUR_CHAR(l);
4428 }
4429 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004430 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004431 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004432 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004433 */
4434 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004435 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004436 if (ctxt->sax->ignorableWhitespace != NULL)
4437 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4438 } else {
4439 if (ctxt->sax->characters != NULL)
4440 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004441 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4442 (*ctxt->space == -1))
4443 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004444 }
4445 }
4446 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004447 if ((cur != 0) && (!IS_CHAR(cur))) {
4448 /* Generate the error and skip the offending character */
4449 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4450 "PCDATA invalid Char value %d\n",
4451 cur);
4452 NEXTL(l);
4453 }
Owen Taylor3473f882001-02-23 17:55:21 +00004454}
4455
4456/**
4457 * xmlParseExternalID:
4458 * @ctxt: an XML parser context
4459 * @publicID: a xmlChar** receiving PubidLiteral
4460 * @strict: indicate whether we should restrict parsing to only
4461 * production [75], see NOTE below
4462 *
4463 * Parse an External ID or a Public ID
4464 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004465 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004466 * 'PUBLIC' S PubidLiteral S SystemLiteral
4467 *
4468 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4469 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4470 *
4471 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4472 *
4473 * Returns the function returns SystemLiteral and in the second
4474 * case publicID receives PubidLiteral, is strict is off
4475 * it is possible to return NULL and have publicID set.
4476 */
4477
4478xmlChar *
4479xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4480 xmlChar *URI = NULL;
4481
4482 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004483
4484 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004485 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004486 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004487 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4489 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004490 }
4491 SKIP_BLANKS;
4492 URI = xmlParseSystemLiteral(ctxt);
4493 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004494 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004495 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004496 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004497 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004498 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004499 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004500 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004501 }
4502 SKIP_BLANKS;
4503 *publicID = xmlParsePubidLiteral(ctxt);
4504 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004505 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004506 }
4507 if (strict) {
4508 /*
4509 * We don't handle [83] so "S SystemLiteral" is required.
4510 */
William M. Brack76e95df2003-10-18 16:20:14 +00004511 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004512 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004513 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004514 }
4515 } else {
4516 /*
4517 * We handle [83] so we return immediately, if
4518 * "S SystemLiteral" is not detected. From a purely parsing
4519 * point of view that's a nice mess.
4520 */
4521 const xmlChar *ptr;
4522 GROW;
4523
4524 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004525 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004526
William M. Brack76e95df2003-10-18 16:20:14 +00004527 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004528 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4529 }
4530 SKIP_BLANKS;
4531 URI = xmlParseSystemLiteral(ctxt);
4532 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004533 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004534 }
4535 }
4536 return(URI);
4537}
4538
4539/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004540 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004541 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004542 * @buf: the already parsed part of the buffer
4543 * @len: number of bytes filled in the buffer
4544 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004545 *
4546 * Skip an XML (SGML) comment <!-- .... -->
4547 * The spec says that "For compatibility, the string "--" (double-hyphen)
4548 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004549 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004550 *
4551 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4552 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004553static void
4554xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004555 int q, ql;
4556 int r, rl;
4557 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004558 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004559 int inputid;
4560
4561 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004562
Owen Taylor3473f882001-02-23 17:55:21 +00004563 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004564 len = 0;
4565 size = XML_PARSER_BUFFER_SIZE;
4566 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4567 if (buf == NULL) {
4568 xmlErrMemory(ctxt, NULL);
4569 return;
4570 }
Owen Taylor3473f882001-02-23 17:55:21 +00004571 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004572 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004573 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004574 if (q == 0)
4575 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004576 if (!IS_CHAR(q)) {
4577 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4578 "xmlParseComment: invalid xmlChar value %d\n",
4579 q);
4580 xmlFree (buf);
4581 return;
4582 }
Owen Taylor3473f882001-02-23 17:55:21 +00004583 NEXTL(ql);
4584 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004585 if (r == 0)
4586 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004587 if (!IS_CHAR(r)) {
4588 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4589 "xmlParseComment: invalid xmlChar value %d\n",
4590 q);
4591 xmlFree (buf);
4592 return;
4593 }
Owen Taylor3473f882001-02-23 17:55:21 +00004594 NEXTL(rl);
4595 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004596 if (cur == 0)
4597 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004598 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004599 ((cur != '>') ||
4600 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004601 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004602 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004603 }
4604 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004605 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004606 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004607 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4608 if (new_buf == NULL) {
4609 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004610 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004611 return;
4612 }
William M. Bracka3215c72004-07-31 16:24:01 +00004613 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004614 }
4615 COPY_BUF(ql,buf,len,q);
4616 q = r;
4617 ql = rl;
4618 r = cur;
4619 rl = l;
4620
4621 count++;
4622 if (count > 50) {
4623 GROW;
4624 count = 0;
4625 }
4626 NEXTL(l);
4627 cur = CUR_CHAR(l);
4628 if (cur == 0) {
4629 SHRINK;
4630 GROW;
4631 cur = CUR_CHAR(l);
4632 }
4633 }
4634 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004635 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004636 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004637 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004638 } else if (!IS_CHAR(cur)) {
4639 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4640 "xmlParseComment: invalid xmlChar value %d\n",
4641 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004642 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004643 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004644 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4645 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004646 }
4647 NEXT;
4648 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4649 (!ctxt->disableSAX))
4650 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004651 }
Daniel Veillardda629342007-08-01 07:49:06 +00004652 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004653 return;
4654not_terminated:
4655 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4656 "Comment not terminated\n", NULL);
4657 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004658 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004659}
Daniel Veillardda629342007-08-01 07:49:06 +00004660
Daniel Veillard4c778d82005-01-23 17:37:44 +00004661/**
4662 * xmlParseComment:
4663 * @ctxt: an XML parser context
4664 *
4665 * Parse an XML (SGML) comment <!-- .... -->
 * The comment text is passed to the SAX comment callback when one is set
 * and SAX is not disabled. Nothing is done if the input is not positioned
 * on "<!--".
4666 * The spec says that "For compatibility, the string "--" (double-hyphen)
4667 * must not occur within comments. "
4668 *
 * A fast path scans runs of tab, line feed and bytes in the range
 * 0x20..0x7F; any other byte hands the work over to
 * xmlParseCommentComplex() together with the buffer accumulated so far.
 * ctxt->instate is set to XML_PARSER_COMMENT while parsing and restored
 * to its previous value before every return.
 *
4669 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4670 */
4671void
4672xmlParseComment(xmlParserCtxtPtr ctxt) {
4673 xmlChar *buf = NULL;
4674 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004675 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004676 xmlParserInputState state;
4677 const xmlChar *in;
4678 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004679 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004680
4681 /*
4682 * Check that there is a comment right here.
4683 */
4684 if ((RAW != '<') || (NXT(1) != '!') ||
4685 (NXT(2) != '-') || (NXT(3) != '-')) return;
/* Save the parser state so it can be restored on every exit. */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004686 state = ctxt->instate;
4687 ctxt->instate = XML_PARSER_COMMENT;
/* Remember the input id to detect a comment closed in another input. */
Daniel Veillard051d52c2008-07-29 16:44:59 +00004688 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004689 SKIP(4);
4690 SHRINK;
4691 GROW;
4692
4693 /*
4694 * Accelerated common case where input don't need to be
4695 * modified before passing it to the handler.
4696 */
4697 in = ctxt->input->cur;
4698 do {
/* Consume a run of line feeds, keeping line/column counters in sync. */
4699 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004700 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004701 ctxt->input->line++; ctxt->input->col = 1;
4702 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004703 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004704 }
4705get_more:
4706 ccol = ctxt->input->col;
/* Scan bytes in 0x20..0x7F other than '-', plus tab. */
4707 while (((*in > '-') && (*in <= 0x7F)) ||
4708 ((*in >= 0x20) && (*in < '-')) ||
4709 (*in == 0x09)) {
4710 in++;
4711 ccol++;
4712 }
4713 ctxt->input->col = ccol;
4714 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004715 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004716 ctxt->input->line++; ctxt->input->col = 1;
4717 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004718 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004719 goto get_more;
4720 }
/* Number of bytes scanned since the current input position. */
4721 nbchar = in - ctxt->input->cur;
4722 /*
4723 * save current set of data
4724 */
/* The bytes are only accumulated when a SAX comment handler exists. */
4725 if (nbchar > 0) {
4726 if ((ctxt->sax != NULL) &&
4727 (ctxt->sax->comment != NULL)) {
4728 if (buf == NULL) {
/* If "--" follows, allocate exactly nbchar + 1 bytes; otherwise add
 * XML_PARSER_BUFFER_SIZE of headroom for further data. */
4729 if ((*in == '-') && (in[1] == '-'))
4730 size = nbchar + 1;
4731 else
4732 size = XML_PARSER_BUFFER_SIZE + nbchar;
4733 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4734 if (buf == NULL) {
4735 xmlErrMemory(ctxt, NULL);
4736 ctxt->instate = state;
4737 return;
4738 }
4739 len = 0;
4740 } else if (len + nbchar + 1 >= size) {
4741 xmlChar *new_buf;
4742 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4743 new_buf = (xmlChar *) xmlRealloc(buf,
4744 size * sizeof(xmlChar));
4745 if (new_buf == NULL) {
4746 xmlFree (buf);
4747 xmlErrMemory(ctxt, NULL);
4748 ctxt->instate = state;
4749 return;
4750 }
4751 buf = new_buf;
4752 }
4753 memcpy(&buf[len], ctxt->input->cur, nbchar);
4754 len += nbchar;
4755 buf[len] = 0;
4756 }
4757 }
/* Commit the scanned bytes as consumed. */
4758 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004759 if (*in == 0xA) {
4760 in++;
4761 ctxt->input->line++; ctxt->input->col = 1;
4762 }
/* On CR LF, move the input position past the CR so that the next copy
 * starts at the LF, then resume scanning. A lone CR is left in place. */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004763 if (*in == 0xD) {
4764 in++;
4765 if (*in == 0xA) {
4766 ctxt->input->cur = in;
4767 in++;
4768 ctxt->input->line++; ctxt->input->col = 1;
4769 continue; /* while */
4770 }
4771 in--;
4772 }
4773 SHRINK;
4774 GROW;
/* Re-read the position: in is reset from ctxt->input->cur after SHRINK/GROW. */
4775 in = ctxt->input->cur;
4776 if (*in == '-') {
4777 if (in[1] == '-') {
/* "-->" terminates the comment: deliver it and return. */
4778 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004779 if (ctxt->input->id != inputid) {
4780 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4781 "comment doesn't start and stop in the same entity\n");
4782 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004783 SKIP(3);
4784 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4785 (!ctxt->disableSAX)) {
/* An empty comment is reported as an empty string, not NULL. */
4786 if (buf != NULL)
4787 ctxt->sax->comment(ctxt->userData, buf);
4788 else
4789 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4790 }
4791 if (buf != NULL)
4792 xmlFree(buf);
4793 ctxt->instate = state;
4794 return;
4795 }
/* "--" not followed by '>' is reported, then scanning goes on. */
Bryan Henderson8658d272012-05-08 16:39:05 +08004796 if (buf != NULL) {
4797 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4798 "Double hyphen within comment: "
4799 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00004800 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08004801 } else
4802 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4803 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004804 in++;
4805 ctxt->input->col++;
4806 }
4807 in++;
4808 ctxt->input->col++;
4809 goto get_more;
4810 }
4811 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
/* Any other byte: fall back to the slow routine, handing it the buffer
 * accumulated so far (buf is not freed here after the call). */
4812 xmlParseCommentComplex(ctxt, buf, len, size);
4813 ctxt->instate = state;
4814 return;
4815}
4816
Owen Taylor3473f882001-02-23 17:55:21 +00004817
4818/**
4819 * xmlParsePITarget:
4820 * @ctxt: an XML parser context
4821 *
4822 * parse the name of a PI
4823 *
4824 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4825 *
4826 * Returns the PITarget name or NULL
4827 */
4828
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004829const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004830xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004831 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004832
4833 name = xmlParseName(ctxt);
4834 if ((name != NULL) &&
4835 ((name[0] == 'x') || (name[0] == 'X')) &&
4836 ((name[1] == 'm') || (name[1] == 'M')) &&
4837 ((name[2] == 'l') || (name[2] == 'L'))) {
4838 int i;
4839 if ((name[0] == 'x') && (name[1] == 'm') &&
4840 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004841 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004842 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004843 return(name);
4844 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004845 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004846 return(name);
4847 }
4848 for (i = 0;;i++) {
4849 if (xmlW3CPIs[i] == NULL) break;
4850 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4851 return(name);
4852 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004853 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4854 "xmlParsePITarget: invalid name prefix 'xml'\n",
4855 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004856 }
Daniel Veillard37334572008-07-31 08:20:02 +00004857 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4858 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4859 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4860 }
Owen Taylor3473f882001-02-23 17:55:21 +00004861 return(name);
4862}
4863
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction:
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations.
 * The quoted URL is added to the catalogs of the parsing context in order
 * to be used if there is a resolution need further down in the document.
 * A syntax error produces a warning, except a missing '=' after the
 * "catalog" keyword which makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *url = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* Expect the pseudo-attribute name. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* Expect the '=' sign, tolerating surrounding blanks. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;
    for (; IS_BLANK_CH(*p); p++)
        ;

    /* Expect a value delimited by matching single or double quotes. */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; *p != 0; p++) {
        if (*p == quote)
            break;
    }
    if (*p == 0)
        goto error;
    url = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
4925
Owen Taylor3473f882001-02-23 17:55:21 +00004926/**
4927 * xmlParsePI:
4928 * @ctxt: an XML parser context
4929 *
4930 * parse an XML Processing Instruction.
4931 *
4932 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4933 *
4934 * The processing is transfered to SAX once parsed.
4935 */
4936
4937void
4938xmlParsePI(xmlParserCtxtPtr ctxt) {
4939 xmlChar *buf = NULL;
4940 int len = 0;
4941 int size = XML_PARSER_BUFFER_SIZE;
4942 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004943 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004944 xmlParserInputState state;
4945 int count = 0;
4946
4947 if ((RAW == '<') && (NXT(1) == '?')) {
4948 xmlParserInputPtr input = ctxt->input;
4949 state = ctxt->instate;
4950 ctxt->instate = XML_PARSER_PI;
4951 /*
4952 * this is a Processing Instruction.
4953 */
4954 SKIP(2);
4955 SHRINK;
4956
4957 /*
4958 * Parse the target name and check for special support like
4959 * namespace.
4960 */
4961 target = xmlParsePITarget(ctxt);
4962 if (target != NULL) {
4963 if ((RAW == '?') && (NXT(1) == '>')) {
4964 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004965 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4966 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004967 }
4968 SKIP(2);
4969
4970 /*
4971 * SAX: PI detected.
4972 */
4973 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4974 (ctxt->sax->processingInstruction != NULL))
4975 ctxt->sax->processingInstruction(ctxt->userData,
4976 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08004977 if (ctxt->instate != XML_PARSER_EOF)
4978 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004979 return;
4980 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004981 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004982 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004983 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004984 ctxt->instate = state;
4985 return;
4986 }
4987 cur = CUR;
4988 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004989 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4990 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004991 }
4992 SKIP_BLANKS;
4993 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004994 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004995 ((cur != '?') || (NXT(1) != '>'))) {
4996 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004997 xmlChar *tmp;
4998
Owen Taylor3473f882001-02-23 17:55:21 +00004999 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00005000 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
5001 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005002 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005003 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005004 ctxt->instate = state;
5005 return;
5006 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005007 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005008 }
5009 count++;
5010 if (count > 50) {
5011 GROW;
5012 count = 0;
5013 }
5014 COPY_BUF(l,buf,len,cur);
5015 NEXTL(l);
5016 cur = CUR_CHAR(l);
5017 if (cur == 0) {
5018 SHRINK;
5019 GROW;
5020 cur = CUR_CHAR(l);
5021 }
5022 }
5023 buf[len] = 0;
5024 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005025 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5026 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005027 } else {
5028 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005029 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5030 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005031 }
5032 SKIP(2);
5033
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005034#ifdef LIBXML_CATALOG_ENABLED
5035 if (((state == XML_PARSER_MISC) ||
5036 (state == XML_PARSER_START)) &&
5037 (xmlStrEqual(target, XML_CATALOG_PI))) {
5038 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5039 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5040 (allow == XML_CATA_ALLOW_ALL))
5041 xmlParseCatalogPI(ctxt, buf);
5042 }
5043#endif
5044
5045
Owen Taylor3473f882001-02-23 17:55:21 +00005046 /*
5047 * SAX: PI detected.
5048 */
5049 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5050 (ctxt->sax->processingInstruction != NULL))
5051 ctxt->sax->processingInstruction(ctxt->userData,
5052 target, buf);
5053 }
5054 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005055 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005056 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005057 }
Chris Evans77404b82011-12-14 16:18:25 +08005058 if (ctxt->instate != XML_PARSER_EOF)
5059 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005060 }
5061}
5062
5063/**
5064 * xmlParseNotationDecl:
5065 * @ctxt: an XML parser context
5066 *
5067 * parse a notation declaration
5068 *
5069 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5070 *
5071 * Hence there is actually 3 choices:
5072 * 'PUBLIC' S PubidLiteral
5073 * 'PUBLIC' S PubidLiteral S SystemLiteral
5074 * and 'SYSTEM' S SystemLiteral
5075 *
5076 * See the NOTE on xmlParseExternalID().
5077 */
5078
5079void
5080xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005081 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005082 xmlChar *Pubid;
5083 xmlChar *Systemid;
5084
Daniel Veillarda07050d2003-10-19 14:46:32 +00005085 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005086 xmlParserInputPtr input = ctxt->input;
5087 SHRINK;
5088 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005089 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005090 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5091 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005092 return;
5093 }
5094 SKIP_BLANKS;
5095
Daniel Veillard76d66f42001-05-16 21:05:17 +00005096 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005097 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005098 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005099 return;
5100 }
William M. Brack76e95df2003-10-18 16:20:14 +00005101 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005102 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005103 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005104 return;
5105 }
Daniel Veillard37334572008-07-31 08:20:02 +00005106 if (xmlStrchr(name, ':') != NULL) {
5107 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5108 "colon are forbidden from notation names '%s'\n",
5109 name, NULL, NULL);
5110 }
Owen Taylor3473f882001-02-23 17:55:21 +00005111 SKIP_BLANKS;
5112
5113 /*
5114 * Parse the IDs.
5115 */
5116 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5117 SKIP_BLANKS;
5118
5119 if (RAW == '>') {
5120 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005121 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5122 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005123 }
5124 NEXT;
5125 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5126 (ctxt->sax->notationDecl != NULL))
5127 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5128 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005129 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005130 }
Owen Taylor3473f882001-02-23 17:55:21 +00005131 if (Systemid != NULL) xmlFree(Systemid);
5132 if (Pubid != NULL) xmlFree(Pubid);
5133 }
5134}
5135
5136/**
5137 * xmlParseEntityDecl:
5138 * @ctxt: an XML parser context
5139 *
5140 * parse <!ENTITY declarations
5141 *
5142 * [70] EntityDecl ::= GEDecl | PEDecl
5143 *
5144 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5145 *
5146 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5147 *
5148 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5149 *
5150 * [74] PEDef ::= EntityValue | ExternalID
5151 *
5152 * [76] NDataDecl ::= S 'NDATA' S Name
5153 *
5154 * [ VC: Notation Declared ]
5155 * The Name must match the declared name of a notation.
5156 */
5157
5158void
5159xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005160 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005161 xmlChar *value = NULL;
5162 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005163 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005164 int isParameter = 0;
5165 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005166 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00005167
Daniel Veillard4c778d82005-01-23 17:37:44 +00005168 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005169 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005170 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005171 SHRINK;
5172 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005173 skipped = SKIP_BLANKS;
5174 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005175 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5176 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005177 }
Owen Taylor3473f882001-02-23 17:55:21 +00005178
5179 if (RAW == '%') {
5180 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005181 skipped = SKIP_BLANKS;
5182 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005183 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5184 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005185 }
Owen Taylor3473f882001-02-23 17:55:21 +00005186 isParameter = 1;
5187 }
5188
Daniel Veillard76d66f42001-05-16 21:05:17 +00005189 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005190 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005191 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5192 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005193 return;
5194 }
Daniel Veillard37334572008-07-31 08:20:02 +00005195 if (xmlStrchr(name, ':') != NULL) {
5196 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5197 "colon are forbidden from entities names '%s'\n",
5198 name, NULL, NULL);
5199 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005200 skipped = SKIP_BLANKS;
5201 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005202 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5203 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005204 }
Owen Taylor3473f882001-02-23 17:55:21 +00005205
Daniel Veillardf5582f12002-06-11 10:08:16 +00005206 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005207 /*
5208 * handle the various case of definitions...
5209 */
5210 if (isParameter) {
5211 if ((RAW == '"') || (RAW == '\'')) {
5212 value = xmlParseEntityValue(ctxt, &orig);
5213 if (value) {
5214 if ((ctxt->sax != NULL) &&
5215 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5216 ctxt->sax->entityDecl(ctxt->userData, name,
5217 XML_INTERNAL_PARAMETER_ENTITY,
5218 NULL, NULL, value);
5219 }
5220 } else {
5221 URI = xmlParseExternalID(ctxt, &literal, 1);
5222 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005223 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005224 }
5225 if (URI) {
5226 xmlURIPtr uri;
5227
5228 uri = xmlParseURI((const char *) URI);
5229 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005230 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5231 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005232 /*
5233 * This really ought to be a well formedness error
5234 * but the XML Core WG decided otherwise c.f. issue
5235 * E26 of the XML erratas.
5236 */
Owen Taylor3473f882001-02-23 17:55:21 +00005237 } else {
5238 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005239 /*
5240 * Okay this is foolish to block those but not
5241 * invalid URIs.
5242 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005243 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005244 } else {
5245 if ((ctxt->sax != NULL) &&
5246 (!ctxt->disableSAX) &&
5247 (ctxt->sax->entityDecl != NULL))
5248 ctxt->sax->entityDecl(ctxt->userData, name,
5249 XML_EXTERNAL_PARAMETER_ENTITY,
5250 literal, URI, NULL);
5251 }
5252 xmlFreeURI(uri);
5253 }
5254 }
5255 }
5256 } else {
5257 if ((RAW == '"') || (RAW == '\'')) {
5258 value = xmlParseEntityValue(ctxt, &orig);
5259 if ((ctxt->sax != NULL) &&
5260 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5261 ctxt->sax->entityDecl(ctxt->userData, name,
5262 XML_INTERNAL_GENERAL_ENTITY,
5263 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005264 /*
5265 * For expat compatibility in SAX mode.
5266 */
5267 if ((ctxt->myDoc == NULL) ||
5268 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5269 if (ctxt->myDoc == NULL) {
5270 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005271 if (ctxt->myDoc == NULL) {
5272 xmlErrMemory(ctxt, "New Doc failed");
5273 return;
5274 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005275 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005276 }
5277 if (ctxt->myDoc->intSubset == NULL)
5278 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5279 BAD_CAST "fake", NULL, NULL);
5280
Daniel Veillard1af9a412003-08-20 22:54:39 +00005281 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5282 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005283 }
Owen Taylor3473f882001-02-23 17:55:21 +00005284 } else {
5285 URI = xmlParseExternalID(ctxt, &literal, 1);
5286 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005287 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005288 }
5289 if (URI) {
5290 xmlURIPtr uri;
5291
5292 uri = xmlParseURI((const char *)URI);
5293 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005294 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5295 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005296 /*
5297 * This really ought to be a well formedness error
5298 * but the XML Core WG decided otherwise c.f. issue
5299 * E26 of the XML erratas.
5300 */
Owen Taylor3473f882001-02-23 17:55:21 +00005301 } else {
5302 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005303 /*
5304 * Okay this is foolish to block those but not
5305 * invalid URIs.
5306 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005307 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005308 }
5309 xmlFreeURI(uri);
5310 }
5311 }
William M. Brack76e95df2003-10-18 16:20:14 +00005312 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005313 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5314 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005315 }
5316 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005317 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005318 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005319 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005320 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5321 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005322 }
5323 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005324 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005325 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5326 (ctxt->sax->unparsedEntityDecl != NULL))
5327 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5328 literal, URI, ndata);
5329 } else {
5330 if ((ctxt->sax != NULL) &&
5331 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5332 ctxt->sax->entityDecl(ctxt->userData, name,
5333 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5334 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005335 /*
5336 * For expat compatibility in SAX mode.
5337 * assuming the entity repalcement was asked for
5338 */
5339 if ((ctxt->replaceEntities != 0) &&
5340 ((ctxt->myDoc == NULL) ||
5341 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5342 if (ctxt->myDoc == NULL) {
5343 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005344 if (ctxt->myDoc == NULL) {
5345 xmlErrMemory(ctxt, "New Doc failed");
5346 return;
5347 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005348 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005349 }
5350
5351 if (ctxt->myDoc->intSubset == NULL)
5352 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5353 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005354 xmlSAX2EntityDecl(ctxt, name,
5355 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5356 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005357 }
Owen Taylor3473f882001-02-23 17:55:21 +00005358 }
5359 }
5360 }
5361 SKIP_BLANKS;
5362 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005363 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005364 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005365 } else {
5366 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005367 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5368 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005369 }
5370 NEXT;
5371 }
5372 if (orig != NULL) {
5373 /*
5374 * Ugly mechanism to save the raw entity value.
5375 */
5376 xmlEntityPtr cur = NULL;
5377
5378 if (isParameter) {
5379 if ((ctxt->sax != NULL) &&
5380 (ctxt->sax->getParameterEntity != NULL))
5381 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5382 } else {
5383 if ((ctxt->sax != NULL) &&
5384 (ctxt->sax->getEntity != NULL))
5385 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005386 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005387 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005388 }
Owen Taylor3473f882001-02-23 17:55:21 +00005389 }
5390 if (cur != NULL) {
5391 if (cur->orig != NULL)
5392 xmlFree(orig);
5393 else
5394 cur->orig = orig;
5395 } else
5396 xmlFree(orig);
5397 }
Owen Taylor3473f882001-02-23 17:55:21 +00005398 if (value != NULL) xmlFree(value);
5399 if (URI != NULL) xmlFree(URI);
5400 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005401 }
5402}
5403
5404/**
5405 * xmlParseDefaultDecl:
5406 * @ctxt: an XML parser context
5407 * @value: Receive a possible fixed default value for the attribute
5408 *
5409 * Parse an attribute default declaration
5410 *
5411 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5412 *
5413 * [ VC: Required Attribute ]
5414 * if the default declaration is the keyword #REQUIRED, then the
5415 * attribute must be specified for all elements of the type in the
5416 * attribute-list declaration.
5417 *
5418 * [ VC: Attribute Default Legal ]
5419 * The declared default value must meet the lexical constraints of
5420 * the declared attribute type c.f. xmlValidateAttributeDecl()
5421 *
5422 * [ VC: Fixed Attribute Default ]
5423 * if an attribute has a default value declared with the #FIXED
5424 * keyword, instances of that attribute must match the default value.
5425 *
5426 * [ WFC: No < in Attribute Values ]
5427 * handled in xmlParseAttValue()
5428 *
5429 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5430 * or XML_ATTRIBUTE_FIXED.
5431 */
5432
5433int
5434xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5435 int val;
5436 xmlChar *ret;
5437
5438 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005439 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005440 SKIP(9);
5441 return(XML_ATTRIBUTE_REQUIRED);
5442 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005443 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005444 SKIP(8);
5445 return(XML_ATTRIBUTE_IMPLIED);
5446 }
5447 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005448 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005449 SKIP(6);
5450 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005451 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005452 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5453 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005454 }
5455 SKIP_BLANKS;
5456 }
5457 ret = xmlParseAttValue(ctxt);
5458 ctxt->instate = XML_PARSER_DTD;
5459 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005460 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005461 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005462 } else
5463 *value = ret;
5464 return(val);
5465}
5466
5467/**
5468 * xmlParseNotationType:
5469 * @ctxt: an XML parser context
5470 *
5471 * parse an Notation attribute type.
5472 *
5473 * Note: the leading 'NOTATION' S part has already being parsed...
5474 *
5475 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5476 *
5477 * [ VC: Notation Attributes ]
5478 * Values of this type must match one of the notation names included
5479 * in the declaration; all notation names in the declaration must be declared.
5480 *
5481 * Returns: the notation attribute tree built while parsing
5482 */
5483
5484xmlEnumerationPtr
5485xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005486 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005487 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005488
5489 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005490 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005491 return(NULL);
5492 }
5493 SHRINK;
5494 do {
5495 NEXT;
5496 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005497 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005498 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005499 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5500 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005501 xmlFreeEnumeration(ret);
5502 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005503 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005504 tmp = ret;
5505 while (tmp != NULL) {
5506 if (xmlStrEqual(name, tmp->name)) {
5507 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5508 "standalone: attribute notation value token %s duplicated\n",
5509 name, NULL);
5510 if (!xmlDictOwns(ctxt->dict, name))
5511 xmlFree((xmlChar *) name);
5512 break;
5513 }
5514 tmp = tmp->next;
5515 }
5516 if (tmp == NULL) {
5517 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005518 if (cur == NULL) {
5519 xmlFreeEnumeration(ret);
5520 return(NULL);
5521 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005522 if (last == NULL) ret = last = cur;
5523 else {
5524 last->next = cur;
5525 last = cur;
5526 }
Owen Taylor3473f882001-02-23 17:55:21 +00005527 }
5528 SKIP_BLANKS;
5529 } while (RAW == '|');
5530 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005531 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005532 xmlFreeEnumeration(ret);
5533 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005534 }
5535 NEXT;
5536 return(ret);
5537}
5538
5539/**
5540 * xmlParseEnumerationType:
5541 * @ctxt: an XML parser context
5542 *
5543 * parse an Enumeration attribute type.
5544 *
5545 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5546 *
5547 * [ VC: Enumeration ]
5548 * Values of this type must match one of the Nmtoken tokens in
5549 * the declaration
5550 *
5551 * Returns: the enumeration attribute tree built while parsing
5552 */
5553
5554xmlEnumerationPtr
5555xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5556 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005557 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005558
5559 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005560 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005561 return(NULL);
5562 }
5563 SHRINK;
5564 do {
5565 NEXT;
5566 SKIP_BLANKS;
5567 name = xmlParseNmtoken(ctxt);
5568 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005569 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005570 return(ret);
5571 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005572 tmp = ret;
5573 while (tmp != NULL) {
5574 if (xmlStrEqual(name, tmp->name)) {
5575 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5576 "standalone: attribute enumeration value token %s duplicated\n",
5577 name, NULL);
5578 if (!xmlDictOwns(ctxt->dict, name))
5579 xmlFree(name);
5580 break;
5581 }
5582 tmp = tmp->next;
5583 }
5584 if (tmp == NULL) {
5585 cur = xmlCreateEnumeration(name);
5586 if (!xmlDictOwns(ctxt->dict, name))
5587 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005588 if (cur == NULL) {
5589 xmlFreeEnumeration(ret);
5590 return(NULL);
5591 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005592 if (last == NULL) ret = last = cur;
5593 else {
5594 last->next = cur;
5595 last = cur;
5596 }
Owen Taylor3473f882001-02-23 17:55:21 +00005597 }
5598 SKIP_BLANKS;
5599 } while (RAW == '|');
5600 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005601 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005602 return(ret);
5603 }
5604 NEXT;
5605 return(ret);
5606}
5607
5608/**
5609 * xmlParseEnumeratedType:
5610 * @ctxt: an XML parser context
5611 * @tree: the enumeration tree built while parsing
5612 *
5613 * parse an Enumerated attribute type.
5614 *
5615 * [57] EnumeratedType ::= NotationType | Enumeration
5616 *
5617 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5618 *
5619 *
5620 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5621 */
5622
5623int
5624xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005625 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005626 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005627 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5629 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005630 return(0);
5631 }
5632 SKIP_BLANKS;
5633 *tree = xmlParseNotationType(ctxt);
5634 if (*tree == NULL) return(0);
5635 return(XML_ATTRIBUTE_NOTATION);
5636 }
5637 *tree = xmlParseEnumerationType(ctxt);
5638 if (*tree == NULL) return(0);
5639 return(XML_ATTRIBUTE_ENUMERATION);
5640}
5641
5642/**
5643 * xmlParseAttributeType:
5644 * @ctxt: an XML parser context
5645 * @tree: the enumeration tree built while parsing
5646 *
5647 * parse the Attribute list def for an element
5648 *
5649 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5650 *
5651 * [55] StringType ::= 'CDATA'
5652 *
5653 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5654 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5655 *
5656 * Validity constraints for attribute values syntax are checked in
5657 * xmlValidateAttributeValue()
5658 *
5659 * [ VC: ID ]
5660 * Values of type ID must match the Name production. A name must not
5661 * appear more than once in an XML document as a value of this type;
5662 * i.e., ID values must uniquely identify the elements which bear them.
5663 *
5664 * [ VC: One ID per Element Type ]
5665 * No element type may have more than one ID attribute specified.
5666 *
5667 * [ VC: ID Attribute Default ]
5668 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5669 *
5670 * [ VC: IDREF ]
5671 * Values of type IDREF must match the Name production, and values
5672 * of type IDREFS must match Names; each IDREF Name must match the value
5673 * of an ID attribute on some element in the XML document; i.e. IDREF
5674 * values must match the value of some ID attribute.
5675 *
5676 * [ VC: Entity Name ]
5677 * Values of type ENTITY must match the Name production, values
5678 * of type ENTITIES must match Names; each Entity Name must match the
5679 * name of an unparsed entity declared in the DTD.
5680 *
5681 * [ VC: Name Token ]
5682 * Values of type NMTOKEN must match the Nmtoken production; values
5683 * of type NMTOKENS must match Nmtokens.
5684 *
5685 * Returns the attribute type
5686 */
5687int
5688xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5689 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005690 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005691 SKIP(5);
5692 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005693 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005694 SKIP(6);
5695 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005696 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005697 SKIP(5);
5698 return(XML_ATTRIBUTE_IDREF);
5699 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5700 SKIP(2);
5701 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005702 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005703 SKIP(6);
5704 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005705 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005706 SKIP(8);
5707 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005708 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005709 SKIP(8);
5710 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005711 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005712 SKIP(7);
5713 return(XML_ATTRIBUTE_NMTOKEN);
5714 }
5715 return(xmlParseEnumeratedType(ctxt, tree));
5716}
5717
5718/**
5719 * xmlParseAttributeListDecl:
5720 * @ctxt: an XML parser context
5721 *
5722 * : parse the Attribute list def for an element
5723 *
5724 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5725 *
5726 * [53] AttDef ::= S Name S AttType S DefaultDecl
5727 *
5728 */
5729void
5730xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005731 const xmlChar *elemName;
5732 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005733 xmlEnumerationPtr tree;
5734
Daniel Veillarda07050d2003-10-19 14:46:32 +00005735 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005736 xmlParserInputPtr input = ctxt->input;
5737
5738 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005739 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005741 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005742 }
5743 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005744 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005745 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005746 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5747 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005748 return;
5749 }
5750 SKIP_BLANKS;
5751 GROW;
5752 while (RAW != '>') {
5753 const xmlChar *check = CUR_PTR;
5754 int type;
5755 int def;
5756 xmlChar *defaultValue = NULL;
5757
5758 GROW;
5759 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005760 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005761 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005762 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5763 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005764 break;
5765 }
5766 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005767 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005768 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005769 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005770 break;
5771 }
5772 SKIP_BLANKS;
5773
5774 type = xmlParseAttributeType(ctxt, &tree);
5775 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005776 break;
5777 }
5778
5779 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005780 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005781 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5782 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005783 if (tree != NULL)
5784 xmlFreeEnumeration(tree);
5785 break;
5786 }
5787 SKIP_BLANKS;
5788
5789 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5790 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005791 if (defaultValue != NULL)
5792 xmlFree(defaultValue);
5793 if (tree != NULL)
5794 xmlFreeEnumeration(tree);
5795 break;
5796 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005797 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5798 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005799
5800 GROW;
5801 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005802 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005803 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005804 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005805 if (defaultValue != NULL)
5806 xmlFree(defaultValue);
5807 if (tree != NULL)
5808 xmlFreeEnumeration(tree);
5809 break;
5810 }
5811 SKIP_BLANKS;
5812 }
5813 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005814 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5815 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005816 if (defaultValue != NULL)
5817 xmlFree(defaultValue);
5818 if (tree != NULL)
5819 xmlFreeEnumeration(tree);
5820 break;
5821 }
5822 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5823 (ctxt->sax->attributeDecl != NULL))
5824 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5825 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005826 else if (tree != NULL)
5827 xmlFreeEnumeration(tree);
5828
5829 if ((ctxt->sax2) && (defaultValue != NULL) &&
5830 (def != XML_ATTRIBUTE_IMPLIED) &&
5831 (def != XML_ATTRIBUTE_REQUIRED)) {
5832 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5833 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005834 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005835 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5836 }
Owen Taylor3473f882001-02-23 17:55:21 +00005837 if (defaultValue != NULL)
5838 xmlFree(defaultValue);
5839 GROW;
5840 }
5841 if (RAW == '>') {
5842 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005843 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5844 "Attribute list declaration doesn't start and stop in the same entity\n",
5845 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005846 }
5847 NEXT;
5848 }
Owen Taylor3473f882001-02-23 17:55:21 +00005849 }
5850}
5851
5852/**
5853 * xmlParseElementMixedContentDecl:
5854 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005855 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005856 *
5857 * parse the declaration for a Mixed Element content
5858 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5859 *
5860 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5861 * '(' S? '#PCDATA' S? ')'
5862 *
5863 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5864 *
5865 * [ VC: No Duplicate Types ]
5866 * The same name must not appear more than once in a single
5867 * mixed-content declaration.
5868 *
5869 * returns: the list of the xmlElementContentPtr describing the element choices
5870 */
5871xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005872xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005873 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005874 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005875
5876 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005877 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005878 SKIP(7);
5879 SKIP_BLANKS;
5880 SHRINK;
5881 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005882 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005883 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5884"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005885 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005886 }
Owen Taylor3473f882001-02-23 17:55:21 +00005887 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005888 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005889 if (ret == NULL)
5890 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005891 if (RAW == '*') {
5892 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5893 NEXT;
5894 }
5895 return(ret);
5896 }
5897 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005898 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005899 if (ret == NULL) return(NULL);
5900 }
5901 while (RAW == '|') {
5902 NEXT;
5903 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005904 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005905 if (ret == NULL) return(NULL);
5906 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005907 if (cur != NULL)
5908 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005909 cur = ret;
5910 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005911 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005912 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005913 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005914 if (n->c1 != NULL)
5915 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005916 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005917 if (n != NULL)
5918 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005919 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005920 }
5921 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005922 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005923 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005924 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005925 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005926 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005927 return(NULL);
5928 }
5929 SKIP_BLANKS;
5930 GROW;
5931 }
5932 if ((RAW == ')') && (NXT(1) == '*')) {
5933 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005934 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005935 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005936 if (cur->c2 != NULL)
5937 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005938 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02005939 if (ret != NULL)
5940 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005941 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005942 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5943"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005944 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005945 }
Owen Taylor3473f882001-02-23 17:55:21 +00005946 SKIP(2);
5947 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005948 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005949 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005950 return(NULL);
5951 }
5952
5953 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005954 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005955 }
5956 return(ret);
5957}
5958
5959/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005960 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005961 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005962 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005963 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005964 *
5965 * parse the declaration for a Mixed Element content
5966 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5967 *
5968 *
5969 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5970 *
5971 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5972 *
5973 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5974 *
5975 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5976 *
5977 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5978 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005979 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005980 * opening or closing parentheses in a choice, seq, or Mixed
5981 * construct is contained in the replacement text for a parameter
5982 * entity, both must be contained in the same replacement text. For
5983 * interoperability, if a parameter-entity reference appears in a
5984 * choice, seq, or Mixed construct, its replacement text should not
5985 * be empty, and neither the first nor last non-blank character of
5986 * the replacement text should be a connector (| or ,).
5987 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005988 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005989 * hierarchy.
5990 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005991static xmlElementContentPtr
5992xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5993 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005994 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005995 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005996 xmlChar type = 0;
5997
Daniel Veillard489f9672009-08-10 16:49:30 +02005998 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5999 (depth > 2048)) {
6000 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6001"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6002 depth);
6003 return(NULL);
6004 }
Owen Taylor3473f882001-02-23 17:55:21 +00006005 SKIP_BLANKS;
6006 GROW;
6007 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006008 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006009
Owen Taylor3473f882001-02-23 17:55:21 +00006010 /* Recurse on first child */
6011 NEXT;
6012 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006013 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6014 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006015 SKIP_BLANKS;
6016 GROW;
6017 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006018 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006019 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006020 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006021 return(NULL);
6022 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006023 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006024 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006025 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006026 return(NULL);
6027 }
Owen Taylor3473f882001-02-23 17:55:21 +00006028 GROW;
6029 if (RAW == '?') {
6030 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6031 NEXT;
6032 } else if (RAW == '*') {
6033 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6034 NEXT;
6035 } else if (RAW == '+') {
6036 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6037 NEXT;
6038 } else {
6039 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6040 }
Owen Taylor3473f882001-02-23 17:55:21 +00006041 GROW;
6042 }
6043 SKIP_BLANKS;
6044 SHRINK;
6045 while (RAW != ')') {
6046 /*
6047 * Each loop we parse one separator and one element.
6048 */
6049 if (RAW == ',') {
6050 if (type == 0) type = CUR;
6051
6052 /*
6053 * Detect "Name | Name , Name" error
6054 */
6055 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006056 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006057 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006058 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006059 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006060 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006061 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006062 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006063 return(NULL);
6064 }
6065 NEXT;
6066
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006067 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006068 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006069 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006070 xmlFreeDocElementContent(ctxt->myDoc, last);
6071 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006072 return(NULL);
6073 }
6074 if (last == NULL) {
6075 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006076 if (ret != NULL)
6077 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006078 ret = cur = op;
6079 } else {
6080 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006081 if (op != NULL)
6082 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006083 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006084 if (last != NULL)
6085 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006086 cur =op;
6087 last = NULL;
6088 }
6089 } else if (RAW == '|') {
6090 if (type == 0) type = CUR;
6091
6092 /*
6093 * Detect "Name , Name | Name" error
6094 */
6095 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006096 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006097 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006098 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006099 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006100 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006101 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006102 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006103 return(NULL);
6104 }
6105 NEXT;
6106
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006107 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006108 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006109 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006110 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006111 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006112 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006113 return(NULL);
6114 }
6115 if (last == NULL) {
6116 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006117 if (ret != NULL)
6118 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006119 ret = cur = op;
6120 } else {
6121 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006122 if (op != NULL)
6123 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006124 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006125 if (last != NULL)
6126 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006127 cur =op;
6128 last = NULL;
6129 }
6130 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006131 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006132 if ((last != NULL) && (last != ret))
6133 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006134 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006135 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006136 return(NULL);
6137 }
6138 GROW;
6139 SKIP_BLANKS;
6140 GROW;
6141 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006142 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006143 /* Recurse on second child */
6144 NEXT;
6145 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006146 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6147 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006148 SKIP_BLANKS;
6149 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006150 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006151 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006152 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006153 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006154 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006155 return(NULL);
6156 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006157 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006158 if (last == NULL) {
6159 if (ret != NULL)
6160 xmlFreeDocElementContent(ctxt->myDoc, ret);
6161 return(NULL);
6162 }
Owen Taylor3473f882001-02-23 17:55:21 +00006163 if (RAW == '?') {
6164 last->ocur = XML_ELEMENT_CONTENT_OPT;
6165 NEXT;
6166 } else if (RAW == '*') {
6167 last->ocur = XML_ELEMENT_CONTENT_MULT;
6168 NEXT;
6169 } else if (RAW == '+') {
6170 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6171 NEXT;
6172 } else {
6173 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6174 }
6175 }
6176 SKIP_BLANKS;
6177 GROW;
6178 }
6179 if ((cur != NULL) && (last != NULL)) {
6180 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006181 if (last != NULL)
6182 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006183 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006184 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006185 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6186"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006187 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006188 }
Owen Taylor3473f882001-02-23 17:55:21 +00006189 NEXT;
6190 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006191 if (ret != NULL) {
6192 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6193 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6194 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6195 else
6196 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6197 }
Owen Taylor3473f882001-02-23 17:55:21 +00006198 NEXT;
6199 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006200 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006201 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006202 cur = ret;
6203 /*
6204 * Some normalization:
6205 * (a | b* | c?)* == (a | b | c)*
6206 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006207 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006208 if ((cur->c1 != NULL) &&
6209 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6210 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6211 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6212 if ((cur->c2 != NULL) &&
6213 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6214 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6215 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6216 cur = cur->c2;
6217 }
6218 }
Owen Taylor3473f882001-02-23 17:55:21 +00006219 NEXT;
6220 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006221 if (ret != NULL) {
6222 int found = 0;
6223
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006224 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6225 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6226 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006227 else
6228 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006229 /*
6230 * Some normalization:
6231 * (a | b*)+ == (a | b)*
6232 * (a | b?)+ == (a | b)*
6233 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006234 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006235 if ((cur->c1 != NULL) &&
6236 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6237 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6238 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6239 found = 1;
6240 }
6241 if ((cur->c2 != NULL) &&
6242 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6243 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6244 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6245 found = 1;
6246 }
6247 cur = cur->c2;
6248 }
6249 if (found)
6250 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6251 }
Owen Taylor3473f882001-02-23 17:55:21 +00006252 NEXT;
6253 }
6254 return(ret);
6255}
6256
6257/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006258 * xmlParseElementChildrenContentDecl:
6259 * @ctxt: an XML parser context
6260 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006261 *
6262 * parse the declaration for a Mixed Element content
6263 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6264 *
6265 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6266 *
6267 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6268 *
6269 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6270 *
6271 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6272 *
6273 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6274 * TODO Parameter-entity replacement text must be properly nested
6275 * with parenthesized groups. That is to say, if either of the
6276 * opening or closing parentheses in a choice, seq, or Mixed
6277 * construct is contained in the replacement text for a parameter
6278 * entity, both must be contained in the same replacement text. For
6279 * interoperability, if a parameter-entity reference appears in a
6280 * choice, seq, or Mixed construct, its replacement text should not
6281 * be empty, and neither the first nor last non-blank character of
6282 * the replacement text should be a connector (| or ,).
6283 *
6284 * Returns the tree of xmlElementContentPtr describing the element
6285 * hierarchy.
6286 */
6287xmlElementContentPtr
6288xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6289 /* stub left for API/ABI compat */
6290 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6291}
6292
6293/**
Owen Taylor3473f882001-02-23 17:55:21 +00006294 * xmlParseElementContentDecl:
6295 * @ctxt: an XML parser context
6296 * @name: the name of the element being defined.
6297 * @result: the Element Content pointer will be stored here if any
6298 *
6299 * parse the declaration for an Element content either Mixed or Children,
6300 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6301 *
6302 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6303 *
6304 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6305 */
6306
6307int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006308xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006309 xmlElementContentPtr *result) {
6310
6311 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006312 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006313 int res;
6314
6315 *result = NULL;
6316
6317 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006318 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006319 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006320 return(-1);
6321 }
6322 NEXT;
6323 GROW;
6324 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006325 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006326 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006327 res = XML_ELEMENT_TYPE_MIXED;
6328 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006329 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006330 res = XML_ELEMENT_TYPE_ELEMENT;
6331 }
Owen Taylor3473f882001-02-23 17:55:21 +00006332 SKIP_BLANKS;
6333 *result = tree;
6334 return(res);
6335}
6336
6337/**
6338 * xmlParseElementDecl:
6339 * @ctxt: an XML parser context
6340 *
6341 * parse an Element declaration.
6342 *
6343 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6344 *
6345 * [ VC: Unique Element Type Declaration ]
6346 * No element type may be declared more than once
6347 *
6348 * Returns the type of the element, or -1 in case of error
6349 */
6350int
6351xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006352 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006353 int ret = -1;
6354 xmlElementContentPtr content = NULL;
6355
Daniel Veillard4c778d82005-01-23 17:37:44 +00006356 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006357 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006358 xmlParserInputPtr input = ctxt->input;
6359
6360 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006361 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006362 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6363 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006364 }
6365 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006366 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006367 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006368 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6369 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006370 return(-1);
6371 }
6372 while ((RAW == 0) && (ctxt->inputNr > 1))
6373 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006374 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006375 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6376 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006377 }
6378 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006379 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006380 SKIP(5);
6381 /*
6382 * Element must always be empty.
6383 */
6384 ret = XML_ELEMENT_TYPE_EMPTY;
6385 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6386 (NXT(2) == 'Y')) {
6387 SKIP(3);
6388 /*
6389 * Element is a generic container.
6390 */
6391 ret = XML_ELEMENT_TYPE_ANY;
6392 } else if (RAW == '(') {
6393 ret = xmlParseElementContentDecl(ctxt, name, &content);
6394 } else {
6395 /*
6396 * [ WFC: PEs in Internal Subset ] error handling.
6397 */
6398 if ((RAW == '%') && (ctxt->external == 0) &&
6399 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006400 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006401 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006402 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006403 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006404 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6405 }
Owen Taylor3473f882001-02-23 17:55:21 +00006406 return(-1);
6407 }
6408
6409 SKIP_BLANKS;
6410 /*
6411 * Pop-up of finished entities.
6412 */
6413 while ((RAW == 0) && (ctxt->inputNr > 1))
6414 xmlPopInput(ctxt);
6415 SKIP_BLANKS;
6416
6417 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006418 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006419 if (content != NULL) {
6420 xmlFreeDocElementContent(ctxt->myDoc, content);
6421 }
Owen Taylor3473f882001-02-23 17:55:21 +00006422 } else {
6423 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006424 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6425 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006426 }
6427
6428 NEXT;
6429 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006430 (ctxt->sax->elementDecl != NULL)) {
6431 if (content != NULL)
6432 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006433 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6434 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006435 if ((content != NULL) && (content->parent == NULL)) {
6436 /*
6437 * this is a trick: if xmlAddElementDecl is called,
6438 * instead of copying the full tree it is plugged directly
6439 * if called from the parser. Avoid duplicating the
6440 * interfaces or change the API/ABI
6441 */
6442 xmlFreeDocElementContent(ctxt->myDoc, content);
6443 }
6444 } else if (content != NULL) {
6445 xmlFreeDocElementContent(ctxt->myDoc, content);
6446 }
Owen Taylor3473f882001-02-23 17:55:21 +00006447 }
Owen Taylor3473f882001-02-23 17:55:21 +00006448 }
6449 return(ret);
6450}
6451
6452/**
Owen Taylor3473f882001-02-23 17:55:21 +00006453 * xmlParseConditionalSections
6454 * @ctxt: an XML parser context
6455 *
6456 * [61] conditionalSect ::= includeSect | ignoreSect
6457 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6458 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6459 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6460 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6461 */
6462
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006463static void
Owen Taylor3473f882001-02-23 17:55:21 +00006464xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006465 int id = ctxt->input->id;
6466
Owen Taylor3473f882001-02-23 17:55:21 +00006467 SKIP(3);
6468 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006469 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006470 SKIP(7);
6471 SKIP_BLANKS;
6472 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006473 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006474 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006475 if (ctxt->input->id != id) {
6476 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6477 "All markup of the conditional section is not in the same entity\n",
6478 NULL, NULL);
6479 }
Owen Taylor3473f882001-02-23 17:55:21 +00006480 NEXT;
6481 }
6482 if (xmlParserDebugEntities) {
6483 if ((ctxt->input != NULL) && (ctxt->input->filename))
6484 xmlGenericError(xmlGenericErrorContext,
6485 "%s(%d): ", ctxt->input->filename,
6486 ctxt->input->line);
6487 xmlGenericError(xmlGenericErrorContext,
6488 "Entering INCLUDE Conditional Section\n");
6489 }
6490
6491 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6492 (NXT(2) != '>'))) {
6493 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006494 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006495
6496 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6497 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006498 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006499 NEXT;
6500 } else if (RAW == '%') {
6501 xmlParsePEReference(ctxt);
6502 } else
6503 xmlParseMarkupDecl(ctxt);
6504
6505 /*
6506 * Pop-up of finished entities.
6507 */
6508 while ((RAW == 0) && (ctxt->inputNr > 1))
6509 xmlPopInput(ctxt);
6510
Daniel Veillardfdc91562002-07-01 21:52:03 +00006511 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006512 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006513 break;
6514 }
6515 }
6516 if (xmlParserDebugEntities) {
6517 if ((ctxt->input != NULL) && (ctxt->input->filename))
6518 xmlGenericError(xmlGenericErrorContext,
6519 "%s(%d): ", ctxt->input->filename,
6520 ctxt->input->line);
6521 xmlGenericError(xmlGenericErrorContext,
6522 "Leaving INCLUDE Conditional Section\n");
6523 }
6524
Daniel Veillarda07050d2003-10-19 14:46:32 +00006525 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006526 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006527 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006528 int depth = 0;
6529
6530 SKIP(6);
6531 SKIP_BLANKS;
6532 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006533 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006534 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006535 if (ctxt->input->id != id) {
6536 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6537 "All markup of the conditional section is not in the same entity\n",
6538 NULL, NULL);
6539 }
Owen Taylor3473f882001-02-23 17:55:21 +00006540 NEXT;
6541 }
6542 if (xmlParserDebugEntities) {
6543 if ((ctxt->input != NULL) && (ctxt->input->filename))
6544 xmlGenericError(xmlGenericErrorContext,
6545 "%s(%d): ", ctxt->input->filename,
6546 ctxt->input->line);
6547 xmlGenericError(xmlGenericErrorContext,
6548 "Entering IGNORE Conditional Section\n");
6549 }
6550
6551 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006552 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006553 * But disable SAX event generating DTD building in the meantime
6554 */
6555 state = ctxt->disableSAX;
6556 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006557 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006558 ctxt->instate = XML_PARSER_IGNORE;
6559
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006560 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006561 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6562 depth++;
6563 SKIP(3);
6564 continue;
6565 }
6566 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6567 if (--depth >= 0) SKIP(3);
6568 continue;
6569 }
6570 NEXT;
6571 continue;
6572 }
6573
6574 ctxt->disableSAX = state;
6575 ctxt->instate = instate;
6576
6577 if (xmlParserDebugEntities) {
6578 if ((ctxt->input != NULL) && (ctxt->input->filename))
6579 xmlGenericError(xmlGenericErrorContext,
6580 "%s(%d): ", ctxt->input->filename,
6581 ctxt->input->line);
6582 xmlGenericError(xmlGenericErrorContext,
6583 "Leaving IGNORE Conditional Section\n");
6584 }
6585
6586 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006587 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006588 }
6589
6590 if (RAW == 0)
6591 SHRINK;
6592
6593 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006594 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006595 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006596 if (ctxt->input->id != id) {
6597 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6598 "All markup of the conditional section is not in the same entity\n",
6599 NULL, NULL);
6600 }
Owen Taylor3473f882001-02-23 17:55:21 +00006601 SKIP(3);
6602 }
6603}
6604
6605/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006606 * xmlParseMarkupDecl:
6607 * @ctxt: an XML parser context
6608 *
6609 * parse Markup declarations
6610 *
6611 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6612 * NotationDecl | PI | Comment
6613 *
6614 * [ VC: Proper Declaration/PE Nesting ]
6615 * Parameter-entity replacement text must be properly nested with
6616 * markup declarations. That is to say, if either the first character
6617 * or the last character of a markup declaration (markupdecl above) is
6618 * contained in the replacement text for a parameter-entity reference,
6619 * both must be contained in the same replacement text.
6620 *
6621 * [ WFC: PEs in Internal Subset ]
6622 * In the internal DTD subset, parameter-entity references can occur
6623 * only where markup declarations can occur, not within markup declarations.
6624 * (This does not apply to references that occur in external parameter
6625 * entities or to the external subset.)
6626 */
6627void
6628xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6629 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006630 if (CUR == '<') {
6631 if (NXT(1) == '!') {
6632 switch (NXT(2)) {
6633 case 'E':
6634 if (NXT(3) == 'L')
6635 xmlParseElementDecl(ctxt);
6636 else if (NXT(3) == 'N')
6637 xmlParseEntityDecl(ctxt);
6638 break;
6639 case 'A':
6640 xmlParseAttributeListDecl(ctxt);
6641 break;
6642 case 'N':
6643 xmlParseNotationDecl(ctxt);
6644 break;
6645 case '-':
6646 xmlParseComment(ctxt);
6647 break;
6648 default:
6649 /* there is an error but it will be detected later */
6650 break;
6651 }
6652 } else if (NXT(1) == '?') {
6653 xmlParsePI(ctxt);
6654 }
6655 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006656 /*
6657 * This is only for internal subset. On external entities,
6658 * the replacement is done before parsing stage
6659 */
6660 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6661 xmlParsePEReference(ctxt);
6662
6663 /*
6664 * Conditional sections are allowed from entities included
6665 * by PE References in the internal subset.
6666 */
6667 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6668 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6669 xmlParseConditionalSections(ctxt);
6670 }
6671 }
6672
6673 ctxt->instate = XML_PARSER_DTD;
6674}
6675
6676/**
6677 * xmlParseTextDecl:
6678 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006679 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006680 * parse an XML declaration header for external entities
6681 *
6682 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006683 */
6684
6685void
6686xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6687 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006688 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006689
6690 /*
6691 * We know that '<?xml' is here.
6692 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006693 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006694 SKIP(5);
6695 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006696 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006697 return;
6698 }
6699
William M. Brack76e95df2003-10-18 16:20:14 +00006700 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006701 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6702 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006703 }
6704 SKIP_BLANKS;
6705
6706 /*
6707 * We may have the VersionInfo here.
6708 */
6709 version = xmlParseVersionInfo(ctxt);
6710 if (version == NULL)
6711 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006712 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006713 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006714 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6715 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006716 }
6717 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006718 ctxt->input->version = version;
6719
6720 /*
6721 * We must have the encoding declaration
6722 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006723 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006724 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6725 /*
6726 * The XML REC instructs us to stop parsing right here
6727 */
6728 return;
6729 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006730 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6731 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6732 "Missing encoding in text declaration\n");
6733 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006734
6735 SKIP_BLANKS;
6736 if ((RAW == '?') && (NXT(1) == '>')) {
6737 SKIP(2);
6738 } else if (RAW == '>') {
6739 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006740 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006741 NEXT;
6742 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006743 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006744 MOVETO_ENDTAG(CUR_PTR);
6745 NEXT;
6746 }
6747}
6748
6749/**
Owen Taylor3473f882001-02-23 17:55:21 +00006750 * xmlParseExternalSubset:
6751 * @ctxt: an XML parser context
6752 * @ExternalID: the external identifier
6753 * @SystemID: the system identifier (or URL)
6754 *
6755 * parse Markup declarations from an external subset
6756 *
6757 * [30] extSubset ::= textDecl? extSubsetDecl
6758 *
6759 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6760 */
6761void
6762xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6763 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006764 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006765 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006766
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006767 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006768 (ctxt->input->end - ctxt->input->cur >= 4)) {
6769 xmlChar start[4];
6770 xmlCharEncoding enc;
6771
6772 start[0] = RAW;
6773 start[1] = NXT(1);
6774 start[2] = NXT(2);
6775 start[3] = NXT(3);
6776 enc = xmlDetectCharEncoding(start, 4);
6777 if (enc != XML_CHAR_ENCODING_NONE)
6778 xmlSwitchEncoding(ctxt, enc);
6779 }
6780
Daniel Veillarda07050d2003-10-19 14:46:32 +00006781 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006782 xmlParseTextDecl(ctxt);
6783 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6784 /*
6785 * The XML REC instructs us to stop parsing right here
6786 */
6787 ctxt->instate = XML_PARSER_EOF;
6788 return;
6789 }
6790 }
6791 if (ctxt->myDoc == NULL) {
6792 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006793 if (ctxt->myDoc == NULL) {
6794 xmlErrMemory(ctxt, "New Doc failed");
6795 return;
6796 }
6797 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006798 }
6799 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6800 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6801
6802 ctxt->instate = XML_PARSER_DTD;
6803 ctxt->external = 1;
6804 while (((RAW == '<') && (NXT(1) == '?')) ||
6805 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006806 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006807 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006808 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006809
6810 GROW;
6811 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6812 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006813 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006814 NEXT;
6815 } else if (RAW == '%') {
6816 xmlParsePEReference(ctxt);
6817 } else
6818 xmlParseMarkupDecl(ctxt);
6819
6820 /*
6821 * Pop-up of finished entities.
6822 */
6823 while ((RAW == 0) && (ctxt->inputNr > 1))
6824 xmlPopInput(ctxt);
6825
Daniel Veillardfdc91562002-07-01 21:52:03 +00006826 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006827 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006828 break;
6829 }
6830 }
6831
6832 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006833 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006834 }
6835
6836}
6837
6838/**
6839 * xmlParseReference:
6840 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006841 *
Owen Taylor3473f882001-02-23 17:55:21 +00006842 * parse and handle entity references in content, depending on the SAX
6843 * interface, this may end-up in a call to character() if this is a
6844 * CharRef, a predefined entity, if there is no reference() callback.
6845 * or if the parser was asked to switch to that mode.
6846 *
6847 * [67] Reference ::= EntityRef | CharRef
6848 */
6849void
6850xmlParseReference(xmlParserCtxtPtr ctxt) {
6851 xmlEntityPtr ent;
6852 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006853 int was_checked;
6854 xmlNodePtr list = NULL;
6855 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006856
Daniel Veillard0161e632008-08-28 15:36:32 +00006857
6858 if (RAW != '&')
6859 return;
6860
6861 /*
6862 * Simple case of a CharRef
6863 */
Owen Taylor3473f882001-02-23 17:55:21 +00006864 if (NXT(1) == '#') {
6865 int i = 0;
6866 xmlChar out[10];
6867 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006868 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006869
Daniel Veillarddc171602008-03-26 17:41:38 +00006870 if (value == 0)
6871 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006872 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6873 /*
6874 * So we are using non-UTF-8 buffers
6875 * Check that the char fit on 8bits, if not
6876 * generate a CharRef.
6877 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006878 if (value <= 0xFF) {
6879 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006880 out[1] = 0;
6881 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6882 (!ctxt->disableSAX))
6883 ctxt->sax->characters(ctxt->userData, out, 1);
6884 } else {
6885 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006886 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006887 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006888 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006889 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6890 (!ctxt->disableSAX))
6891 ctxt->sax->reference(ctxt->userData, out);
6892 }
6893 } else {
6894 /*
6895 * Just encode the value in UTF-8
6896 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006897 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006898 out[i] = 0;
6899 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6900 (!ctxt->disableSAX))
6901 ctxt->sax->characters(ctxt->userData, out, i);
6902 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006903 return;
6904 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006905
Daniel Veillard0161e632008-08-28 15:36:32 +00006906 /*
6907 * We are seeing an entity reference
6908 */
6909 ent = xmlParseEntityRef(ctxt);
6910 if (ent == NULL) return;
6911 if (!ctxt->wellFormed)
6912 return;
6913 was_checked = ent->checked;
6914
6915 /* special case of predefined entities */
6916 if ((ent->name == NULL) ||
6917 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6918 val = ent->content;
6919 if (val == NULL) return;
6920 /*
6921 * inline the entity.
6922 */
6923 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6924 (!ctxt->disableSAX))
6925 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6926 return;
6927 }
6928
6929 /*
6930 * The first reference to the entity trigger a parsing phase
6931 * where the ent->children is filled with the result from
6932 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08006933 * Note: external parsed entities will not be loaded, it is not
6934 * required for a non-validating parser, unless the parsing option
6935 * of validating, or substituting entities were given. Doing so is
6936 * far more secure as the parser will only process data coming from
6937 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00006938 */
Daniel Veillard4629ee02012-07-23 14:15:40 +08006939 if ((ent->checked == 0) &&
6940 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
6941 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00006942 unsigned long oldnbent = ctxt->nbentities;
6943
6944 /*
6945 * This is a bit hackish but this seems the best
6946 * way to make sure both SAX and DOM entity support
6947 * behaves okay.
6948 */
6949 void *user_data;
6950 if (ctxt->userData == ctxt)
6951 user_data = NULL;
6952 else
6953 user_data = ctxt->userData;
6954
6955 /*
6956 * Check that this entity is well formed
6957 * 4.3.2: An internal general parsed entity is well-formed
6958 * if its replacement text matches the production labeled
6959 * content.
6960 */
6961 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6962 ctxt->depth++;
6963 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6964 user_data, &list);
6965 ctxt->depth--;
6966
6967 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6968 ctxt->depth++;
6969 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6970 user_data, ctxt->depth, ent->URI,
6971 ent->ExternalID, &list);
6972 ctxt->depth--;
6973 } else {
6974 ret = XML_ERR_ENTITY_PE_INTERNAL;
6975 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6976 "invalid entity type found\n", NULL);
6977 }
6978
6979 /*
6980 * Store the number of entities needing parsing for this entity
6981 * content and do checkings
6982 */
6983 ent->checked = ctxt->nbentities - oldnbent;
6984 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006985 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006986 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006987 return;
6988 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006989 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6990 xmlFreeNodeList(list);
6991 return;
6992 }
Owen Taylor3473f882001-02-23 17:55:21 +00006993
Daniel Veillard0161e632008-08-28 15:36:32 +00006994 if ((ret == XML_ERR_OK) && (list != NULL)) {
6995 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6996 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6997 (ent->children == NULL)) {
6998 ent->children = list;
6999 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007000 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007001 * Prune it directly in the generated document
7002 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007003 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007004 if (((list->type == XML_TEXT_NODE) &&
7005 (list->next == NULL)) ||
7006 (ctxt->parseMode == XML_PARSE_READER)) {
7007 list->parent = (xmlNodePtr) ent;
7008 list = NULL;
7009 ent->owner = 1;
7010 } else {
7011 ent->owner = 0;
7012 while (list != NULL) {
7013 list->parent = (xmlNodePtr) ctxt->node;
7014 list->doc = ctxt->myDoc;
7015 if (list->next == NULL)
7016 ent->last = list;
7017 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007018 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007019 list = ent->children;
7020#ifdef LIBXML_LEGACY_ENABLED
7021 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7022 xmlAddEntityReference(ent, list, NULL);
7023#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007024 }
7025 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007026 ent->owner = 1;
7027 while (list != NULL) {
7028 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007029 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007030 if (list->next == NULL)
7031 ent->last = list;
7032 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007033 }
7034 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007035 } else {
7036 xmlFreeNodeList(list);
7037 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007038 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007039 } else if ((ret != XML_ERR_OK) &&
7040 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7041 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7042 "Entity '%s' failed to parse\n", ent->name);
7043 } else if (list != NULL) {
7044 xmlFreeNodeList(list);
7045 list = NULL;
7046 }
7047 if (ent->checked == 0)
7048 ent->checked = 1;
7049 } else if (ent->checked != 1) {
7050 ctxt->nbentities += ent->checked;
7051 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007052
Daniel Veillard0161e632008-08-28 15:36:32 +00007053 /*
7054 * Now that the entity content has been gathered
7055 * provide it to the application, this can take different forms based
7056 * on the parsing modes.
7057 */
7058 if (ent->children == NULL) {
7059 /*
7060 * Probably running in SAX mode and the callbacks don't
7061 * build the entity content. So unless we already went
7062 * though parsing for first checking go though the entity
7063 * content to generate callbacks associated to the entity
7064 */
7065 if (was_checked != 0) {
7066 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007067 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007068 * This is a bit hackish but this seems the best
7069 * way to make sure both SAX and DOM entity support
7070 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007071 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007072 if (ctxt->userData == ctxt)
7073 user_data = NULL;
7074 else
7075 user_data = ctxt->userData;
7076
7077 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7078 ctxt->depth++;
7079 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7080 ent->content, user_data, NULL);
7081 ctxt->depth--;
7082 } else if (ent->etype ==
7083 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7084 ctxt->depth++;
7085 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7086 ctxt->sax, user_data, ctxt->depth,
7087 ent->URI, ent->ExternalID, NULL);
7088 ctxt->depth--;
7089 } else {
7090 ret = XML_ERR_ENTITY_PE_INTERNAL;
7091 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7092 "invalid entity type found\n", NULL);
7093 }
7094 if (ret == XML_ERR_ENTITY_LOOP) {
7095 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7096 return;
7097 }
7098 }
7099 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7100 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7101 /*
7102 * Entity reference callback comes second, it's somewhat
7103 * superfluous but a compatibility to historical behaviour
7104 */
7105 ctxt->sax->reference(ctxt->userData, ent->name);
7106 }
7107 return;
7108 }
7109
7110 /*
7111 * If we didn't get any children for the entity being built
7112 */
7113 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7114 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7115 /*
7116 * Create a node.
7117 */
7118 ctxt->sax->reference(ctxt->userData, ent->name);
7119 return;
7120 }
7121
7122 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7123 /*
7124 * There is a problem on the handling of _private for entities
7125 * (bug 155816): Should we copy the content of the field from
7126 * the entity (possibly overwriting some value set by the user
7127 * when a copy is created), should we leave it alone, or should
7128 * we try to take care of different situations? The problem
7129 * is exacerbated by the usage of this field by the xmlReader.
7130 * To fix this bug, we look at _private on the created node
7131 * and, if it's NULL, we copy in whatever was in the entity.
7132 * If it's not NULL we leave it alone. This is somewhat of a
7133 * hack - maybe we should have further tests to determine
7134 * what to do.
7135 */
7136 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7137 /*
7138 * Seems we are generating the DOM content, do
7139 * a simple tree copy for all references except the first
7140 * In the first occurrence list contains the replacement.
7141 * progressive == 2 means we are operating on the Reader
7142 * and since nodes are discarded we must copy all the time.
7143 */
7144 if (((list == NULL) && (ent->owner == 0)) ||
7145 (ctxt->parseMode == XML_PARSE_READER)) {
7146 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7147
7148 /*
7149 * when operating on a reader, the entities definitions
7150 * are always owning the entities subtree.
7151 if (ctxt->parseMode == XML_PARSE_READER)
7152 ent->owner = 1;
7153 */
7154
7155 cur = ent->children;
7156 while (cur != NULL) {
7157 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7158 if (nw != NULL) {
7159 if (nw->_private == NULL)
7160 nw->_private = cur->_private;
7161 if (firstChild == NULL){
7162 firstChild = nw;
7163 }
7164 nw = xmlAddChild(ctxt->node, nw);
7165 }
7166 if (cur == ent->last) {
7167 /*
7168 * needed to detect some strange empty
7169 * node cases in the reader tests
7170 */
7171 if ((ctxt->parseMode == XML_PARSE_READER) &&
7172 (nw != NULL) &&
7173 (nw->type == XML_ELEMENT_NODE) &&
7174 (nw->children == NULL))
7175 nw->extra = 1;
7176
7177 break;
7178 }
7179 cur = cur->next;
7180 }
7181#ifdef LIBXML_LEGACY_ENABLED
7182 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7183 xmlAddEntityReference(ent, firstChild, nw);
7184#endif /* LIBXML_LEGACY_ENABLED */
7185 } else if (list == NULL) {
7186 xmlNodePtr nw = NULL, cur, next, last,
7187 firstChild = NULL;
7188 /*
7189 * Copy the entity child list and make it the new
7190 * entity child list. The goal is to make sure any
7191 * ID or REF referenced will be the one from the
7192 * document content and not the entity copy.
7193 */
7194 cur = ent->children;
7195 ent->children = NULL;
7196 last = ent->last;
7197 ent->last = NULL;
7198 while (cur != NULL) {
7199 next = cur->next;
7200 cur->next = NULL;
7201 cur->parent = NULL;
7202 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7203 if (nw != NULL) {
7204 if (nw->_private == NULL)
7205 nw->_private = cur->_private;
7206 if (firstChild == NULL){
7207 firstChild = cur;
7208 }
7209 xmlAddChild((xmlNodePtr) ent, nw);
7210 xmlAddChild(ctxt->node, cur);
7211 }
7212 if (cur == last)
7213 break;
7214 cur = next;
7215 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007216 if (ent->owner == 0)
7217 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007218#ifdef LIBXML_LEGACY_ENABLED
7219 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7220 xmlAddEntityReference(ent, firstChild, nw);
7221#endif /* LIBXML_LEGACY_ENABLED */
7222 } else {
7223 const xmlChar *nbktext;
7224
7225 /*
7226 * the name change is to avoid coalescing of the
7227 * node with a possible previous text one which
7228 * would make ent->children a dangling pointer
7229 */
7230 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7231 -1);
7232 if (ent->children->type == XML_TEXT_NODE)
7233 ent->children->name = nbktext;
7234 if ((ent->last != ent->children) &&
7235 (ent->last->type == XML_TEXT_NODE))
7236 ent->last->name = nbktext;
7237 xmlAddChildList(ctxt->node, ent->children);
7238 }
7239
7240 /*
7241 * This is to avoid a nasty side effect, see
7242 * characters() in SAX.c
7243 */
7244 ctxt->nodemem = 0;
7245 ctxt->nodelen = 0;
7246 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007247 }
7248 }
7249}
7250
7251/**
7252 * xmlParseEntityRef:
7253 * @ctxt: an XML parser context
7254 *
7255 * parse ENTITY references declarations
7256 *
7257 * [68] EntityRef ::= '&' Name ';'
7258 *
7259 * [ WFC: Entity Declared ]
7260 * In a document without any DTD, a document with only an internal DTD
7261 * subset which contains no parameter entity references, or a document
7262 * with "standalone='yes'", the Name given in the entity reference
7263 * must match that in an entity declaration, except that well-formed
7264 * documents need not declare any of the following entities: amp, lt,
7265 * gt, apos, quot. The declaration of a parameter entity must precede
7266 * any reference to it. Similarly, the declaration of a general entity
7267 * must precede any reference to it which appears in a default value in an
7268 * attribute-list declaration. Note that if entities are declared in the
7269 * external subset or in external parameter entities, a non-validating
7270 * processor is not obligated to read and process their declarations;
7271 * for such documents, the rule that an entity must be declared is a
7272 * well-formedness constraint only if standalone='yes'.
7273 *
7274 * [ WFC: Parsed Entity ]
7275 * An entity reference must not contain the name of an unparsed entity
7276 *
7277 * Returns the xmlEntityPtr if found, or NULL otherwise.
7278 */
7279xmlEntityPtr
7280xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007281 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007282 xmlEntityPtr ent = NULL;
7283
7284 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007285
Daniel Veillard0161e632008-08-28 15:36:32 +00007286 if (RAW != '&')
7287 return(NULL);
7288 NEXT;
7289 name = xmlParseName(ctxt);
7290 if (name == NULL) {
7291 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7292 "xmlParseEntityRef: no name\n");
7293 return(NULL);
7294 }
7295 if (RAW != ';') {
7296 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7297 return(NULL);
7298 }
7299 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007300
Daniel Veillard0161e632008-08-28 15:36:32 +00007301 /*
7302 * Predefined entites override any extra definition
7303 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007304 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7305 ent = xmlGetPredefinedEntity(name);
7306 if (ent != NULL)
7307 return(ent);
7308 }
Owen Taylor3473f882001-02-23 17:55:21 +00007309
Daniel Veillard0161e632008-08-28 15:36:32 +00007310 /*
7311 * Increate the number of entity references parsed
7312 */
7313 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007314
Daniel Veillard0161e632008-08-28 15:36:32 +00007315 /*
7316 * Ask first SAX for entity resolution, otherwise try the
7317 * entities which may have stored in the parser context.
7318 */
7319 if (ctxt->sax != NULL) {
7320 if (ctxt->sax->getEntity != NULL)
7321 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007322 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7323 (ctxt->options & XML_PARSE_OLDSAX))
7324 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007325 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7326 (ctxt->userData==ctxt)) {
7327 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007328 }
7329 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007330 /*
7331 * [ WFC: Entity Declared ]
7332 * In a document without any DTD, a document with only an
7333 * internal DTD subset which contains no parameter entity
7334 * references, or a document with "standalone='yes'", the
7335 * Name given in the entity reference must match that in an
7336 * entity declaration, except that well-formed documents
7337 * need not declare any of the following entities: amp, lt,
7338 * gt, apos, quot.
7339 * The declaration of a parameter entity must precede any
7340 * reference to it.
7341 * Similarly, the declaration of a general entity must
7342 * precede any reference to it which appears in a default
7343 * value in an attribute-list declaration. Note that if
7344 * entities are declared in the external subset or in
7345 * external parameter entities, a non-validating processor
7346 * is not obligated to read and process their declarations;
7347 * for such documents, the rule that an entity must be
7348 * declared is a well-formedness constraint only if
7349 * standalone='yes'.
7350 */
7351 if (ent == NULL) {
7352 if ((ctxt->standalone == 1) ||
7353 ((ctxt->hasExternalSubset == 0) &&
7354 (ctxt->hasPErefs == 0))) {
7355 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7356 "Entity '%s' not defined\n", name);
7357 } else {
7358 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7359 "Entity '%s' not defined\n", name);
7360 if ((ctxt->inSubset == 0) &&
7361 (ctxt->sax != NULL) &&
7362 (ctxt->sax->reference != NULL)) {
7363 ctxt->sax->reference(ctxt->userData, name);
7364 }
7365 }
7366 ctxt->valid = 0;
7367 }
7368
7369 /*
7370 * [ WFC: Parsed Entity ]
7371 * An entity reference must not contain the name of an
7372 * unparsed entity
7373 */
7374 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7375 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7376 "Entity reference to unparsed entity %s\n", name);
7377 }
7378
7379 /*
7380 * [ WFC: No External Entity References ]
7381 * Attribute values cannot contain direct or indirect
7382 * entity references to external entities.
7383 */
7384 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7385 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7386 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7387 "Attribute references external entity '%s'\n", name);
7388 }
7389 /*
7390 * [ WFC: No < in Attribute Values ]
7391 * The replacement text of any entity referred to directly or
7392 * indirectly in an attribute value (other than "&lt;") must
7393 * not contain a <.
7394 */
7395 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7396 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007397 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007398 (xmlStrchr(ent->content, '<'))) {
7399 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7400 "'<' in entity '%s' is not allowed in attributes values\n", name);
7401 }
7402
7403 /*
7404 * Internal check, no parameter entities here ...
7405 */
7406 else {
7407 switch (ent->etype) {
7408 case XML_INTERNAL_PARAMETER_ENTITY:
7409 case XML_EXTERNAL_PARAMETER_ENTITY:
7410 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7411 "Attempt to reference the parameter entity '%s'\n",
7412 name);
7413 break;
7414 default:
7415 break;
7416 }
7417 }
7418
7419 /*
7420 * [ WFC: No Recursion ]
7421 * A parsed entity must not contain a recursive reference
7422 * to itself, either directly or indirectly.
7423 * Done somewhere else
7424 */
Owen Taylor3473f882001-02-23 17:55:21 +00007425 return(ent);
7426}
7427
7428/**
7429 * xmlParseStringEntityRef:
7430 * @ctxt: an XML parser context
7431 * @str: a pointer to an index in the string
7432 *
7433 * parse ENTITY references declarations, but this version parses it from
7434 * a string value.
7435 *
7436 * [68] EntityRef ::= '&' Name ';'
7437 *
7438 * [ WFC: Entity Declared ]
7439 * In a document without any DTD, a document with only an internal DTD
7440 * subset which contains no parameter entity references, or a document
7441 * with "standalone='yes'", the Name given in the entity reference
7442 * must match that in an entity declaration, except that well-formed
7443 * documents need not declare any of the following entities: amp, lt,
7444 * gt, apos, quot. The declaration of a parameter entity must precede
7445 * any reference to it. Similarly, the declaration of a general entity
7446 * must precede any reference to it which appears in a default value in an
7447 * attribute-list declaration. Note that if entities are declared in the
7448 * external subset or in external parameter entities, a non-validating
7449 * processor is not obligated to read and process their declarations;
7450 * for such documents, the rule that an entity must be declared is a
7451 * well-formedness constraint only if standalone='yes'.
7452 *
7453 * [ WFC: Parsed Entity ]
7454 * An entity reference must not contain the name of an unparsed entity
7455 *
7456 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7457 * is updated to the current location in the string.
7458 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007459static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007460xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7461 xmlChar *name;
7462 const xmlChar *ptr;
7463 xmlChar cur;
7464 xmlEntityPtr ent = NULL;
7465
7466 if ((str == NULL) || (*str == NULL))
7467 return(NULL);
7468 ptr = *str;
7469 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007470 if (cur != '&')
7471 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007472
Daniel Veillard0161e632008-08-28 15:36:32 +00007473 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007474 name = xmlParseStringName(ctxt, &ptr);
7475 if (name == NULL) {
7476 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7477 "xmlParseStringEntityRef: no name\n");
7478 *str = ptr;
7479 return(NULL);
7480 }
7481 if (*ptr != ';') {
7482 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007483 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007484 *str = ptr;
7485 return(NULL);
7486 }
7487 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007488
Owen Taylor3473f882001-02-23 17:55:21 +00007489
Daniel Veillard0161e632008-08-28 15:36:32 +00007490 /*
7491 * Predefined entites override any extra definition
7492 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007493 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7494 ent = xmlGetPredefinedEntity(name);
7495 if (ent != NULL) {
7496 xmlFree(name);
7497 *str = ptr;
7498 return(ent);
7499 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007500 }
Owen Taylor3473f882001-02-23 17:55:21 +00007501
Daniel Veillard0161e632008-08-28 15:36:32 +00007502 /*
7503 * Increate the number of entity references parsed
7504 */
7505 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007506
Daniel Veillard0161e632008-08-28 15:36:32 +00007507 /*
7508 * Ask first SAX for entity resolution, otherwise try the
7509 * entities which may have stored in the parser context.
7510 */
7511 if (ctxt->sax != NULL) {
7512 if (ctxt->sax->getEntity != NULL)
7513 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007514 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7515 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007516 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7517 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007518 }
7519 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007520
7521 /*
7522 * [ WFC: Entity Declared ]
7523 * In a document without any DTD, a document with only an
7524 * internal DTD subset which contains no parameter entity
7525 * references, or a document with "standalone='yes'", the
7526 * Name given in the entity reference must match that in an
7527 * entity declaration, except that well-formed documents
7528 * need not declare any of the following entities: amp, lt,
7529 * gt, apos, quot.
7530 * The declaration of a parameter entity must precede any
7531 * reference to it.
7532 * Similarly, the declaration of a general entity must
7533 * precede any reference to it which appears in a default
7534 * value in an attribute-list declaration. Note that if
7535 * entities are declared in the external subset or in
7536 * external parameter entities, a non-validating processor
7537 * is not obligated to read and process their declarations;
7538 * for such documents, the rule that an entity must be
7539 * declared is a well-formedness constraint only if
7540 * standalone='yes'.
7541 */
7542 if (ent == NULL) {
7543 if ((ctxt->standalone == 1) ||
7544 ((ctxt->hasExternalSubset == 0) &&
7545 (ctxt->hasPErefs == 0))) {
7546 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7547 "Entity '%s' not defined\n", name);
7548 } else {
7549 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7550 "Entity '%s' not defined\n",
7551 name);
7552 }
7553 /* TODO ? check regressions ctxt->valid = 0; */
7554 }
7555
7556 /*
7557 * [ WFC: Parsed Entity ]
7558 * An entity reference must not contain the name of an
7559 * unparsed entity
7560 */
7561 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7562 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7563 "Entity reference to unparsed entity %s\n", name);
7564 }
7565
7566 /*
7567 * [ WFC: No External Entity References ]
7568 * Attribute values cannot contain direct or indirect
7569 * entity references to external entities.
7570 */
7571 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7572 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7573 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7574 "Attribute references external entity '%s'\n", name);
7575 }
7576 /*
7577 * [ WFC: No < in Attribute Values ]
7578 * The replacement text of any entity referred to directly or
7579 * indirectly in an attribute value (other than "&lt;") must
7580 * not contain a <.
7581 */
7582 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7583 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007584 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007585 (xmlStrchr(ent->content, '<'))) {
7586 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7587 "'<' in entity '%s' is not allowed in attributes values\n",
7588 name);
7589 }
7590
7591 /*
7592 * Internal check, no parameter entities here ...
7593 */
7594 else {
7595 switch (ent->etype) {
7596 case XML_INTERNAL_PARAMETER_ENTITY:
7597 case XML_EXTERNAL_PARAMETER_ENTITY:
7598 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7599 "Attempt to reference the parameter entity '%s'\n",
7600 name);
7601 break;
7602 default:
7603 break;
7604 }
7605 }
7606
7607 /*
7608 * [ WFC: No Recursion ]
7609 * A parsed entity must not contain a recursive reference
7610 * to itself, either directly or indirectly.
7611 * Done somewhere else
7612 */
7613
7614 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007615 *str = ptr;
7616 return(ent);
7617}
7618
7619/**
7620 * xmlParsePEReference:
7621 * @ctxt: an XML parser context
7622 *
7623 * parse PEReference declarations
7624 * The entity content is handled directly by pushing it's content as
7625 * a new input stream.
7626 *
7627 * [69] PEReference ::= '%' Name ';'
7628 *
7629 * [ WFC: No Recursion ]
7630 * A parsed entity must not contain a recursive
7631 * reference to itself, either directly or indirectly.
7632 *
7633 * [ WFC: Entity Declared ]
7634 * In a document without any DTD, a document with only an internal DTD
7635 * subset which contains no parameter entity references, or a document
7636 * with "standalone='yes'", ... ... The declaration of a parameter
7637 * entity must precede any reference to it...
7638 *
7639 * [ VC: Entity Declared ]
7640 * In a document with an external subset or external parameter entities
7641 * with "standalone='no'", ... ... The declaration of a parameter entity
7642 * must precede any reference to it...
7643 *
7644 * [ WFC: In DTD ]
7645 * Parameter-entity references may only appear in the DTD.
7646 * NOTE: misleading but this is handled.
7647 */
7648void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007649xmlParsePEReference(xmlParserCtxtPtr ctxt)
7650{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007651 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007652 xmlEntityPtr entity = NULL;
7653 xmlParserInputPtr input;
7654
Daniel Veillard0161e632008-08-28 15:36:32 +00007655 if (RAW != '%')
7656 return;
7657 NEXT;
7658 name = xmlParseName(ctxt);
7659 if (name == NULL) {
7660 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7661 "xmlParsePEReference: no name\n");
7662 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007663 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007664 if (RAW != ';') {
7665 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7666 return;
7667 }
7668
7669 NEXT;
7670
7671 /*
7672 * Increate the number of entity references parsed
7673 */
7674 ctxt->nbentities++;
7675
7676 /*
7677 * Request the entity from SAX
7678 */
7679 if ((ctxt->sax != NULL) &&
7680 (ctxt->sax->getParameterEntity != NULL))
7681 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7682 name);
7683 if (entity == NULL) {
7684 /*
7685 * [ WFC: Entity Declared ]
7686 * In a document without any DTD, a document with only an
7687 * internal DTD subset which contains no parameter entity
7688 * references, or a document with "standalone='yes'", ...
7689 * ... The declaration of a parameter entity must precede
7690 * any reference to it...
7691 */
7692 if ((ctxt->standalone == 1) ||
7693 ((ctxt->hasExternalSubset == 0) &&
7694 (ctxt->hasPErefs == 0))) {
7695 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7696 "PEReference: %%%s; not found\n",
7697 name);
7698 } else {
7699 /*
7700 * [ VC: Entity Declared ]
7701 * In a document with an external subset or external
7702 * parameter entities with "standalone='no'", ...
7703 * ... The declaration of a parameter entity must
7704 * precede any reference to it...
7705 */
7706 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7707 "PEReference: %%%s; not found\n",
7708 name, NULL);
7709 ctxt->valid = 0;
7710 }
7711 } else {
7712 /*
7713 * Internal checking in case the entity quest barfed
7714 */
7715 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7716 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7717 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7718 "Internal: %%%s; is not a parameter entity\n",
7719 name, NULL);
7720 } else if (ctxt->input->free != deallocblankswrapper) {
7721 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7722 if (xmlPushInput(ctxt, input) < 0)
7723 return;
7724 } else {
7725 /*
7726 * TODO !!!
7727 * handle the extra spaces added before and after
7728 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7729 */
7730 input = xmlNewEntityInputStream(ctxt, entity);
7731 if (xmlPushInput(ctxt, input) < 0)
7732 return;
7733 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7734 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7735 (IS_BLANK_CH(NXT(5)))) {
7736 xmlParseTextDecl(ctxt);
7737 if (ctxt->errNo ==
7738 XML_ERR_UNSUPPORTED_ENCODING) {
7739 /*
7740 * The XML REC instructs us to stop parsing
7741 * right here
7742 */
7743 ctxt->instate = XML_PARSER_EOF;
7744 return;
7745 }
7746 }
7747 }
7748 }
7749 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007750}
7751
7752/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007753 * xmlLoadEntityContent:
7754 * @ctxt: an XML parser context
7755 * @entity: an unloaded system entity
7756 *
7757 * Load the original content of the given system entity from the
7758 * ExternalID/SystemID given. This is to be used for Included in Literal
7759 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7760 *
7761 * Returns 0 in case of success and -1 in case of failure
7762 */
7763static int
7764xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7765 xmlParserInputPtr input;
7766 xmlBufferPtr buf;
7767 int l, c;
7768 int count = 0;
7769
7770 if ((ctxt == NULL) || (entity == NULL) ||
7771 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7772 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7773 (entity->content != NULL)) {
7774 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7775 "xmlLoadEntityContent parameter error");
7776 return(-1);
7777 }
7778
7779 if (xmlParserDebugEntities)
7780 xmlGenericError(xmlGenericErrorContext,
7781 "Reading %s entity content input\n", entity->name);
7782
7783 buf = xmlBufferCreate();
7784 if (buf == NULL) {
7785 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7786 "xmlLoadEntityContent parameter error");
7787 return(-1);
7788 }
7789
7790 input = xmlNewEntityInputStream(ctxt, entity);
7791 if (input == NULL) {
7792 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7793 "xmlLoadEntityContent input error");
7794 xmlBufferFree(buf);
7795 return(-1);
7796 }
7797
7798 /*
7799 * Push the entity as the current input, read char by char
7800 * saving to the buffer until the end of the entity or an error
7801 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007802 if (xmlPushInput(ctxt, input) < 0) {
7803 xmlBufferFree(buf);
7804 return(-1);
7805 }
7806
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007807 GROW;
7808 c = CUR_CHAR(l);
7809 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7810 (IS_CHAR(c))) {
7811 xmlBufferAdd(buf, ctxt->input->cur, l);
7812 if (count++ > 100) {
7813 count = 0;
7814 GROW;
7815 }
7816 NEXTL(l);
7817 c = CUR_CHAR(l);
7818 }
7819
7820 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7821 xmlPopInput(ctxt);
7822 } else if (!IS_CHAR(c)) {
7823 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7824 "xmlLoadEntityContent: invalid char value %d\n",
7825 c);
7826 xmlBufferFree(buf);
7827 return(-1);
7828 }
7829 entity->content = buf->content;
7830 buf->content = NULL;
7831 xmlBufferFree(buf);
7832
7833 return(0);
7834}
7835
7836/**
Owen Taylor3473f882001-02-23 17:55:21 +00007837 * xmlParseStringPEReference:
7838 * @ctxt: an XML parser context
7839 * @str: a pointer to an index in the string
7840 *
7841 * parse PEReference declarations
7842 *
7843 * [69] PEReference ::= '%' Name ';'
7844 *
7845 * [ WFC: No Recursion ]
7846 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007847 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007848 *
7849 * [ WFC: Entity Declared ]
7850 * In a document without any DTD, a document with only an internal DTD
7851 * subset which contains no parameter entity references, or a document
7852 * with "standalone='yes'", ... ... The declaration of a parameter
7853 * entity must precede any reference to it...
7854 *
7855 * [ VC: Entity Declared ]
7856 * In a document with an external subset or external parameter entities
7857 * with "standalone='no'", ... ... The declaration of a parameter entity
7858 * must precede any reference to it...
7859 *
7860 * [ WFC: In DTD ]
7861 * Parameter-entity references may only appear in the DTD.
7862 * NOTE: misleading but this is handled.
7863 *
7864 * Returns the string of the entity content.
7865 * str is updated to the current value of the index
7866 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007867static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007868xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7869 const xmlChar *ptr;
7870 xmlChar cur;
7871 xmlChar *name;
7872 xmlEntityPtr entity = NULL;
7873
7874 if ((str == NULL) || (*str == NULL)) return(NULL);
7875 ptr = *str;
7876 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007877 if (cur != '%')
7878 return(NULL);
7879 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007880 name = xmlParseStringName(ctxt, &ptr);
7881 if (name == NULL) {
7882 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7883 "xmlParseStringPEReference: no name\n");
7884 *str = ptr;
7885 return(NULL);
7886 }
7887 cur = *ptr;
7888 if (cur != ';') {
7889 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7890 xmlFree(name);
7891 *str = ptr;
7892 return(NULL);
7893 }
7894 ptr++;
7895
7896 /*
7897 * Increate the number of entity references parsed
7898 */
7899 ctxt->nbentities++;
7900
7901 /*
7902 * Request the entity from SAX
7903 */
7904 if ((ctxt->sax != NULL) &&
7905 (ctxt->sax->getParameterEntity != NULL))
7906 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7907 name);
7908 if (entity == NULL) {
7909 /*
7910 * [ WFC: Entity Declared ]
7911 * In a document without any DTD, a document with only an
7912 * internal DTD subset which contains no parameter entity
7913 * references, or a document with "standalone='yes'", ...
7914 * ... The declaration of a parameter entity must precede
7915 * any reference to it...
7916 */
7917 if ((ctxt->standalone == 1) ||
7918 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7919 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7920 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007921 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007922 /*
7923 * [ VC: Entity Declared ]
7924 * In a document with an external subset or external
7925 * parameter entities with "standalone='no'", ...
7926 * ... The declaration of a parameter entity must
7927 * precede any reference to it...
7928 */
7929 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7930 "PEReference: %%%s; not found\n",
7931 name, NULL);
7932 ctxt->valid = 0;
7933 }
7934 } else {
7935 /*
7936 * Internal checking in case the entity quest barfed
7937 */
7938 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7939 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7940 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7941 "%%%s; is not a parameter entity\n",
7942 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007943 }
7944 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007945 ctxt->hasPErefs = 1;
7946 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007947 *str = ptr;
7948 return(entity);
7949}
7950
7951/**
7952 * xmlParseDocTypeDecl:
7953 * @ctxt: an XML parser context
7954 *
7955 * parse a DOCTYPE declaration
7956 *
7957 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7958 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7959 *
7960 * [ VC: Root Element Type ]
7961 * The Name in the document type declaration must match the element
7962 * type of the root element.
7963 */
7964
7965void
7966xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007967 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007968 xmlChar *ExternalID = NULL;
7969 xmlChar *URI = NULL;
7970
7971 /*
7972 * We know that '<!DOCTYPE' has been detected.
7973 */
7974 SKIP(9);
7975
7976 SKIP_BLANKS;
7977
7978 /*
7979 * Parse the DOCTYPE name.
7980 */
7981 name = xmlParseName(ctxt);
7982 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007983 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7984 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007985 }
7986 ctxt->intSubName = name;
7987
7988 SKIP_BLANKS;
7989
7990 /*
7991 * Check for SystemID and ExternalID
7992 */
7993 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7994
7995 if ((URI != NULL) || (ExternalID != NULL)) {
7996 ctxt->hasExternalSubset = 1;
7997 }
7998 ctxt->extSubURI = URI;
7999 ctxt->extSubSystem = ExternalID;
8000
8001 SKIP_BLANKS;
8002
8003 /*
8004 * Create and update the internal subset.
8005 */
8006 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8007 (!ctxt->disableSAX))
8008 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8009
8010 /*
8011 * Is there any internal subset declarations ?
8012 * they are handled separately in xmlParseInternalSubset()
8013 */
8014 if (RAW == '[')
8015 return;
8016
8017 /*
8018 * We should be at the end of the DOCTYPE declaration.
8019 */
8020 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008021 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008022 }
8023 NEXT;
8024}
8025
8026/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008027 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008028 * @ctxt: an XML parser context
8029 *
8030 * parse the internal subset declaration
8031 *
8032 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8033 */
8034
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008035static void
Owen Taylor3473f882001-02-23 17:55:21 +00008036xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8037 /*
8038 * Is there any DTD definition ?
8039 */
8040 if (RAW == '[') {
8041 ctxt->instate = XML_PARSER_DTD;
8042 NEXT;
8043 /*
8044 * Parse the succession of Markup declarations and
8045 * PEReferences.
8046 * Subsequence (markupdecl | PEReference | S)*
8047 */
8048 while (RAW != ']') {
8049 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008050 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008051
8052 SKIP_BLANKS;
8053 xmlParseMarkupDecl(ctxt);
8054 xmlParsePEReference(ctxt);
8055
8056 /*
8057 * Pop-up of finished entities.
8058 */
8059 while ((RAW == 0) && (ctxt->inputNr > 1))
8060 xmlPopInput(ctxt);
8061
8062 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008063 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008064 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008065 break;
8066 }
8067 }
8068 if (RAW == ']') {
8069 NEXT;
8070 SKIP_BLANKS;
8071 }
8072 }
8073
8074 /*
8075 * We should be at the end of the DOCTYPE declaration.
8076 */
8077 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008078 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008079 }
8080 NEXT;
8081}
8082
Daniel Veillard81273902003-09-30 00:43:48 +00008083#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008084/**
8085 * xmlParseAttribute:
8086 * @ctxt: an XML parser context
8087 * @value: a xmlChar ** used to store the value of the attribute
8088 *
8089 * parse an attribute
8090 *
8091 * [41] Attribute ::= Name Eq AttValue
8092 *
8093 * [ WFC: No External Entity References ]
8094 * Attribute values cannot contain direct or indirect entity references
8095 * to external entities.
8096 *
8097 * [ WFC: No < in Attribute Values ]
8098 * The replacement text of any entity referred to directly or indirectly in
8099 * an attribute value (other than "&lt;") must not contain a <.
8100 *
8101 * [ VC: Attribute Value Type ]
8102 * The attribute must have been declared; the value must be of the type
8103 * declared for it.
8104 *
8105 * [25] Eq ::= S? '=' S?
8106 *
8107 * With namespace:
8108 *
8109 * [NS 11] Attribute ::= QName Eq AttValue
8110 *
8111 * Also the case QName == xmlns:??? is handled independently as a namespace
8112 * definition.
8113 *
8114 * Returns the attribute name, and the value in *value.
8115 */
8116
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008117const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008118xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008119 const xmlChar *name;
8120 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008121
8122 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008123 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008124 name = xmlParseName(ctxt);
8125 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008126 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008127 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008128 return(NULL);
8129 }
8130
8131 /*
8132 * read the value
8133 */
8134 SKIP_BLANKS;
8135 if (RAW == '=') {
8136 NEXT;
8137 SKIP_BLANKS;
8138 val = xmlParseAttValue(ctxt);
8139 ctxt->instate = XML_PARSER_CONTENT;
8140 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008141 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008142 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008143 return(NULL);
8144 }
8145
8146 /*
8147 * Check that xml:lang conforms to the specification
8148 * No more registered as an error, just generate a warning now
8149 * since this was deprecated in XML second edition
8150 */
8151 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8152 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008153 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8154 "Malformed value for xml:lang : %s\n",
8155 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008156 }
8157 }
8158
8159 /*
8160 * Check that xml:space conforms to the specification
8161 */
8162 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8163 if (xmlStrEqual(val, BAD_CAST "default"))
8164 *(ctxt->space) = 0;
8165 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8166 *(ctxt->space) = 1;
8167 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008168 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008169"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008170 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008171 }
8172 }
8173
8174 *value = val;
8175 return(name);
8176}
8177
8178/**
8179 * xmlParseStartTag:
8180 * @ctxt: an XML parser context
8181 *
8182 * parse a start of tag either for rule element or
8183 * EmptyElement. In both case we don't parse the tag closing chars.
8184 *
8185 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8186 *
8187 * [ WFC: Unique Att Spec ]
8188 * No attribute name may appear more than once in the same start-tag or
8189 * empty-element tag.
8190 *
8191 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8192 *
8193 * [ WFC: Unique Att Spec ]
8194 * No attribute name may appear more than once in the same start-tag or
8195 * empty-element tag.
8196 *
8197 * With namespace:
8198 *
8199 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8200 *
8201 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8202 *
8203 * Returns the element name parsed
8204 */
8205
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008206const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008207xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008208 const xmlChar *name;
8209 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008210 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008211 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008212 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008213 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008214 int i;
8215
8216 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008217 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008218
8219 name = xmlParseName(ctxt);
8220 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008221 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008222 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008223 return(NULL);
8224 }
8225
8226 /*
8227 * Now parse the attributes, it ends up with the ending
8228 *
8229 * (S Attribute)* S?
8230 */
8231 SKIP_BLANKS;
8232 GROW;
8233
Daniel Veillard21a0f912001-02-25 19:54:14 +00008234 while ((RAW != '>') &&
8235 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008236 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008237 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008238 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008239
8240 attname = xmlParseAttribute(ctxt, &attvalue);
8241 if ((attname != NULL) && (attvalue != NULL)) {
8242 /*
8243 * [ WFC: Unique Att Spec ]
8244 * No attribute name may appear more than once in the same
8245 * start-tag or empty-element tag.
8246 */
8247 for (i = 0; i < nbatts;i += 2) {
8248 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008249 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008250 xmlFree(attvalue);
8251 goto failed;
8252 }
8253 }
Owen Taylor3473f882001-02-23 17:55:21 +00008254 /*
8255 * Add the pair to atts
8256 */
8257 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008258 maxatts = 22; /* allow for 10 attrs by default */
8259 atts = (const xmlChar **)
8260 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008261 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008262 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008263 if (attvalue != NULL)
8264 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008265 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008266 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008267 ctxt->atts = atts;
8268 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008269 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008270 const xmlChar **n;
8271
Owen Taylor3473f882001-02-23 17:55:21 +00008272 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008273 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008274 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008275 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008276 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008277 if (attvalue != NULL)
8278 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008279 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008280 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008281 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008282 ctxt->atts = atts;
8283 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008284 }
8285 atts[nbatts++] = attname;
8286 atts[nbatts++] = attvalue;
8287 atts[nbatts] = NULL;
8288 atts[nbatts + 1] = NULL;
8289 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008290 if (attvalue != NULL)
8291 xmlFree(attvalue);
8292 }
8293
8294failed:
8295
Daniel Veillard3772de32002-12-17 10:31:45 +00008296 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008297 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8298 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008299 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008300 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8301 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008302 }
8303 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008304 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8305 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008306 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8307 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008308 break;
8309 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008310 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008311 GROW;
8312 }
8313
8314 /*
8315 * SAX: Start of Element !
8316 */
8317 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008318 (!ctxt->disableSAX)) {
8319 if (nbatts > 0)
8320 ctxt->sax->startElement(ctxt->userData, name, atts);
8321 else
8322 ctxt->sax->startElement(ctxt->userData, name, NULL);
8323 }
Owen Taylor3473f882001-02-23 17:55:21 +00008324
8325 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008326 /* Free only the content strings */
8327 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008328 if (atts[i] != NULL)
8329 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008330 }
8331 return(name);
8332}
8333
8334/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008335 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008336 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008337 * @line: line of the start tag
8338 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008339 *
8340 * parse an end of tag
8341 *
8342 * [42] ETag ::= '</' Name S? '>'
8343 *
8344 * With namespace
8345 *
8346 * [NS 9] ETag ::= '</' QName S? '>'
8347 */
8348
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008349static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008350xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008351 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008352
8353 GROW;
8354 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008355 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008356 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008357 return;
8358 }
8359 SKIP(2);
8360
Daniel Veillard46de64e2002-05-29 08:21:33 +00008361 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008362
8363 /*
8364 * We should definitely be at the ending "S? '>'" part
8365 */
8366 GROW;
8367 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008368 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008369 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008370 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008371 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008372
8373 /*
8374 * [ WFC: Element Type Match ]
8375 * The Name in an element's end-tag must match the element type in the
8376 * start-tag.
8377 *
8378 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008379 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008380 if (name == NULL) name = BAD_CAST "unparseable";
8381 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008382 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008383 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008384 }
8385
8386 /*
8387 * SAX: End of Tag
8388 */
8389 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8390 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008391 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008392
Daniel Veillarde57ec792003-09-10 10:50:59 +00008393 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008394 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008395 return;
8396}
8397
8398/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008399 * xmlParseEndTag:
8400 * @ctxt: an XML parser context
8401 *
8402 * parse an end of tag
8403 *
8404 * [42] ETag ::= '</' Name S? '>'
8405 *
8406 * With namespace
8407 *
8408 * [NS 9] ETag ::= '</' QName S? '>'
8409 */
8410
8411void
8412xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008413 xmlParseEndTag1(ctxt, 0);
8414}
Daniel Veillard81273902003-09-30 00:43:48 +00008415#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008416
8417/************************************************************************
8418 * *
8419 * SAX 2 specific operations *
8420 * *
8421 ************************************************************************/
8422
Daniel Veillard0fb18932003-09-07 09:14:37 +00008423/*
8424 * xmlGetNamespace:
8425 * @ctxt: an XML parser context
8426 * @prefix: the prefix to lookup
8427 *
8428 * Lookup the namespace name for the @prefix (which ca be NULL)
8429 * The prefix must come from the @ctxt->dict dictionnary
8430 *
8431 * Returns the namespace name or NULL if not bound
8432 */
8433static const xmlChar *
8434xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8435 int i;
8436
Daniel Veillarde57ec792003-09-10 10:50:59 +00008437 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008438 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008439 if (ctxt->nsTab[i] == prefix) {
8440 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8441 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008442 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008443 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008444 return(NULL);
8445}
8446
8447/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008448 * xmlParseQName:
8449 * @ctxt: an XML parser context
8450 * @prefix: pointer to store the prefix part
8451 *
8452 * parse an XML Namespace QName
8453 *
8454 * [6] QName ::= (Prefix ':')? LocalPart
8455 * [7] Prefix ::= NCName
8456 * [8] LocalPart ::= NCName
8457 *
8458 * Returns the Name parsed or NULL
8459 */
8460
8461static const xmlChar *
8462xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8463 const xmlChar *l, *p;
8464
8465 GROW;
8466
8467 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008468 if (l == NULL) {
8469 if (CUR == ':') {
8470 l = xmlParseName(ctxt);
8471 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008472 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8473 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008474 *prefix = NULL;
8475 return(l);
8476 }
8477 }
8478 return(NULL);
8479 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008480 if (CUR == ':') {
8481 NEXT;
8482 p = l;
8483 l = xmlParseNCName(ctxt);
8484 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008485 xmlChar *tmp;
8486
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008487 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8488 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008489 l = xmlParseNmtoken(ctxt);
8490 if (l == NULL)
8491 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8492 else {
8493 tmp = xmlBuildQName(l, p, NULL, 0);
8494 xmlFree((char *)l);
8495 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008496 p = xmlDictLookup(ctxt->dict, tmp, -1);
8497 if (tmp != NULL) xmlFree(tmp);
8498 *prefix = NULL;
8499 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008500 }
8501 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008502 xmlChar *tmp;
8503
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008504 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8505 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008506 NEXT;
8507 tmp = (xmlChar *) xmlParseName(ctxt);
8508 if (tmp != NULL) {
8509 tmp = xmlBuildQName(tmp, l, NULL, 0);
8510 l = xmlDictLookup(ctxt->dict, tmp, -1);
8511 if (tmp != NULL) xmlFree(tmp);
8512 *prefix = p;
8513 return(l);
8514 }
8515 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8516 l = xmlDictLookup(ctxt->dict, tmp, -1);
8517 if (tmp != NULL) xmlFree(tmp);
8518 *prefix = p;
8519 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008520 }
8521 *prefix = p;
8522 } else
8523 *prefix = NULL;
8524 return(l);
8525}
8526
8527/**
8528 * xmlParseQNameAndCompare:
8529 * @ctxt: an XML parser context
8530 * @name: the localname
8531 * @prefix: the prefix, if any.
8532 *
8533 * parse an XML name and compares for match
8534 * (specialized for endtag parsing)
8535 *
8536 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8537 * and the name for mismatch
8538 */
8539
8540static const xmlChar *
8541xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8542 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008543 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008544 const xmlChar *in;
8545 const xmlChar *ret;
8546 const xmlChar *prefix2;
8547
8548 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8549
8550 GROW;
8551 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008552
Daniel Veillard0fb18932003-09-07 09:14:37 +00008553 cmp = prefix;
8554 while (*in != 0 && *in == *cmp) {
8555 ++in;
8556 ++cmp;
8557 }
8558 if ((*cmp == 0) && (*in == ':')) {
8559 in++;
8560 cmp = name;
8561 while (*in != 0 && *in == *cmp) {
8562 ++in;
8563 ++cmp;
8564 }
William M. Brack76e95df2003-10-18 16:20:14 +00008565 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008566 /* success */
8567 ctxt->input->cur = in;
8568 return((const xmlChar*) 1);
8569 }
8570 }
8571 /*
8572 * all strings coms from the dictionary, equality can be done directly
8573 */
8574 ret = xmlParseQName (ctxt, &prefix2);
8575 if ((ret == name) && (prefix == prefix2))
8576 return((const xmlChar*) 1);
8577 return ret;
8578}
8579
8580/**
8581 * xmlParseAttValueInternal:
8582 * @ctxt: an XML parser context
8583 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008584 * @alloc: whether the attribute was reallocated as a new string
8585 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008586 *
8587 * parse a value for an attribute.
8588 * NOTE: if no normalization is needed, the routine will return pointers
8589 * directly from the data buffer.
8590 *
8591 * 3.3.3 Attribute-Value Normalization:
8592 * Before the value of an attribute is passed to the application or
8593 * checked for validity, the XML processor must normalize it as follows:
8594 * - a character reference is processed by appending the referenced
8595 * character to the attribute value
8596 * - an entity reference is processed by recursively processing the
8597 * replacement text of the entity
8598 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8599 * appending #x20 to the normalized value, except that only a single
8600 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8601 * parsed entity or the literal entity value of an internal parsed entity
8602 * - other characters are processed by appending them to the normalized value
8603 * If the declared value is not CDATA, then the XML processor must further
8604 * process the normalized attribute value by discarding any leading and
8605 * trailing space (#x20) characters, and by replacing sequences of space
8606 * (#x20) characters by a single space (#x20) character.
8607 * All attributes for which no declaration has been read should be treated
8608 * by a non-validating parser as if declared CDATA.
8609 *
8610 * Returns the AttValue parsed or NULL. The value has to be freed by the
8611 * caller if it was copied, this can be detected by val[*len] == 0.
8612 */
8613
8614static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008615xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8616 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008617{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008618 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008619 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008620 xmlChar *ret = NULL;
8621
8622 GROW;
8623 in = (xmlChar *) CUR_PTR;
8624 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008625 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008626 return (NULL);
8627 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008628 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008629
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008630 /*
8631 * try to handle in this routine the most common case where no
8632 * allocation of a new string is required and where content is
8633 * pure ASCII.
8634 */
8635 limit = *in++;
8636 end = ctxt->input->end;
8637 start = in;
8638 if (in >= end) {
8639 const xmlChar *oldbase = ctxt->input->base;
8640 GROW;
8641 if (oldbase != ctxt->input->base) {
8642 long delta = ctxt->input->base - oldbase;
8643 start = start + delta;
8644 in = in + delta;
8645 }
8646 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008647 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008648 if (normalize) {
8649 /*
8650 * Skip any leading spaces
8651 */
8652 while ((in < end) && (*in != limit) &&
8653 ((*in == 0x20) || (*in == 0x9) ||
8654 (*in == 0xA) || (*in == 0xD))) {
8655 in++;
8656 start = in;
8657 if (in >= end) {
8658 const xmlChar *oldbase = ctxt->input->base;
8659 GROW;
8660 if (oldbase != ctxt->input->base) {
8661 long delta = ctxt->input->base - oldbase;
8662 start = start + delta;
8663 in = in + delta;
8664 }
8665 end = ctxt->input->end;
8666 }
8667 }
8668 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8669 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8670 if ((*in++ == 0x20) && (*in == 0x20)) break;
8671 if (in >= end) {
8672 const xmlChar *oldbase = ctxt->input->base;
8673 GROW;
8674 if (oldbase != ctxt->input->base) {
8675 long delta = ctxt->input->base - oldbase;
8676 start = start + delta;
8677 in = in + delta;
8678 }
8679 end = ctxt->input->end;
8680 }
8681 }
8682 last = in;
8683 /*
8684 * skip the trailing blanks
8685 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008686 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008687 while ((in < end) && (*in != limit) &&
8688 ((*in == 0x20) || (*in == 0x9) ||
8689 (*in == 0xA) || (*in == 0xD))) {
8690 in++;
8691 if (in >= end) {
8692 const xmlChar *oldbase = ctxt->input->base;
8693 GROW;
8694 if (oldbase != ctxt->input->base) {
8695 long delta = ctxt->input->base - oldbase;
8696 start = start + delta;
8697 in = in + delta;
8698 last = last + delta;
8699 }
8700 end = ctxt->input->end;
8701 }
8702 }
8703 if (*in != limit) goto need_complex;
8704 } else {
8705 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8706 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8707 in++;
8708 if (in >= end) {
8709 const xmlChar *oldbase = ctxt->input->base;
8710 GROW;
8711 if (oldbase != ctxt->input->base) {
8712 long delta = ctxt->input->base - oldbase;
8713 start = start + delta;
8714 in = in + delta;
8715 }
8716 end = ctxt->input->end;
8717 }
8718 }
8719 last = in;
8720 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008721 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008722 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008723 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008724 *len = last - start;
8725 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008726 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008727 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008728 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008729 }
8730 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008731 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008732 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008733need_complex:
8734 if (alloc) *alloc = 1;
8735 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008736}
8737
8738/**
8739 * xmlParseAttribute2:
8740 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008741 * @pref: the element prefix
8742 * @elem: the element name
8743 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008744 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008745 * @len: an int * to save the length of the attribute
8746 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008747 *
8748 * parse an attribute in the new SAX2 framework.
8749 *
8750 * Returns the attribute name, and the value in *value, .
8751 */
8752
8753static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008754xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008755 const xmlChar * pref, const xmlChar * elem,
8756 const xmlChar ** prefix, xmlChar ** value,
8757 int *len, int *alloc)
8758{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008759 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008760 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008761 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008762
8763 *value = NULL;
8764 GROW;
8765 name = xmlParseQName(ctxt, prefix);
8766 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008767 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8768 "error parsing attribute name\n");
8769 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008770 }
8771
8772 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008773 * get the type if needed
8774 */
8775 if (ctxt->attsSpecial != NULL) {
8776 int type;
8777
8778 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008779 pref, elem, *prefix, name);
8780 if (type != 0)
8781 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008782 }
8783
8784 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008785 * read the value
8786 */
8787 SKIP_BLANKS;
8788 if (RAW == '=') {
8789 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008790 SKIP_BLANKS;
8791 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8792 if (normalize) {
8793 /*
8794 * Sometimes a second normalisation pass for spaces is needed
8795 * but that only happens if charrefs or entities refernces
8796 * have been used in the attribute value, i.e. the attribute
8797 * value have been extracted in an allocated string already.
8798 */
8799 if (*alloc) {
8800 const xmlChar *val2;
8801
8802 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008803 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008804 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008805 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008806 }
8807 }
8808 }
8809 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008810 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008811 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8812 "Specification mandate value for attribute %s\n",
8813 name);
8814 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008815 }
8816
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008817 if (*prefix == ctxt->str_xml) {
8818 /*
8819 * Check that xml:lang conforms to the specification
8820 * No more registered as an error, just generate a warning now
8821 * since this was deprecated in XML second edition
8822 */
8823 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8824 internal_val = xmlStrndup(val, *len);
8825 if (!xmlCheckLanguageID(internal_val)) {
8826 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8827 "Malformed value for xml:lang : %s\n",
8828 internal_val, NULL);
8829 }
8830 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008831
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008832 /*
8833 * Check that xml:space conforms to the specification
8834 */
8835 if (xmlStrEqual(name, BAD_CAST "space")) {
8836 internal_val = xmlStrndup(val, *len);
8837 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8838 *(ctxt->space) = 0;
8839 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8840 *(ctxt->space) = 1;
8841 else {
8842 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8843 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8844 internal_val, NULL);
8845 }
8846 }
8847 if (internal_val) {
8848 xmlFree(internal_val);
8849 }
8850 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008851
8852 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008853 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008854}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008855/**
8856 * xmlParseStartTag2:
8857 * @ctxt: an XML parser context
8858 *
8859 * parse a start of tag either for rule element or
8860 * EmptyElement. In both case we don't parse the tag closing chars.
8861 * This routine is called when running SAX2 parsing
8862 *
8863 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8864 *
8865 * [ WFC: Unique Att Spec ]
8866 * No attribute name may appear more than once in the same start-tag or
8867 * empty-element tag.
8868 *
8869 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8870 *
8871 * [ WFC: Unique Att Spec ]
8872 * No attribute name may appear more than once in the same start-tag or
8873 * empty-element tag.
8874 *
8875 * With namespace:
8876 *
8877 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8878 *
8879 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8880 *
8881 * Returns the element name parsed
8882 */
8883
8884static const xmlChar *
8885xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008886 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008887 const xmlChar *localname;
8888 const xmlChar *prefix;
8889 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008890 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008891 const xmlChar *nsname;
8892 xmlChar *attvalue;
8893 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008894 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008895 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008896 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008897 const xmlChar *base;
8898 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008899 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008900
8901 if (RAW != '<') return(NULL);
8902 NEXT1;
8903
8904 /*
8905 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8906 * point since the attribute values may be stored as pointers to
8907 * the buffer and calling SHRINK would destroy them !
8908 * The Shrinking is only possible once the full set of attribute
8909 * callbacks have been done.
8910 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008911reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008912 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008913 base = ctxt->input->base;
8914 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008915 oldline = ctxt->input->line;
8916 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008917 nbatts = 0;
8918 nratts = 0;
8919 nbdef = 0;
8920 nbNs = 0;
8921 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008922 /* Forget any namespaces added during an earlier parse of this element. */
8923 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008924
8925 localname = xmlParseQName(ctxt, &prefix);
8926 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008927 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8928 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008929 return(NULL);
8930 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008931 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008932
8933 /*
8934 * Now parse the attributes, it ends up with the ending
8935 *
8936 * (S Attribute)* S?
8937 */
8938 SKIP_BLANKS;
8939 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008940 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008941
8942 while ((RAW != '>') &&
8943 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008944 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008945 const xmlChar *q = CUR_PTR;
8946 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008947 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008948
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008949 attname = xmlParseAttribute2(ctxt, prefix, localname,
8950 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008951 if (ctxt->input->base != base) {
8952 if ((attvalue != NULL) && (alloc != 0))
8953 xmlFree(attvalue);
8954 attvalue = NULL;
8955 goto base_changed;
8956 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008957 if ((attname != NULL) && (attvalue != NULL)) {
8958 if (len < 0) len = xmlStrlen(attvalue);
8959 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008960 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8961 xmlURIPtr uri;
8962
8963 if (*URL != 0) {
8964 uri = xmlParseURI((const char *) URL);
8965 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008966 xmlNsErr(ctxt, XML_WAR_NS_URI,
8967 "xmlns: '%s' is not a valid URI\n",
8968 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008969 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008970 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008971 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8972 "xmlns: URI %s is not absolute\n",
8973 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008974 }
8975 xmlFreeURI(uri);
8976 }
Daniel Veillard37334572008-07-31 08:20:02 +00008977 if (URL == ctxt->str_xml_ns) {
8978 if (attname != ctxt->str_xml) {
8979 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8980 "xml namespace URI cannot be the default namespace\n",
8981 NULL, NULL, NULL);
8982 }
8983 goto skip_default_ns;
8984 }
8985 if ((len == 29) &&
8986 (xmlStrEqual(URL,
8987 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8988 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8989 "reuse of the xmlns namespace name is forbidden\n",
8990 NULL, NULL, NULL);
8991 goto skip_default_ns;
8992 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008993 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008994 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008995 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008996 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008997 for (j = 1;j <= nbNs;j++)
8998 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8999 break;
9000 if (j <= nbNs)
9001 xmlErrAttributeDup(ctxt, NULL, attname);
9002 else
9003 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009004skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009005 if (alloc != 0) xmlFree(attvalue);
9006 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009007 continue;
9008 }
9009 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009010 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9011 xmlURIPtr uri;
9012
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009013 if (attname == ctxt->str_xml) {
9014 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009015 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9016 "xml namespace prefix mapped to wrong URI\n",
9017 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009018 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009019 /*
9020 * Do not keep a namespace definition node
9021 */
Daniel Veillard37334572008-07-31 08:20:02 +00009022 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009023 }
Daniel Veillard37334572008-07-31 08:20:02 +00009024 if (URL == ctxt->str_xml_ns) {
9025 if (attname != ctxt->str_xml) {
9026 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9027 "xml namespace URI mapped to wrong prefix\n",
9028 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009029 }
Daniel Veillard37334572008-07-31 08:20:02 +00009030 goto skip_ns;
9031 }
9032 if (attname == ctxt->str_xmlns) {
9033 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9034 "redefinition of the xmlns prefix is forbidden\n",
9035 NULL, NULL, NULL);
9036 goto skip_ns;
9037 }
9038 if ((len == 29) &&
9039 (xmlStrEqual(URL,
9040 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9041 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9042 "reuse of the xmlns namespace name is forbidden\n",
9043 NULL, NULL, NULL);
9044 goto skip_ns;
9045 }
9046 if ((URL == NULL) || (URL[0] == 0)) {
9047 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9048 "xmlns:%s: Empty XML namespace is not allowed\n",
9049 attname, NULL, NULL);
9050 goto skip_ns;
9051 } else {
9052 uri = xmlParseURI((const char *) URL);
9053 if (uri == NULL) {
9054 xmlNsErr(ctxt, XML_WAR_NS_URI,
9055 "xmlns:%s: '%s' is not a valid URI\n",
9056 attname, URL, NULL);
9057 } else {
9058 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9059 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9060 "xmlns:%s: URI %s is not absolute\n",
9061 attname, URL, NULL);
9062 }
9063 xmlFreeURI(uri);
9064 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009065 }
9066
Daniel Veillard0fb18932003-09-07 09:14:37 +00009067 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009068 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009069 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009070 for (j = 1;j <= nbNs;j++)
9071 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9072 break;
9073 if (j <= nbNs)
9074 xmlErrAttributeDup(ctxt, aprefix, attname);
9075 else
9076 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009077skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009078 if (alloc != 0) xmlFree(attvalue);
9079 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009080 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009081 continue;
9082 }
9083
9084 /*
9085 * Add the pair to atts
9086 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009087 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9088 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009089 if (attvalue[len] == 0)
9090 xmlFree(attvalue);
9091 goto failed;
9092 }
9093 maxatts = ctxt->maxatts;
9094 atts = ctxt->atts;
9095 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009096 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009097 atts[nbatts++] = attname;
9098 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009099 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009100 atts[nbatts++] = attvalue;
9101 attvalue += len;
9102 atts[nbatts++] = attvalue;
9103 /*
9104 * tag if some deallocation is needed
9105 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009106 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009107 } else {
9108 if ((attvalue != NULL) && (attvalue[len] == 0))
9109 xmlFree(attvalue);
9110 }
9111
Daniel Veillard37334572008-07-31 08:20:02 +00009112failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009113
9114 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00009115 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009116 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9117 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009118 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009119 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9120 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009121 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009122 }
9123 SKIP_BLANKS;
9124 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9125 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009126 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009127 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009128 break;
9129 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009130 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009131 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009132 }
9133
Daniel Veillard0fb18932003-09-07 09:14:37 +00009134 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009135 * The attributes defaulting
9136 */
9137 if (ctxt->attsDefault != NULL) {
9138 xmlDefAttrsPtr defaults;
9139
9140 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9141 if (defaults != NULL) {
9142 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009143 attname = defaults->values[5 * i];
9144 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009145
9146 /*
9147 * special work for namespaces defaulted defs
9148 */
9149 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9150 /*
9151 * check that it's not a defined namespace
9152 */
9153 for (j = 1;j <= nbNs;j++)
9154 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9155 break;
9156 if (j <= nbNs) continue;
9157
9158 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009159 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009160 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009161 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009162 nbNs++;
9163 }
9164 } else if (aprefix == ctxt->str_xmlns) {
9165 /*
9166 * check that it's not a defined namespace
9167 */
9168 for (j = 1;j <= nbNs;j++)
9169 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9170 break;
9171 if (j <= nbNs) continue;
9172
9173 nsname = xmlGetNamespace(ctxt, attname);
9174 if (nsname != defaults->values[2]) {
9175 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009176 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009177 nbNs++;
9178 }
9179 } else {
9180 /*
9181 * check that it's not a defined attribute
9182 */
9183 for (j = 0;j < nbatts;j+=5) {
9184 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9185 break;
9186 }
9187 if (j < nbatts) continue;
9188
9189 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9190 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009191 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009192 }
9193 maxatts = ctxt->maxatts;
9194 atts = ctxt->atts;
9195 }
9196 atts[nbatts++] = attname;
9197 atts[nbatts++] = aprefix;
9198 if (aprefix == NULL)
9199 atts[nbatts++] = NULL;
9200 else
9201 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009202 atts[nbatts++] = defaults->values[5 * i + 2];
9203 atts[nbatts++] = defaults->values[5 * i + 3];
9204 if ((ctxt->standalone == 1) &&
9205 (defaults->values[5 * i + 4] != NULL)) {
9206 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9207 "standalone: attribute %s on %s defaulted from external subset\n",
9208 attname, localname);
9209 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009210 nbdef++;
9211 }
9212 }
9213 }
9214 }
9215
Daniel Veillarde70c8772003-11-25 07:21:18 +00009216 /*
9217 * The attributes checkings
9218 */
9219 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009220 /*
9221 * The default namespace does not apply to attribute names.
9222 */
9223 if (atts[i + 1] != NULL) {
9224 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9225 if (nsname == NULL) {
9226 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9227 "Namespace prefix %s for %s on %s is not defined\n",
9228 atts[i + 1], atts[i], localname);
9229 }
9230 atts[i + 2] = nsname;
9231 } else
9232 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009233 /*
9234 * [ WFC: Unique Att Spec ]
9235 * No attribute name may appear more than once in the same
9236 * start-tag or empty-element tag.
9237 * As extended by the Namespace in XML REC.
9238 */
9239 for (j = 0; j < i;j += 5) {
9240 if (atts[i] == atts[j]) {
9241 if (atts[i+1] == atts[j+1]) {
9242 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9243 break;
9244 }
9245 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9246 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9247 "Namespaced Attribute %s in '%s' redefined\n",
9248 atts[i], nsname, NULL);
9249 break;
9250 }
9251 }
9252 }
9253 }
9254
Daniel Veillarde57ec792003-09-10 10:50:59 +00009255 nsname = xmlGetNamespace(ctxt, prefix);
9256 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009257 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9258 "Namespace prefix %s on %s is not defined\n",
9259 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009260 }
9261 *pref = prefix;
9262 *URI = nsname;
9263
9264 /*
9265 * SAX: Start of Element !
9266 */
9267 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9268 (!ctxt->disableSAX)) {
9269 if (nbNs > 0)
9270 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9271 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9272 nbatts / 5, nbdef, atts);
9273 else
9274 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9275 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9276 }
9277
9278 /*
9279 * Free up attribute allocated strings if needed
9280 */
9281 if (attval != 0) {
9282 for (i = 3,j = 0; j < nratts;i += 5,j++)
9283 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9284 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009285 }
9286
9287 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009288
9289base_changed:
9290 /*
9291 * the attribute strings are valid iif the base didn't changed
9292 */
9293 if (attval != 0) {
9294 for (i = 3,j = 0; j < nratts;i += 5,j++)
9295 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9296 xmlFree((xmlChar *) atts[i]);
9297 }
9298 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009299 ctxt->input->line = oldline;
9300 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009301 if (ctxt->wellFormed == 1) {
9302 goto reparse;
9303 }
9304 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009305}
9306
9307/**
9308 * xmlParseEndTag2:
9309 * @ctxt: an XML parser context
9310 * @line: line of the start tag
9311 * @nsNr: number of namespaces on the start tag
9312 *
9313 * parse an end of tag
9314 *
9315 * [42] ETag ::= '</' Name S? '>'
9316 *
9317 * With namespace
9318 *
9319 * [NS 9] ETag ::= '</' QName S? '>'
9320 */
9321
9322static void
9323xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009324 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009325 const xmlChar *name;
9326
9327 GROW;
9328 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009329 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009330 return;
9331 }
9332 SKIP(2);
9333
William M. Brack13dfa872004-09-18 04:52:08 +00009334 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009335 if (ctxt->input->cur[tlen] == '>') {
9336 ctxt->input->cur += tlen + 1;
9337 goto done;
9338 }
9339 ctxt->input->cur += tlen;
9340 name = (xmlChar*)1;
9341 } else {
9342 if (prefix == NULL)
9343 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9344 else
9345 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9346 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009347
9348 /*
9349 * We should definitely be at the ending "S? '>'" part
9350 */
9351 GROW;
9352 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009353 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009354 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009355 } else
9356 NEXT1;
9357
9358 /*
9359 * [ WFC: Element Type Match ]
9360 * The Name in an element's end-tag must match the element type in the
9361 * start-tag.
9362 *
9363 */
9364 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009365 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009366 if ((line == 0) && (ctxt->node != NULL))
9367 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009368 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009369 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009370 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009371 }
9372
9373 /*
9374 * SAX: End of Tag
9375 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009376done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009377 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9378 (!ctxt->disableSAX))
9379 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9380
Daniel Veillard0fb18932003-09-07 09:14:37 +00009381 spacePop(ctxt);
9382 if (nsNr != 0)
9383 nsPop(ctxt, nsNr);
9384 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009385}
9386
9387/**
Owen Taylor3473f882001-02-23 17:55:21 +00009388 * xmlParseCDSect:
9389 * @ctxt: an XML parser context
9390 *
9391 * Parse escaped pure raw content.
9392 *
9393 * [18] CDSect ::= CDStart CData CDEnd
9394 *
9395 * [19] CDStart ::= '<![CDATA['
9396 *
9397 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9398 *
9399 * [21] CDEnd ::= ']]>'
9400 */
9401void
9402xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9403 xmlChar *buf = NULL;
9404 int len = 0;
9405 int size = XML_PARSER_BUFFER_SIZE;
9406 int r, rl;
9407 int s, sl;
9408 int cur, l;
9409 int count = 0;
9410
Daniel Veillard8f597c32003-10-06 08:19:27 +00009411 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009412 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009413 SKIP(9);
9414 } else
9415 return;
9416
9417 ctxt->instate = XML_PARSER_CDATA_SECTION;
9418 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009419 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009420 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009421 ctxt->instate = XML_PARSER_CONTENT;
9422 return;
9423 }
9424 NEXTL(rl);
9425 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009426 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009427 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009428 ctxt->instate = XML_PARSER_CONTENT;
9429 return;
9430 }
9431 NEXTL(sl);
9432 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009433 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009434 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009435 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009436 return;
9437 }
William M. Brack871611b2003-10-18 04:53:14 +00009438 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009439 ((r != ']') || (s != ']') || (cur != '>'))) {
9440 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009441 xmlChar *tmp;
9442
Owen Taylor3473f882001-02-23 17:55:21 +00009443 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009444 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9445 if (tmp == NULL) {
9446 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009447 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009448 return;
9449 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009450 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009451 }
9452 COPY_BUF(rl,buf,len,r);
9453 r = s;
9454 rl = sl;
9455 s = cur;
9456 sl = l;
9457 count++;
9458 if (count > 50) {
9459 GROW;
9460 count = 0;
9461 }
9462 NEXTL(l);
9463 cur = CUR_CHAR(l);
9464 }
9465 buf[len] = 0;
9466 ctxt->instate = XML_PARSER_CONTENT;
9467 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009468 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009469 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009470 xmlFree(buf);
9471 return;
9472 }
9473 NEXTL(l);
9474
9475 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009476 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009477 */
9478 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9479 if (ctxt->sax->cdataBlock != NULL)
9480 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009481 else if (ctxt->sax->characters != NULL)
9482 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009483 }
9484 xmlFree(buf);
9485}
9486
9487/**
9488 * xmlParseContent:
9489 * @ctxt: an XML parser context
9490 *
9491 * Parse a content:
9492 *
9493 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9494 */
9495
9496void
9497xmlParseContent(xmlParserCtxtPtr ctxt) {
9498 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009499 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009500 ((RAW != '<') || (NXT(1) != '/')) &&
9501 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009502 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009503 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009504 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009505
9506 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009507 * First case : a Processing Instruction.
9508 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009509 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009510 xmlParsePI(ctxt);
9511 }
9512
9513 /*
9514 * Second case : a CDSection
9515 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009516 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009517 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009518 xmlParseCDSect(ctxt);
9519 }
9520
9521 /*
9522 * Third case : a comment
9523 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009524 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009525 (NXT(2) == '-') && (NXT(3) == '-')) {
9526 xmlParseComment(ctxt);
9527 ctxt->instate = XML_PARSER_CONTENT;
9528 }
9529
9530 /*
9531 * Fourth case : a sub-element.
9532 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009533 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009534 xmlParseElement(ctxt);
9535 }
9536
9537 /*
9538 * Fifth case : a reference. If if has not been resolved,
9539 * parsing returns it's Name, create the node
9540 */
9541
Daniel Veillard21a0f912001-02-25 19:54:14 +00009542 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009543 xmlParseReference(ctxt);
9544 }
9545
9546 /*
9547 * Last case, text. Note that References are handled directly.
9548 */
9549 else {
9550 xmlParseCharData(ctxt, 0);
9551 }
9552
9553 GROW;
9554 /*
9555 * Pop-up of finished entities.
9556 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009557 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009558 xmlPopInput(ctxt);
9559 SHRINK;
9560
Daniel Veillardfdc91562002-07-01 21:52:03 +00009561 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009562 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9563 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009564 ctxt->instate = XML_PARSER_EOF;
9565 break;
9566 }
9567 }
9568}
9569
9570/**
9571 * xmlParseElement:
9572 * @ctxt: an XML parser context
9573 *
9574 * parse an XML element, this is highly recursive
9575 *
9576 * [39] element ::= EmptyElemTag | STag content ETag
9577 *
9578 * [ WFC: Element Type Match ]
9579 * The Name in an element's end-tag must match the element type in the
9580 * start-tag.
9581 *
Owen Taylor3473f882001-02-23 17:55:21 +00009582 */
9583
9584void
9585xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009586 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009587 const xmlChar *prefix = NULL;
9588 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009589 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009590 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009591 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009592 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009593
Daniel Veillard8915c152008-08-26 13:05:34 +00009594 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9595 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9596 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9597 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9598 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009599 ctxt->instate = XML_PARSER_EOF;
9600 return;
9601 }
9602
Owen Taylor3473f882001-02-23 17:55:21 +00009603 /* Capture start position */
9604 if (ctxt->record_info) {
9605 node_info.begin_pos = ctxt->input->consumed +
9606 (CUR_PTR - ctxt->input->base);
9607 node_info.begin_line = ctxt->input->line;
9608 }
9609
9610 if (ctxt->spaceNr == 0)
9611 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009612 else if (*ctxt->space == -2)
9613 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009614 else
9615 spacePush(ctxt, *ctxt->space);
9616
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009617 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009618#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009619 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009620#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009621 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009622#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009623 else
9624 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009625#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009626 if (ctxt->instate == XML_PARSER_EOF)
9627 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009628 if (name == NULL) {
9629 spacePop(ctxt);
9630 return;
9631 }
9632 namePush(ctxt, name);
9633 ret = ctxt->node;
9634
Daniel Veillard4432df22003-09-28 18:58:27 +00009635#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009636 /*
9637 * [ VC: Root Element Type ]
9638 * The Name in the document type declaration must match the element
9639 * type of the root element.
9640 */
9641 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9642 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9643 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009644#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009645
9646 /*
9647 * Check for an Empty Element.
9648 */
9649 if ((RAW == '/') && (NXT(1) == '>')) {
9650 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009651 if (ctxt->sax2) {
9652 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9653 (!ctxt->disableSAX))
9654 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009655#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009656 } else {
9657 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9658 (!ctxt->disableSAX))
9659 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009660#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009661 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009662 namePop(ctxt);
9663 spacePop(ctxt);
9664 if (nsNr != ctxt->nsNr)
9665 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009666 if ( ret != NULL && ctxt->record_info ) {
9667 node_info.end_pos = ctxt->input->consumed +
9668 (CUR_PTR - ctxt->input->base);
9669 node_info.end_line = ctxt->input->line;
9670 node_info.node = ret;
9671 xmlParserAddNodeInfo(ctxt, &node_info);
9672 }
9673 return;
9674 }
9675 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009676 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009677 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009678 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9679 "Couldn't find end of Start Tag %s line %d\n",
9680 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009681
9682 /*
9683 * end of parsing of this node.
9684 */
9685 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009686 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009687 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009688 if (nsNr != ctxt->nsNr)
9689 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009690
9691 /*
9692 * Capture end position and add node
9693 */
9694 if ( ret != NULL && ctxt->record_info ) {
9695 node_info.end_pos = ctxt->input->consumed +
9696 (CUR_PTR - ctxt->input->base);
9697 node_info.end_line = ctxt->input->line;
9698 node_info.node = ret;
9699 xmlParserAddNodeInfo(ctxt, &node_info);
9700 }
9701 return;
9702 }
9703
9704 /*
9705 * Parse the content of the element:
9706 */
9707 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009708 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009709 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009710 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009711 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009712
9713 /*
9714 * end of parsing of this node.
9715 */
9716 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009717 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009718 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009719 if (nsNr != ctxt->nsNr)
9720 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009721 return;
9722 }
9723
9724 /*
9725 * parse the end of tag: '</' should be here.
9726 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009727 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009728 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009729 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009730 }
9731#ifdef LIBXML_SAX1_ENABLED
9732 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009733 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009734#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009735
9736 /*
9737 * Capture end position and add node
9738 */
9739 if ( ret != NULL && ctxt->record_info ) {
9740 node_info.end_pos = ctxt->input->consumed +
9741 (CUR_PTR - ctxt->input->base);
9742 node_info.end_line = ctxt->input->line;
9743 node_info.node = ret;
9744 xmlParserAddNodeInfo(ctxt, &node_info);
9745 }
9746}
9747
9748/**
9749 * xmlParseVersionNum:
9750 * @ctxt: an XML parser context
9751 *
9752 * parse the XML version value.
9753 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009754 * [26] VersionNum ::= '1.' [0-9]+
9755 *
9756 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009757 *
9758 * Returns the string giving the XML version number, or NULL
9759 */
9760xmlChar *
9761xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9762 xmlChar *buf = NULL;
9763 int len = 0;
9764 int size = 10;
9765 xmlChar cur;
9766
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009767 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009768 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009769 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009770 return(NULL);
9771 }
9772 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009773 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009774 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009775 return(NULL);
9776 }
9777 buf[len++] = cur;
9778 NEXT;
9779 cur=CUR;
9780 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009781 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009782 return(NULL);
9783 }
9784 buf[len++] = cur;
9785 NEXT;
9786 cur=CUR;
9787 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009788 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009789 xmlChar *tmp;
9790
Owen Taylor3473f882001-02-23 17:55:21 +00009791 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009792 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9793 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009794 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009795 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009796 return(NULL);
9797 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009798 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009799 }
9800 buf[len++] = cur;
9801 NEXT;
9802 cur=CUR;
9803 }
9804 buf[len] = 0;
9805 return(buf);
9806}
9807
9808/**
9809 * xmlParseVersionInfo:
9810 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009811 *
Owen Taylor3473f882001-02-23 17:55:21 +00009812 * parse the XML version.
9813 *
9814 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009815 *
Owen Taylor3473f882001-02-23 17:55:21 +00009816 * [25] Eq ::= S? '=' S?
9817 *
9818 * Returns the version string, e.g. "1.0"
9819 */
9820
9821xmlChar *
9822xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9823 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009824
Daniel Veillarda07050d2003-10-19 14:46:32 +00009825 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009826 SKIP(7);
9827 SKIP_BLANKS;
9828 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009829 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009830 return(NULL);
9831 }
9832 NEXT;
9833 SKIP_BLANKS;
9834 if (RAW == '"') {
9835 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009836 version = xmlParseVersionNum(ctxt);
9837 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009838 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009839 } else
9840 NEXT;
9841 } else if (RAW == '\''){
9842 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009843 version = xmlParseVersionNum(ctxt);
9844 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009845 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009846 } else
9847 NEXT;
9848 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009849 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009850 }
9851 }
9852 return(version);
9853}
9854
9855/**
9856 * xmlParseEncName:
9857 * @ctxt: an XML parser context
9858 *
9859 * parse the XML encoding name
9860 *
9861 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9862 *
9863 * Returns the encoding name value or NULL
9864 */
9865xmlChar *
9866xmlParseEncName(xmlParserCtxtPtr ctxt) {
9867 xmlChar *buf = NULL;
9868 int len = 0;
9869 int size = 10;
9870 xmlChar cur;
9871
9872 cur = CUR;
9873 if (((cur >= 'a') && (cur <= 'z')) ||
9874 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009875 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009876 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009877 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009878 return(NULL);
9879 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009880
Owen Taylor3473f882001-02-23 17:55:21 +00009881 buf[len++] = cur;
9882 NEXT;
9883 cur = CUR;
9884 while (((cur >= 'a') && (cur <= 'z')) ||
9885 ((cur >= 'A') && (cur <= 'Z')) ||
9886 ((cur >= '0') && (cur <= '9')) ||
9887 (cur == '.') || (cur == '_') ||
9888 (cur == '-')) {
9889 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009890 xmlChar *tmp;
9891
Owen Taylor3473f882001-02-23 17:55:21 +00009892 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009893 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9894 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009895 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009896 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009897 return(NULL);
9898 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009899 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009900 }
9901 buf[len++] = cur;
9902 NEXT;
9903 cur = CUR;
9904 if (cur == 0) {
9905 SHRINK;
9906 GROW;
9907 cur = CUR;
9908 }
9909 }
9910 buf[len] = 0;
9911 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009912 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009913 }
9914 return(buf);
9915}
9916
9917/**
9918 * xmlParseEncodingDecl:
9919 * @ctxt: an XML parser context
9920 *
9921 * parse the XML encoding declaration
9922 *
9923 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9924 *
9925 * this setups the conversion filters.
9926 *
9927 * Returns the encoding value or NULL
9928 */
9929
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009930const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009931xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9932 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009933
9934 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009935 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009936 SKIP(8);
9937 SKIP_BLANKS;
9938 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009939 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009940 return(NULL);
9941 }
9942 NEXT;
9943 SKIP_BLANKS;
9944 if (RAW == '"') {
9945 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009946 encoding = xmlParseEncName(ctxt);
9947 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009948 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009949 } else
9950 NEXT;
9951 } else if (RAW == '\''){
9952 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009953 encoding = xmlParseEncName(ctxt);
9954 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009955 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009956 } else
9957 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009958 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009959 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009960 }
Daniel Veillardc62efc82011-05-16 16:03:50 +08009961
9962 /*
9963 * Non standard parsing, allowing the user to ignore encoding
9964 */
9965 if (ctxt->options & XML_PARSE_IGNORE_ENC)
9966 return(encoding);
9967
Daniel Veillard6b621b82003-08-11 15:03:34 +00009968 /*
9969 * UTF-16 encoding stwich has already taken place at this stage,
9970 * more over the little-endian/big-endian selection is already done
9971 */
9972 if ((encoding != NULL) &&
9973 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9974 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009975 /*
9976 * If no encoding was passed to the parser, that we are
9977 * using UTF-16 and no decoder is present i.e. the
9978 * document is apparently UTF-8 compatible, then raise an
9979 * encoding mismatch fatal error
9980 */
9981 if ((ctxt->encoding == NULL) &&
9982 (ctxt->input->buf != NULL) &&
9983 (ctxt->input->buf->encoder == NULL)) {
9984 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9985 "Document labelled UTF-16 but has UTF-8 content\n");
9986 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009987 if (ctxt->encoding != NULL)
9988 xmlFree((xmlChar *) ctxt->encoding);
9989 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009990 }
9991 /*
9992 * UTF-8 encoding is handled natively
9993 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009994 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009995 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9996 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009997 if (ctxt->encoding != NULL)
9998 xmlFree((xmlChar *) ctxt->encoding);
9999 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010000 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010001 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010002 xmlCharEncodingHandlerPtr handler;
10003
10004 if (ctxt->input->encoding != NULL)
10005 xmlFree((xmlChar *) ctxt->input->encoding);
10006 ctxt->input->encoding = encoding;
10007
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010008 handler = xmlFindCharEncodingHandler((const char *) encoding);
10009 if (handler != NULL) {
10010 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +000010011 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010012 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010013 "Unsupported encoding %s\n", encoding);
10014 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010015 }
10016 }
10017 }
10018 return(encoding);
10019}
10020
10021/**
10022 * xmlParseSDDecl:
10023 * @ctxt: an XML parser context
10024 *
10025 * parse the XML standalone declaration
10026 *
10027 * [32] SDDecl ::= S 'standalone' Eq
10028 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10029 *
10030 * [ VC: Standalone Document Declaration ]
10031 * TODO The standalone document declaration must have the value "no"
10032 * if any external markup declarations contain declarations of:
10033 * - attributes with default values, if elements to which these
10034 * attributes apply appear in the document without specifications
10035 * of values for these attributes, or
10036 * - entities (other than amp, lt, gt, apos, quot), if references
10037 * to those entities appear in the document, or
10038 * - attributes with values subject to normalization, where the
10039 * attribute appears in the document with a value which will change
10040 * as a result of normalization, or
10041 * - element types with element content, if white space occurs directly
10042 * within any instance of those types.
10043 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010044 * Returns:
10045 * 1 if standalone="yes"
10046 * 0 if standalone="no"
10047 * -2 if standalone attribute is missing or invalid
10048 * (A standalone value of -2 means that the XML declaration was found,
10049 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010050 */
10051
10052int
10053xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010054 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010055
10056 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010057 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010058 SKIP(10);
10059 SKIP_BLANKS;
10060 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010061 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010062 return(standalone);
10063 }
10064 NEXT;
10065 SKIP_BLANKS;
10066 if (RAW == '\''){
10067 NEXT;
10068 if ((RAW == 'n') && (NXT(1) == 'o')) {
10069 standalone = 0;
10070 SKIP(2);
10071 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10072 (NXT(2) == 's')) {
10073 standalone = 1;
10074 SKIP(3);
10075 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010076 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010077 }
10078 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010079 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010080 } else
10081 NEXT;
10082 } else if (RAW == '"'){
10083 NEXT;
10084 if ((RAW == 'n') && (NXT(1) == 'o')) {
10085 standalone = 0;
10086 SKIP(2);
10087 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10088 (NXT(2) == 's')) {
10089 standalone = 1;
10090 SKIP(3);
10091 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010092 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010093 }
10094 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010095 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010096 } else
10097 NEXT;
10098 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010099 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010100 }
10101 }
10102 return(standalone);
10103}
10104
10105/**
10106 * xmlParseXMLDecl:
10107 * @ctxt: an XML parser context
10108 *
10109 * parse an XML declaration header
10110 *
10111 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10112 */
10113
10114void
10115xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10116 xmlChar *version;
10117
10118 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010119 * This value for standalone indicates that the document has an
10120 * XML declaration but it does not have a standalone attribute.
10121 * It will be overwritten later if a standalone attribute is found.
10122 */
10123 ctxt->input->standalone = -2;
10124
10125 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010126 * We know that '<?xml' is here.
10127 */
10128 SKIP(5);
10129
William M. Brack76e95df2003-10-18 16:20:14 +000010130 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010131 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10132 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010133 }
10134 SKIP_BLANKS;
10135
10136 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010137 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010138 */
10139 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010140 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010141 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010142 } else {
10143 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10144 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010145 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010146 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010147 if (ctxt->options & XML_PARSE_OLD10) {
10148 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10149 "Unsupported version '%s'\n",
10150 version);
10151 } else {
10152 if ((version[0] == '1') && ((version[1] == '.'))) {
10153 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10154 "Unsupported version '%s'\n",
10155 version, NULL);
10156 } else {
10157 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10158 "Unsupported version '%s'\n",
10159 version);
10160 }
10161 }
Daniel Veillard19840942001-11-29 16:11:38 +000010162 }
10163 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010164 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010165 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010166 }
Owen Taylor3473f882001-02-23 17:55:21 +000010167
10168 /*
10169 * We may have the encoding declaration
10170 */
William M. Brack76e95df2003-10-18 16:20:14 +000010171 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010172 if ((RAW == '?') && (NXT(1) == '>')) {
10173 SKIP(2);
10174 return;
10175 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010176 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010177 }
10178 xmlParseEncodingDecl(ctxt);
10179 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10180 /*
10181 * The XML REC instructs us to stop parsing right here
10182 */
10183 return;
10184 }
10185
10186 /*
10187 * We may have the standalone status.
10188 */
William M. Brack76e95df2003-10-18 16:20:14 +000010189 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010190 if ((RAW == '?') && (NXT(1) == '>')) {
10191 SKIP(2);
10192 return;
10193 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010194 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010195 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010196
10197 /*
10198 * We can grow the input buffer freely at that point
10199 */
10200 GROW;
10201
Owen Taylor3473f882001-02-23 17:55:21 +000010202 SKIP_BLANKS;
10203 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10204
10205 SKIP_BLANKS;
10206 if ((RAW == '?') && (NXT(1) == '>')) {
10207 SKIP(2);
10208 } else if (RAW == '>') {
10209 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010210 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010211 NEXT;
10212 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010213 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010214 MOVETO_ENDTAG(CUR_PTR);
10215 NEXT;
10216 }
10217}
10218
10219/**
10220 * xmlParseMisc:
10221 * @ctxt: an XML parser context
10222 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010223 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010224 *
10225 * [27] Misc ::= Comment | PI | S
10226 */
10227
10228void
10229xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010230 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010231 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010232 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010233 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010234 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010235 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010236 NEXT;
10237 } else
10238 xmlParseComment(ctxt);
10239 }
10240}
10241
10242/**
10243 * xmlParseDocument:
10244 * @ctxt: an XML parser context
10245 *
10246 * parse an XML document (and build a tree if using the standard SAX
10247 * interface).
10248 *
10249 * [1] document ::= prolog element Misc*
10250 *
10251 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10252 *
10253 * Returns 0, -1 in case of error. the parser context is augmented
10254 * as a result of the parsing.
10255 */
10256
10257int
10258xmlParseDocument(xmlParserCtxtPtr ctxt) {
10259 xmlChar start[4];
10260 xmlCharEncoding enc;
10261
10262 xmlInitParser();
10263
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010264 if ((ctxt == NULL) || (ctxt->input == NULL))
10265 return(-1);
10266
Owen Taylor3473f882001-02-23 17:55:21 +000010267 GROW;
10268
10269 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010270 * SAX: detecting the level.
10271 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010272 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010273
10274 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010275 * SAX: beginning of the document processing.
10276 */
10277 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10278 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10279
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010280 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010281 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010282 /*
10283 * Get the 4 first bytes and decode the charset
10284 * if enc != XML_CHAR_ENCODING_NONE
10285 * plug some encoding conversion routines.
10286 */
10287 start[0] = RAW;
10288 start[1] = NXT(1);
10289 start[2] = NXT(2);
10290 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010291 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010292 if (enc != XML_CHAR_ENCODING_NONE) {
10293 xmlSwitchEncoding(ctxt, enc);
10294 }
Owen Taylor3473f882001-02-23 17:55:21 +000010295 }
10296
10297
10298 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010299 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010300 }
10301
10302 /*
10303 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010304 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010305 * than just the first line, unless the amount of data is really
10306 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010307 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010308 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10309 GROW;
10310 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010311 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010312
10313 /*
10314 * Note that we will switch encoding on the fly.
10315 */
10316 xmlParseXMLDecl(ctxt);
10317 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10318 /*
10319 * The XML REC instructs us to stop parsing right here
10320 */
10321 return(-1);
10322 }
10323 ctxt->standalone = ctxt->input->standalone;
10324 SKIP_BLANKS;
10325 } else {
10326 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10327 }
10328 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10329 ctxt->sax->startDocument(ctxt->userData);
10330
10331 /*
10332 * The Misc part of the Prolog
10333 */
10334 GROW;
10335 xmlParseMisc(ctxt);
10336
10337 /*
10338 * Then possibly doc type declaration(s) and more Misc
10339 * (doctypedecl Misc*)?
10340 */
10341 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010342 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010343
10344 ctxt->inSubset = 1;
10345 xmlParseDocTypeDecl(ctxt);
10346 if (RAW == '[') {
10347 ctxt->instate = XML_PARSER_DTD;
10348 xmlParseInternalSubset(ctxt);
10349 }
10350
10351 /*
10352 * Create and update the external subset.
10353 */
10354 ctxt->inSubset = 2;
10355 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10356 (!ctxt->disableSAX))
10357 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10358 ctxt->extSubSystem, ctxt->extSubURI);
10359 ctxt->inSubset = 0;
10360
Daniel Veillardac4118d2008-01-11 05:27:32 +000010361 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010362
10363 ctxt->instate = XML_PARSER_PROLOG;
10364 xmlParseMisc(ctxt);
10365 }
10366
10367 /*
10368 * Time to start parsing the tree itself
10369 */
10370 GROW;
10371 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010372 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10373 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010374 } else {
10375 ctxt->instate = XML_PARSER_CONTENT;
10376 xmlParseElement(ctxt);
10377 ctxt->instate = XML_PARSER_EPILOG;
10378
10379
10380 /*
10381 * The Misc part at the end
10382 */
10383 xmlParseMisc(ctxt);
10384
Daniel Veillard561b7f82002-03-20 21:55:57 +000010385 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010386 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010387 }
10388 ctxt->instate = XML_PARSER_EOF;
10389 }
10390
10391 /*
10392 * SAX: end of the document processing.
10393 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010394 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010395 ctxt->sax->endDocument(ctxt->userData);
10396
Daniel Veillard5997aca2002-03-18 18:36:20 +000010397 /*
10398 * Remove locally kept entity definitions if the tree was not built
10399 */
10400 if ((ctxt->myDoc != NULL) &&
10401 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10402 xmlFreeDoc(ctxt->myDoc);
10403 ctxt->myDoc = NULL;
10404 }
10405
Daniel Veillardae0765b2008-07-31 19:54:59 +000010406 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10407 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10408 if (ctxt->valid)
10409 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10410 if (ctxt->nsWellFormed)
10411 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10412 if (ctxt->options & XML_PARSE_OLD10)
10413 ctxt->myDoc->properties |= XML_DOC_OLD10;
10414 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010415 if (! ctxt->wellFormed) {
10416 ctxt->valid = 0;
10417 return(-1);
10418 }
Owen Taylor3473f882001-02-23 17:55:21 +000010419 return(0);
10420}
10421
10422/**
10423 * xmlParseExtParsedEnt:
10424 * @ctxt: an XML parser context
10425 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010426 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010427 * An external general parsed entity is well-formed if it matches the
10428 * production labeled extParsedEnt.
10429 *
10430 * [78] extParsedEnt ::= TextDecl? content
10431 *
10432 * Returns 0, -1 in case of error. the parser context is augmented
10433 * as a result of the parsing.
10434 */
10435
10436int
10437xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10438 xmlChar start[4];
10439 xmlCharEncoding enc;
10440
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010441 if ((ctxt == NULL) || (ctxt->input == NULL))
10442 return(-1);
10443
Owen Taylor3473f882001-02-23 17:55:21 +000010444 xmlDefaultSAXHandlerInit();
10445
Daniel Veillard309f81d2003-09-23 09:02:53 +000010446 xmlDetectSAX2(ctxt);
10447
Owen Taylor3473f882001-02-23 17:55:21 +000010448 GROW;
10449
10450 /*
10451 * SAX: beginning of the document processing.
10452 */
10453 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10454 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10455
10456 /*
10457 * Get the 4 first bytes and decode the charset
10458 * if enc != XML_CHAR_ENCODING_NONE
10459 * plug some encoding conversion routines.
10460 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010461 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10462 start[0] = RAW;
10463 start[1] = NXT(1);
10464 start[2] = NXT(2);
10465 start[3] = NXT(3);
10466 enc = xmlDetectCharEncoding(start, 4);
10467 if (enc != XML_CHAR_ENCODING_NONE) {
10468 xmlSwitchEncoding(ctxt, enc);
10469 }
Owen Taylor3473f882001-02-23 17:55:21 +000010470 }
10471
10472
10473 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010474 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010475 }
10476
10477 /*
10478 * Check for the XMLDecl in the Prolog.
10479 */
10480 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010481 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010482
10483 /*
10484 * Note that we will switch encoding on the fly.
10485 */
10486 xmlParseXMLDecl(ctxt);
10487 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10488 /*
10489 * The XML REC instructs us to stop parsing right here
10490 */
10491 return(-1);
10492 }
10493 SKIP_BLANKS;
10494 } else {
10495 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10496 }
10497 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10498 ctxt->sax->startDocument(ctxt->userData);
10499
10500 /*
10501 * Doing validity checking on chunk doesn't make sense
10502 */
10503 ctxt->instate = XML_PARSER_CONTENT;
10504 ctxt->validate = 0;
10505 ctxt->loadsubset = 0;
10506 ctxt->depth = 0;
10507
10508 xmlParseContent(ctxt);
10509
10510 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010511 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010512 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010513 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010514 }
10515
10516 /*
10517 * SAX: end of the document processing.
10518 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010519 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010520 ctxt->sax->endDocument(ctxt->userData);
10521
10522 if (! ctxt->wellFormed) return(-1);
10523 return(0);
10524}
10525
Daniel Veillard73b013f2003-09-30 12:36:01 +000010526#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010527/************************************************************************
10528 * *
10529 * Progressive parsing interfaces *
10530 * *
10531 ************************************************************************/
10532
10533/**
10534 * xmlParseLookupSequence:
10535 * @ctxt: an XML parser context
10536 * @first: the first char to lookup
10537 * @next: the next char to lookup or zero
10538 * @third: the next char to lookup or zero
10539 *
10540 * Try to find if a sequence (first, next, third) or just (first next) or
10541 * (first) is available in the input stream.
10542 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10543 * to avoid rescanning sequences of bytes, it DOES change the state of the
10544 * parser, do not use liberally.
10545 *
10546 * Returns the index to the current parsing point if the full sequence
10547 * is available, -1 otherwise.
10548 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010549static int
Owen Taylor3473f882001-02-23 17:55:21 +000010550xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10551 xmlChar next, xmlChar third) {
10552 int base, len;
10553 xmlParserInputPtr in;
10554 const xmlChar *buf;
10555
10556 in = ctxt->input;
10557 if (in == NULL) return(-1);
10558 base = in->cur - in->base;
10559 if (base < 0) return(-1);
10560 if (ctxt->checkIndex > base)
10561 base = ctxt->checkIndex;
10562 if (in->buf == NULL) {
10563 buf = in->base;
10564 len = in->length;
10565 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010566 buf = xmlBufContent(in->buf->buffer);
10567 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010568 }
10569 /* take into account the sequence length */
10570 if (third) len -= 2;
10571 else if (next) len --;
10572 for (;base < len;base++) {
10573 if (buf[base] == first) {
10574 if (third != 0) {
10575 if ((buf[base + 1] != next) ||
10576 (buf[base + 2] != third)) continue;
10577 } else if (next != 0) {
10578 if (buf[base + 1] != next) continue;
10579 }
10580 ctxt->checkIndex = 0;
10581#ifdef DEBUG_PUSH
10582 if (next == 0)
10583 xmlGenericError(xmlGenericErrorContext,
10584 "PP: lookup '%c' found at %d\n",
10585 first, base);
10586 else if (third == 0)
10587 xmlGenericError(xmlGenericErrorContext,
10588 "PP: lookup '%c%c' found at %d\n",
10589 first, next, base);
10590 else
10591 xmlGenericError(xmlGenericErrorContext,
10592 "PP: lookup '%c%c%c' found at %d\n",
10593 first, next, third, base);
10594#endif
10595 return(base - (in->cur - in->base));
10596 }
10597 }
10598 ctxt->checkIndex = base;
10599#ifdef DEBUG_PUSH
10600 if (next == 0)
10601 xmlGenericError(xmlGenericErrorContext,
10602 "PP: lookup '%c' failed\n", first);
10603 else if (third == 0)
10604 xmlGenericError(xmlGenericErrorContext,
10605 "PP: lookup '%c%c' failed\n", first, next);
10606 else
10607 xmlGenericError(xmlGenericErrorContext,
10608 "PP: lookup '%c%c%c' failed\n", first, next, third);
10609#endif
10610 return(-1);
10611}
10612
10613/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010614 * xmlParseGetLasts:
10615 * @ctxt: an XML parser context
10616 * @lastlt: pointer to store the last '<' from the input
10617 * @lastgt: pointer to store the last '>' from the input
10618 *
10619 * Lookup the last < and > in the current chunk
10620 */
10621static void
10622xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10623 const xmlChar **lastgt) {
10624 const xmlChar *tmp;
10625
10626 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10627 xmlGenericError(xmlGenericErrorContext,
10628 "Internal error: xmlParseGetLasts\n");
10629 return;
10630 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010631 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010632 tmp = ctxt->input->end;
10633 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010634 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010635 if (tmp < ctxt->input->base) {
10636 *lastlt = NULL;
10637 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010638 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010639 *lastlt = tmp;
10640 tmp++;
10641 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10642 if (*tmp == '\'') {
10643 tmp++;
10644 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10645 if (tmp < ctxt->input->end) tmp++;
10646 } else if (*tmp == '"') {
10647 tmp++;
10648 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10649 if (tmp < ctxt->input->end) tmp++;
10650 } else
10651 tmp++;
10652 }
10653 if (tmp < ctxt->input->end)
10654 *lastgt = tmp;
10655 else {
10656 tmp = *lastlt;
10657 tmp--;
10658 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10659 if (tmp >= ctxt->input->base)
10660 *lastgt = tmp;
10661 else
10662 *lastgt = NULL;
10663 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010664 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010665 } else {
10666 *lastlt = NULL;
10667 *lastgt = NULL;
10668 }
10669}
10670/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010671 * xmlCheckCdataPush:
10672 * @cur: pointer to the bock of characters
10673 * @len: length of the block in bytes
10674 *
10675 * Check that the block of characters is okay as SCdata content [20]
10676 *
10677 * Returns the number of bytes to pass if okay, a negative index where an
10678 * UTF-8 error occured otherwise
10679 */
10680static int
10681xmlCheckCdataPush(const xmlChar *utf, int len) {
10682 int ix;
10683 unsigned char c;
10684 int codepoint;
10685
10686 if ((utf == NULL) || (len <= 0))
10687 return(0);
10688
10689 for (ix = 0; ix < len;) { /* string is 0-terminated */
10690 c = utf[ix];
10691 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10692 if (c >= 0x20)
10693 ix++;
10694 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10695 ix++;
10696 else
10697 return(-ix);
10698 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10699 if (ix + 2 > len) return(ix);
10700 if ((utf[ix+1] & 0xc0 ) != 0x80)
10701 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010702 codepoint = (utf[ix] & 0x1f) << 6;
10703 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010704 if (!xmlIsCharQ(codepoint))
10705 return(-ix);
10706 ix += 2;
10707 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10708 if (ix + 3 > len) return(ix);
10709 if (((utf[ix+1] & 0xc0) != 0x80) ||
10710 ((utf[ix+2] & 0xc0) != 0x80))
10711 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010712 codepoint = (utf[ix] & 0xf) << 12;
10713 codepoint |= (utf[ix+1] & 0x3f) << 6;
10714 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010715 if (!xmlIsCharQ(codepoint))
10716 return(-ix);
10717 ix += 3;
10718 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10719 if (ix + 4 > len) return(ix);
10720 if (((utf[ix+1] & 0xc0) != 0x80) ||
10721 ((utf[ix+2] & 0xc0) != 0x80) ||
10722 ((utf[ix+3] & 0xc0) != 0x80))
10723 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010724 codepoint = (utf[ix] & 0x7) << 18;
10725 codepoint |= (utf[ix+1] & 0x3f) << 12;
10726 codepoint |= (utf[ix+2] & 0x3f) << 6;
10727 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010728 if (!xmlIsCharQ(codepoint))
10729 return(-ix);
10730 ix += 4;
10731 } else /* unknown encoding */
10732 return(-ix);
10733 }
10734 return(ix);
10735}
10736
10737/**
Owen Taylor3473f882001-02-23 17:55:21 +000010738 * xmlParseTryOrFinish:
10739 * @ctxt: an XML parser context
10740 * @terminate: last chunk indicator
10741 *
10742 * Try to progress on parsing
10743 *
10744 * Returns zero if no parsing was possible
10745 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010746static int
Owen Taylor3473f882001-02-23 17:55:21 +000010747xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10748 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010749 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010750 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010751 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010752
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010753 if (ctxt->input == NULL)
10754 return(0);
10755
Owen Taylor3473f882001-02-23 17:55:21 +000010756#ifdef DEBUG_PUSH
10757 switch (ctxt->instate) {
10758 case XML_PARSER_EOF:
10759 xmlGenericError(xmlGenericErrorContext,
10760 "PP: try EOF\n"); break;
10761 case XML_PARSER_START:
10762 xmlGenericError(xmlGenericErrorContext,
10763 "PP: try START\n"); break;
10764 case XML_PARSER_MISC:
10765 xmlGenericError(xmlGenericErrorContext,
10766 "PP: try MISC\n");break;
10767 case XML_PARSER_COMMENT:
10768 xmlGenericError(xmlGenericErrorContext,
10769 "PP: try COMMENT\n");break;
10770 case XML_PARSER_PROLOG:
10771 xmlGenericError(xmlGenericErrorContext,
10772 "PP: try PROLOG\n");break;
10773 case XML_PARSER_START_TAG:
10774 xmlGenericError(xmlGenericErrorContext,
10775 "PP: try START_TAG\n");break;
10776 case XML_PARSER_CONTENT:
10777 xmlGenericError(xmlGenericErrorContext,
10778 "PP: try CONTENT\n");break;
10779 case XML_PARSER_CDATA_SECTION:
10780 xmlGenericError(xmlGenericErrorContext,
10781 "PP: try CDATA_SECTION\n");break;
10782 case XML_PARSER_END_TAG:
10783 xmlGenericError(xmlGenericErrorContext,
10784 "PP: try END_TAG\n");break;
10785 case XML_PARSER_ENTITY_DECL:
10786 xmlGenericError(xmlGenericErrorContext,
10787 "PP: try ENTITY_DECL\n");break;
10788 case XML_PARSER_ENTITY_VALUE:
10789 xmlGenericError(xmlGenericErrorContext,
10790 "PP: try ENTITY_VALUE\n");break;
10791 case XML_PARSER_ATTRIBUTE_VALUE:
10792 xmlGenericError(xmlGenericErrorContext,
10793 "PP: try ATTRIBUTE_VALUE\n");break;
10794 case XML_PARSER_DTD:
10795 xmlGenericError(xmlGenericErrorContext,
10796 "PP: try DTD\n");break;
10797 case XML_PARSER_EPILOG:
10798 xmlGenericError(xmlGenericErrorContext,
10799 "PP: try EPILOG\n");break;
10800 case XML_PARSER_PI:
10801 xmlGenericError(xmlGenericErrorContext,
10802 "PP: try PI\n");break;
10803 case XML_PARSER_IGNORE:
10804 xmlGenericError(xmlGenericErrorContext,
10805 "PP: try IGNORE\n");break;
10806 }
10807#endif
10808
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010809 if ((ctxt->input != NULL) &&
10810 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010811 xmlSHRINK(ctxt);
10812 ctxt->checkIndex = 0;
10813 }
10814 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010815
Daniel Veillarda880b122003-04-21 21:36:41 +000010816 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010817 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010818 return(0);
10819
10820
Owen Taylor3473f882001-02-23 17:55:21 +000010821 /*
10822 * Pop-up of finished entities.
10823 */
10824 while ((RAW == 0) && (ctxt->inputNr > 1))
10825 xmlPopInput(ctxt);
10826
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010827 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010828 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010829 avail = ctxt->input->length -
10830 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010831 else {
10832 /*
10833 * If we are operating on converted input, try to flush
10834 * remainng chars to avoid them stalling in the non-converted
10835 * buffer.
10836 */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010837 if (xmlBufIsEmpty(ctxt->input->buf->buffer) == 0) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080010838 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer,
10839 ctxt->input);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010840 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000010841
10842 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard00ac0d32012-07-16 18:03:01 +080010843 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input,
10844 base, current);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010845 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010846 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000010847 (ctxt->input->cur - ctxt->input->base);
10848 }
Owen Taylor3473f882001-02-23 17:55:21 +000010849 if (avail < 1)
10850 goto done;
10851 switch (ctxt->instate) {
10852 case XML_PARSER_EOF:
10853 /*
10854 * Document parsing is done !
10855 */
10856 goto done;
10857 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010858 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10859 xmlChar start[4];
10860 xmlCharEncoding enc;
10861
10862 /*
10863 * Very first chars read from the document flow.
10864 */
10865 if (avail < 4)
10866 goto done;
10867
10868 /*
10869 * Get the 4 first bytes and decode the charset
10870 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010871 * plug some encoding conversion routines,
10872 * else xmlSwitchEncoding will set to (default)
10873 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010874 */
10875 start[0] = RAW;
10876 start[1] = NXT(1);
10877 start[2] = NXT(2);
10878 start[3] = NXT(3);
10879 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010880 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010881 break;
10882 }
Owen Taylor3473f882001-02-23 17:55:21 +000010883
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010884 if (avail < 2)
10885 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010886 cur = ctxt->input->cur[0];
10887 next = ctxt->input->cur[1];
10888 if (cur == 0) {
10889 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10890 ctxt->sax->setDocumentLocator(ctxt->userData,
10891 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010892 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010893 ctxt->instate = XML_PARSER_EOF;
10894#ifdef DEBUG_PUSH
10895 xmlGenericError(xmlGenericErrorContext,
10896 "PP: entering EOF\n");
10897#endif
10898 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10899 ctxt->sax->endDocument(ctxt->userData);
10900 goto done;
10901 }
10902 if ((cur == '<') && (next == '?')) {
10903 /* PI or XML decl */
10904 if (avail < 5) return(ret);
10905 if ((!terminate) &&
10906 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10907 return(ret);
10908 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10909 ctxt->sax->setDocumentLocator(ctxt->userData,
10910 &xmlDefaultSAXLocator);
10911 if ((ctxt->input->cur[2] == 'x') &&
10912 (ctxt->input->cur[3] == 'm') &&
10913 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010914 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010915 ret += 5;
10916#ifdef DEBUG_PUSH
10917 xmlGenericError(xmlGenericErrorContext,
10918 "PP: Parsing XML Decl\n");
10919#endif
10920 xmlParseXMLDecl(ctxt);
10921 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10922 /*
10923 * The XML REC instructs us to stop parsing right
10924 * here
10925 */
10926 ctxt->instate = XML_PARSER_EOF;
10927 return(0);
10928 }
10929 ctxt->standalone = ctxt->input->standalone;
10930 if ((ctxt->encoding == NULL) &&
10931 (ctxt->input->encoding != NULL))
10932 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10933 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10934 (!ctxt->disableSAX))
10935 ctxt->sax->startDocument(ctxt->userData);
10936 ctxt->instate = XML_PARSER_MISC;
10937#ifdef DEBUG_PUSH
10938 xmlGenericError(xmlGenericErrorContext,
10939 "PP: entering MISC\n");
10940#endif
10941 } else {
10942 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10943 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10944 (!ctxt->disableSAX))
10945 ctxt->sax->startDocument(ctxt->userData);
10946 ctxt->instate = XML_PARSER_MISC;
10947#ifdef DEBUG_PUSH
10948 xmlGenericError(xmlGenericErrorContext,
10949 "PP: entering MISC\n");
10950#endif
10951 }
10952 } else {
10953 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10954 ctxt->sax->setDocumentLocator(ctxt->userData,
10955 &xmlDefaultSAXLocator);
10956 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010957 if (ctxt->version == NULL) {
10958 xmlErrMemory(ctxt, NULL);
10959 break;
10960 }
Owen Taylor3473f882001-02-23 17:55:21 +000010961 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10962 (!ctxt->disableSAX))
10963 ctxt->sax->startDocument(ctxt->userData);
10964 ctxt->instate = XML_PARSER_MISC;
10965#ifdef DEBUG_PUSH
10966 xmlGenericError(xmlGenericErrorContext,
10967 "PP: entering MISC\n");
10968#endif
10969 }
10970 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010971 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010972 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010973 const xmlChar *prefix = NULL;
10974 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010975 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010976
10977 if ((avail < 2) && (ctxt->inputNr == 1))
10978 goto done;
10979 cur = ctxt->input->cur[0];
10980 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010981 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010982 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010983 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10984 ctxt->sax->endDocument(ctxt->userData);
10985 goto done;
10986 }
10987 if (!terminate) {
10988 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010989 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010990 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010991 goto done;
10992 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10993 goto done;
10994 }
10995 }
10996 if (ctxt->spaceNr == 0)
10997 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010998 else if (*ctxt->space == -2)
10999 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011000 else
11001 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011002#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011003 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011004#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011005 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011006#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011007 else
11008 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011009#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011010 if (ctxt->instate == XML_PARSER_EOF)
11011 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011012 if (name == NULL) {
11013 spacePop(ctxt);
11014 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011015 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11016 ctxt->sax->endDocument(ctxt->userData);
11017 goto done;
11018 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011019#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011020 /*
11021 * [ VC: Root Element Type ]
11022 * The Name in the document type declaration must match
11023 * the element type of the root element.
11024 */
11025 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11026 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11027 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011028#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011029
11030 /*
11031 * Check for an Empty Element.
11032 */
11033 if ((RAW == '/') && (NXT(1) == '>')) {
11034 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011035
11036 if (ctxt->sax2) {
11037 if ((ctxt->sax != NULL) &&
11038 (ctxt->sax->endElementNs != NULL) &&
11039 (!ctxt->disableSAX))
11040 ctxt->sax->endElementNs(ctxt->userData, name,
11041 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011042 if (ctxt->nsNr - nsNr > 0)
11043 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011044#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011045 } else {
11046 if ((ctxt->sax != NULL) &&
11047 (ctxt->sax->endElement != NULL) &&
11048 (!ctxt->disableSAX))
11049 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011050#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011051 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011052 spacePop(ctxt);
11053 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011054 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011055 } else {
11056 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011057 }
11058 break;
11059 }
11060 if (RAW == '>') {
11061 NEXT;
11062 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011063 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011064 "Couldn't find end of Start Tag %s\n",
11065 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011066 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011067 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011068 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011069 if (ctxt->sax2)
11070 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011071#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011072 else
11073 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011074#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011075
Daniel Veillarda880b122003-04-21 21:36:41 +000011076 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011077 break;
11078 }
11079 case XML_PARSER_CONTENT: {
11080 const xmlChar *test;
11081 unsigned int cons;
11082 if ((avail < 2) && (ctxt->inputNr == 1))
11083 goto done;
11084 cur = ctxt->input->cur[0];
11085 next = ctxt->input->cur[1];
11086
11087 test = CUR_PTR;
11088 cons = ctxt->input->consumed;
11089 if ((cur == '<') && (next == '/')) {
11090 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011091 break;
11092 } else if ((cur == '<') && (next == '?')) {
11093 if ((!terminate) &&
11094 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11095 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011096 xmlParsePI(ctxt);
11097 } else if ((cur == '<') && (next != '!')) {
11098 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011099 break;
11100 } else if ((cur == '<') && (next == '!') &&
11101 (ctxt->input->cur[2] == '-') &&
11102 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011103 int term;
11104
11105 if (avail < 4)
11106 goto done;
11107 ctxt->input->cur += 4;
11108 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11109 ctxt->input->cur -= 4;
11110 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000011111 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011112 xmlParseComment(ctxt);
11113 ctxt->instate = XML_PARSER_CONTENT;
11114 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11115 (ctxt->input->cur[2] == '[') &&
11116 (ctxt->input->cur[3] == 'C') &&
11117 (ctxt->input->cur[4] == 'D') &&
11118 (ctxt->input->cur[5] == 'A') &&
11119 (ctxt->input->cur[6] == 'T') &&
11120 (ctxt->input->cur[7] == 'A') &&
11121 (ctxt->input->cur[8] == '[')) {
11122 SKIP(9);
11123 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011124 break;
11125 } else if ((cur == '<') && (next == '!') &&
11126 (avail < 9)) {
11127 goto done;
11128 } else if (cur == '&') {
11129 if ((!terminate) &&
11130 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11131 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011132 xmlParseReference(ctxt);
11133 } else {
11134 /* TODO Avoid the extra copy, handle directly !!! */
11135 /*
11136 * Goal of the following test is:
11137 * - minimize calls to the SAX 'character' callback
11138 * when they are mergeable
11139 * - handle an problem for isBlank when we only parse
11140 * a sequence of blank chars and the next one is
11141 * not available to check against '<' presence.
11142 * - tries to homogenize the differences in SAX
11143 * callbacks between the push and pull versions
11144 * of the parser.
11145 */
11146 if ((ctxt->inputNr == 1) &&
11147 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11148 if (!terminate) {
11149 if (ctxt->progressive) {
11150 if ((lastlt == NULL) ||
11151 (ctxt->input->cur > lastlt))
11152 goto done;
11153 } else if (xmlParseLookupSequence(ctxt,
11154 '<', 0, 0) < 0) {
11155 goto done;
11156 }
11157 }
11158 }
11159 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011160 xmlParseCharData(ctxt, 0);
11161 }
11162 /*
11163 * Pop-up of finished entities.
11164 */
11165 while ((RAW == 0) && (ctxt->inputNr > 1))
11166 xmlPopInput(ctxt);
11167 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011168 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11169 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011170 ctxt->instate = XML_PARSER_EOF;
11171 break;
11172 }
11173 break;
11174 }
11175 case XML_PARSER_END_TAG:
11176 if (avail < 2)
11177 goto done;
11178 if (!terminate) {
11179 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011180 /* > can be found unescaped in attribute values */
11181 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011182 goto done;
11183 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11184 goto done;
11185 }
11186 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011187 if (ctxt->sax2) {
11188 xmlParseEndTag2(ctxt,
11189 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11190 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011191 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011192 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011193 }
11194#ifdef LIBXML_SAX1_ENABLED
11195 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011196 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011197#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011198 if (ctxt->instate == XML_PARSER_EOF) {
11199 /* Nothing */
11200 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011201 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011202 } else {
11203 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011204 }
11205 break;
11206 case XML_PARSER_CDATA_SECTION: {
11207 /*
11208 * The Push mode need to have the SAX callback for
11209 * cdataBlock merge back contiguous callbacks.
11210 */
11211 int base;
11212
11213 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11214 if (base < 0) {
11215 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011216 int tmp;
11217
11218 tmp = xmlCheckCdataPush(ctxt->input->cur,
11219 XML_PARSER_BIG_BUFFER_SIZE);
11220 if (tmp < 0) {
11221 tmp = -tmp;
11222 ctxt->input->cur += tmp;
11223 goto encoding_error;
11224 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011225 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11226 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011227 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011228 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011229 else if (ctxt->sax->characters != NULL)
11230 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011231 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011232 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011233 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011234 ctxt->checkIndex = 0;
11235 }
11236 goto done;
11237 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011238 int tmp;
11239
11240 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11241 if ((tmp < 0) || (tmp != base)) {
11242 tmp = -tmp;
11243 ctxt->input->cur += tmp;
11244 goto encoding_error;
11245 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011246 if ((ctxt->sax != NULL) && (base == 0) &&
11247 (ctxt->sax->cdataBlock != NULL) &&
11248 (!ctxt->disableSAX)) {
11249 /*
11250 * Special case to provide identical behaviour
11251 * between pull and push parsers on enpty CDATA
11252 * sections
11253 */
11254 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11255 (!strncmp((const char *)&ctxt->input->cur[-9],
11256 "<![CDATA[", 9)))
11257 ctxt->sax->cdataBlock(ctxt->userData,
11258 BAD_CAST "", 0);
11259 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011260 (!ctxt->disableSAX)) {
11261 if (ctxt->sax->cdataBlock != NULL)
11262 ctxt->sax->cdataBlock(ctxt->userData,
11263 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011264 else if (ctxt->sax->characters != NULL)
11265 ctxt->sax->characters(ctxt->userData,
11266 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011267 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011268 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011269 ctxt->checkIndex = 0;
11270 ctxt->instate = XML_PARSER_CONTENT;
11271#ifdef DEBUG_PUSH
11272 xmlGenericError(xmlGenericErrorContext,
11273 "PP: entering CONTENT\n");
11274#endif
11275 }
11276 break;
11277 }
Owen Taylor3473f882001-02-23 17:55:21 +000011278 case XML_PARSER_MISC:
11279 SKIP_BLANKS;
11280 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011281 avail = ctxt->input->length -
11282 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011283 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011284 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011285 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011286 if (avail < 2)
11287 goto done;
11288 cur = ctxt->input->cur[0];
11289 next = ctxt->input->cur[1];
11290 if ((cur == '<') && (next == '?')) {
11291 if ((!terminate) &&
11292 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11293 goto done;
11294#ifdef DEBUG_PUSH
11295 xmlGenericError(xmlGenericErrorContext,
11296 "PP: Parsing PI\n");
11297#endif
11298 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011299 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011300 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011301 (ctxt->input->cur[2] == '-') &&
11302 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011303 if ((!terminate) &&
11304 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11305 goto done;
11306#ifdef DEBUG_PUSH
11307 xmlGenericError(xmlGenericErrorContext,
11308 "PP: Parsing Comment\n");
11309#endif
11310 xmlParseComment(ctxt);
11311 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011312 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011313 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011314 (ctxt->input->cur[2] == 'D') &&
11315 (ctxt->input->cur[3] == 'O') &&
11316 (ctxt->input->cur[4] == 'C') &&
11317 (ctxt->input->cur[5] == 'T') &&
11318 (ctxt->input->cur[6] == 'Y') &&
11319 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011320 (ctxt->input->cur[8] == 'E')) {
11321 if ((!terminate) &&
11322 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11323 goto done;
11324#ifdef DEBUG_PUSH
11325 xmlGenericError(xmlGenericErrorContext,
11326 "PP: Parsing internal subset\n");
11327#endif
11328 ctxt->inSubset = 1;
11329 xmlParseDocTypeDecl(ctxt);
11330 if (RAW == '[') {
11331 ctxt->instate = XML_PARSER_DTD;
11332#ifdef DEBUG_PUSH
11333 xmlGenericError(xmlGenericErrorContext,
11334 "PP: entering DTD\n");
11335#endif
11336 } else {
11337 /*
11338 * Create and update the external subset.
11339 */
11340 ctxt->inSubset = 2;
11341 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11342 (ctxt->sax->externalSubset != NULL))
11343 ctxt->sax->externalSubset(ctxt->userData,
11344 ctxt->intSubName, ctxt->extSubSystem,
11345 ctxt->extSubURI);
11346 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011347 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011348 ctxt->instate = XML_PARSER_PROLOG;
11349#ifdef DEBUG_PUSH
11350 xmlGenericError(xmlGenericErrorContext,
11351 "PP: entering PROLOG\n");
11352#endif
11353 }
11354 } else if ((cur == '<') && (next == '!') &&
11355 (avail < 9)) {
11356 goto done;
11357 } else {
11358 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011359 ctxt->progressive = 1;
11360 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011361#ifdef DEBUG_PUSH
11362 xmlGenericError(xmlGenericErrorContext,
11363 "PP: entering START_TAG\n");
11364#endif
11365 }
11366 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011367 case XML_PARSER_PROLOG:
11368 SKIP_BLANKS;
11369 if (ctxt->input->buf == NULL)
11370 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11371 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011372 avail = xmlBufUse(ctxt->input->buf->buffer) -
11373 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011374 if (avail < 2)
11375 goto done;
11376 cur = ctxt->input->cur[0];
11377 next = ctxt->input->cur[1];
11378 if ((cur == '<') && (next == '?')) {
11379 if ((!terminate) &&
11380 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11381 goto done;
11382#ifdef DEBUG_PUSH
11383 xmlGenericError(xmlGenericErrorContext,
11384 "PP: Parsing PI\n");
11385#endif
11386 xmlParsePI(ctxt);
11387 } else if ((cur == '<') && (next == '!') &&
11388 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11389 if ((!terminate) &&
11390 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11391 goto done;
11392#ifdef DEBUG_PUSH
11393 xmlGenericError(xmlGenericErrorContext,
11394 "PP: Parsing Comment\n");
11395#endif
11396 xmlParseComment(ctxt);
11397 ctxt->instate = XML_PARSER_PROLOG;
11398 } else if ((cur == '<') && (next == '!') &&
11399 (avail < 4)) {
11400 goto done;
11401 } else {
11402 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011403 if (ctxt->progressive == 0)
11404 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011405 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011406#ifdef DEBUG_PUSH
11407 xmlGenericError(xmlGenericErrorContext,
11408 "PP: entering START_TAG\n");
11409#endif
11410 }
11411 break;
11412 case XML_PARSER_EPILOG:
11413 SKIP_BLANKS;
11414 if (ctxt->input->buf == NULL)
11415 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11416 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011417 avail = xmlBufUse(ctxt->input->buf->buffer) -
11418 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011419 if (avail < 2)
11420 goto done;
11421 cur = ctxt->input->cur[0];
11422 next = ctxt->input->cur[1];
11423 if ((cur == '<') && (next == '?')) {
11424 if ((!terminate) &&
11425 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11426 goto done;
11427#ifdef DEBUG_PUSH
11428 xmlGenericError(xmlGenericErrorContext,
11429 "PP: Parsing PI\n");
11430#endif
11431 xmlParsePI(ctxt);
11432 ctxt->instate = XML_PARSER_EPILOG;
11433 } else if ((cur == '<') && (next == '!') &&
11434 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11435 if ((!terminate) &&
11436 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11437 goto done;
11438#ifdef DEBUG_PUSH
11439 xmlGenericError(xmlGenericErrorContext,
11440 "PP: Parsing Comment\n");
11441#endif
11442 xmlParseComment(ctxt);
11443 ctxt->instate = XML_PARSER_EPILOG;
11444 } else if ((cur == '<') && (next == '!') &&
11445 (avail < 4)) {
11446 goto done;
11447 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011448 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011449 ctxt->instate = XML_PARSER_EOF;
11450#ifdef DEBUG_PUSH
11451 xmlGenericError(xmlGenericErrorContext,
11452 "PP: entering EOF\n");
11453#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011454 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011455 ctxt->sax->endDocument(ctxt->userData);
11456 goto done;
11457 }
11458 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011459 case XML_PARSER_DTD: {
11460 /*
11461 * Sorry but progressive parsing of the internal subset
11462 * is not expected to be supported. We first check that
11463 * the full content of the internal subset is available and
11464 * the parsing is launched only at that point.
11465 * Internal subset ends up with "']' S? '>'" in an unescaped
11466 * section and not in a ']]>' sequence which are conditional
11467 * sections (whoever argued to keep that crap in XML deserve
11468 * a place in hell !).
11469 */
11470 int base, i;
11471 xmlChar *buf;
11472 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011473 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011474
11475 base = ctxt->input->cur - ctxt->input->base;
11476 if (base < 0) return(0);
11477 if (ctxt->checkIndex > base)
11478 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011479 buf = xmlBufContent(ctxt->input->buf->buffer);
11480 use = xmlBufUse(ctxt->input->buf->buffer);
11481 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011482 if (quote != 0) {
11483 if (buf[base] == quote)
11484 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011485 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011486 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011487 if ((quote == 0) && (buf[base] == '<')) {
11488 int found = 0;
11489 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011490 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011491 (buf[base + 1] == '!') &&
11492 (buf[base + 2] == '-') &&
11493 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011494 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011495 if ((buf[base] == '-') &&
11496 (buf[base + 1] == '-') &&
11497 (buf[base + 2] == '>')) {
11498 found = 1;
11499 base += 2;
11500 break;
11501 }
11502 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011503 if (!found) {
11504#if 0
11505 fprintf(stderr, "unfinished comment\n");
11506#endif
11507 break; /* for */
11508 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011509 continue;
11510 }
11511 }
Owen Taylor3473f882001-02-23 17:55:21 +000011512 if (buf[base] == '"') {
11513 quote = '"';
11514 continue;
11515 }
11516 if (buf[base] == '\'') {
11517 quote = '\'';
11518 continue;
11519 }
11520 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011521#if 0
11522 fprintf(stderr, "%c%c%c%c: ", buf[base],
11523 buf[base + 1], buf[base + 2], buf[base + 3]);
11524#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011525 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011526 break;
11527 if (buf[base + 1] == ']') {
11528 /* conditional crap, skip both ']' ! */
11529 base++;
11530 continue;
11531 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011532 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011533 if (buf[base + i] == '>') {
11534#if 0
11535 fprintf(stderr, "found\n");
11536#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011537 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011538 }
11539 if (!IS_BLANK_CH(buf[base + i])) {
11540#if 0
11541 fprintf(stderr, "not found\n");
11542#endif
11543 goto not_end_of_int_subset;
11544 }
Owen Taylor3473f882001-02-23 17:55:21 +000011545 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011546#if 0
11547 fprintf(stderr, "end of stream\n");
11548#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011549 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011550
Owen Taylor3473f882001-02-23 17:55:21 +000011551 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011552not_end_of_int_subset:
11553 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011554 }
11555 /*
11556 * We didn't found the end of the Internal subset
11557 */
Owen Taylor3473f882001-02-23 17:55:21 +000011558#ifdef DEBUG_PUSH
11559 if (next == 0)
11560 xmlGenericError(xmlGenericErrorContext,
11561 "PP: lookup of int subset end filed\n");
11562#endif
11563 goto done;
11564
11565found_end_int_subset:
11566 xmlParseInternalSubset(ctxt);
11567 ctxt->inSubset = 2;
11568 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11569 (ctxt->sax->externalSubset != NULL))
11570 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11571 ctxt->extSubSystem, ctxt->extSubURI);
11572 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011573 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011574 ctxt->instate = XML_PARSER_PROLOG;
11575 ctxt->checkIndex = 0;
11576#ifdef DEBUG_PUSH
11577 xmlGenericError(xmlGenericErrorContext,
11578 "PP: entering PROLOG\n");
11579#endif
11580 break;
11581 }
11582 case XML_PARSER_COMMENT:
11583 xmlGenericError(xmlGenericErrorContext,
11584 "PP: internal error, state == COMMENT\n");
11585 ctxt->instate = XML_PARSER_CONTENT;
11586#ifdef DEBUG_PUSH
11587 xmlGenericError(xmlGenericErrorContext,
11588 "PP: entering CONTENT\n");
11589#endif
11590 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011591 case XML_PARSER_IGNORE:
11592 xmlGenericError(xmlGenericErrorContext,
11593 "PP: internal error, state == IGNORE");
11594 ctxt->instate = XML_PARSER_DTD;
11595#ifdef DEBUG_PUSH
11596 xmlGenericError(xmlGenericErrorContext,
11597 "PP: entering DTD\n");
11598#endif
11599 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011600 case XML_PARSER_PI:
11601 xmlGenericError(xmlGenericErrorContext,
11602 "PP: internal error, state == PI\n");
11603 ctxt->instate = XML_PARSER_CONTENT;
11604#ifdef DEBUG_PUSH
11605 xmlGenericError(xmlGenericErrorContext,
11606 "PP: entering CONTENT\n");
11607#endif
11608 break;
11609 case XML_PARSER_ENTITY_DECL:
11610 xmlGenericError(xmlGenericErrorContext,
11611 "PP: internal error, state == ENTITY_DECL\n");
11612 ctxt->instate = XML_PARSER_DTD;
11613#ifdef DEBUG_PUSH
11614 xmlGenericError(xmlGenericErrorContext,
11615 "PP: entering DTD\n");
11616#endif
11617 break;
11618 case XML_PARSER_ENTITY_VALUE:
11619 xmlGenericError(xmlGenericErrorContext,
11620 "PP: internal error, state == ENTITY_VALUE\n");
11621 ctxt->instate = XML_PARSER_CONTENT;
11622#ifdef DEBUG_PUSH
11623 xmlGenericError(xmlGenericErrorContext,
11624 "PP: entering DTD\n");
11625#endif
11626 break;
11627 case XML_PARSER_ATTRIBUTE_VALUE:
11628 xmlGenericError(xmlGenericErrorContext,
11629 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11630 ctxt->instate = XML_PARSER_START_TAG;
11631#ifdef DEBUG_PUSH
11632 xmlGenericError(xmlGenericErrorContext,
11633 "PP: entering START_TAG\n");
11634#endif
11635 break;
11636 case XML_PARSER_SYSTEM_LITERAL:
11637 xmlGenericError(xmlGenericErrorContext,
11638 "PP: internal error, state == SYSTEM_LITERAL\n");
11639 ctxt->instate = XML_PARSER_START_TAG;
11640#ifdef DEBUG_PUSH
11641 xmlGenericError(xmlGenericErrorContext,
11642 "PP: entering START_TAG\n");
11643#endif
11644 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011645 case XML_PARSER_PUBLIC_LITERAL:
11646 xmlGenericError(xmlGenericErrorContext,
11647 "PP: internal error, state == PUBLIC_LITERAL\n");
11648 ctxt->instate = XML_PARSER_START_TAG;
11649#ifdef DEBUG_PUSH
11650 xmlGenericError(xmlGenericErrorContext,
11651 "PP: entering START_TAG\n");
11652#endif
11653 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011654 }
11655 }
11656done:
11657#ifdef DEBUG_PUSH
11658 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11659#endif
11660 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011661encoding_error:
11662 {
11663 char buffer[150];
11664
11665 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11666 ctxt->input->cur[0], ctxt->input->cur[1],
11667 ctxt->input->cur[2], ctxt->input->cur[3]);
11668 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11669 "Input is not proper UTF-8, indicate encoding !\n%s",
11670 BAD_CAST buffer, NULL);
11671 }
11672 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011673}
11674
11675/**
Owen Taylor3473f882001-02-23 17:55:21 +000011676 * xmlParseChunk:
11677 * @ctxt: an XML parser context
11678 * @chunk: an char array
11679 * @size: the size in byte of the chunk
11680 * @terminate: last chunk indicator
11681 *
11682 * Parse a Chunk of memory
11683 *
11684 * Returns zero if no error, the xmlParserErrors otherwise.
11685 */
11686int
11687xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11688 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011689 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011690 int remain = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000011691
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011692 if (ctxt == NULL)
11693 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011694 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011695 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011696 if (ctxt->instate == XML_PARSER_START)
11697 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011698 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11699 (chunk[size - 1] == '\r')) {
11700 end_in_lf = 1;
11701 size--;
11702 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011703
11704xmldecl_done:
11705
Owen Taylor3473f882001-02-23 17:55:21 +000011706 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11707 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011708 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
11709 size_t cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011710 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011711
11712 /*
11713 * Specific handling if we autodetected an encoding, we should not
11714 * push more than the first line ... which depend on the encoding
11715 * And only push the rest once the final encoding was detected
11716 */
11717 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11718 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010011719 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011720
11721 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11722 BAD_CAST "UTF-16")) ||
11723 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11724 BAD_CAST "UTF16")))
11725 len = 90;
11726 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11727 BAD_CAST "UCS-4")) ||
11728 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11729 BAD_CAST "UCS4")))
11730 len = 180;
11731
11732 if (ctxt->input->buf->rawconsumed < len)
11733 len -= ctxt->input->buf->rawconsumed;
11734
Raul Hudeaba9716a2010-03-15 10:13:29 +010011735 /*
11736 * Change size for reading the initial declaration only
11737 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11738 * will blindly copy extra bytes from memory.
11739 */
Daniel Veillard60587d62010-11-04 15:16:27 +010011740 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010011741 remain = size - len;
11742 size = len;
11743 } else {
11744 remain = 0;
11745 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011746 }
William M. Bracka3215c72004-07-31 16:24:01 +000011747 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11748 if (res < 0) {
11749 ctxt->errNo = XML_PARSER_EOF;
11750 ctxt->disableSAX = 1;
11751 return (XML_PARSER_EOF);
11752 }
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011753 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000011754#ifdef DEBUG_PUSH
11755 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11756#endif
11757
Owen Taylor3473f882001-02-23 17:55:21 +000011758 } else if (ctxt->instate != XML_PARSER_EOF) {
11759 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11760 xmlParserInputBufferPtr in = ctxt->input->buf;
11761 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11762 (in->raw != NULL)) {
11763 int nbchars;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011764
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011765 nbchars = xmlCharEncInput(in);
Owen Taylor3473f882001-02-23 17:55:21 +000011766 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011767 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011768 xmlGenericError(xmlGenericErrorContext,
11769 "xmlParseChunk: encoder error\n");
11770 return(XML_ERR_INVALID_ENCODING);
11771 }
11772 }
11773 }
11774 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011775 if (remain != 0)
11776 xmlParseTryOrFinish(ctxt, 0);
11777 else
11778 xmlParseTryOrFinish(ctxt, terminate);
11779 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11780 return(ctxt->errNo);
11781
11782 if (remain != 0) {
11783 chunk += size;
11784 size = remain;
11785 remain = 0;
11786 goto xmldecl_done;
11787 }
Daniel Veillarda617e242006-01-09 14:38:44 +000011788 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11789 (ctxt->input->buf != NULL)) {
11790 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11791 }
Owen Taylor3473f882001-02-23 17:55:21 +000011792 if (terminate) {
11793 /*
11794 * Check for termination
11795 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011796 int avail = 0;
11797
11798 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011799 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011800 avail = ctxt->input->length -
11801 (ctxt->input->cur - ctxt->input->base);
11802 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011803 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011804 (ctxt->input->cur - ctxt->input->base);
11805 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011806
Owen Taylor3473f882001-02-23 17:55:21 +000011807 if ((ctxt->instate != XML_PARSER_EOF) &&
11808 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011809 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011810 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011811 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011812 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011813 }
Owen Taylor3473f882001-02-23 17:55:21 +000011814 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011815 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011816 ctxt->sax->endDocument(ctxt->userData);
11817 }
11818 ctxt->instate = XML_PARSER_EOF;
11819 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011820 return((xmlParserErrors) ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011821}
11822
11823/************************************************************************
11824 * *
11825 * I/O front end functions to the parser *
11826 * *
11827 ************************************************************************/
11828
11829/**
Owen Taylor3473f882001-02-23 17:55:21 +000011830 * xmlCreatePushParserCtxt:
11831 * @sax: a SAX handler
11832 * @user_data: The user data returned on SAX callbacks
11833 * @chunk: a pointer to an array of chars
11834 * @size: number of chars in the array
11835 * @filename: an optional file name or URI
11836 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011837 * Create a parser context for using the XML parser in push mode.
11838 * If @chunk and @size are non-NULL, the data is used to detect
11839 * the encoding. The remaining characters will be parsed so they
11840 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011841 * To allow content encoding detection, @size should be >= 4
11842 * The value of @filename is used for fetching external entities
11843 * and error/warning reports.
11844 *
11845 * Returns the new parser context or NULL
11846 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011847
Owen Taylor3473f882001-02-23 17:55:21 +000011848xmlParserCtxtPtr
11849xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11850 const char *chunk, int size, const char *filename) {
11851 xmlParserCtxtPtr ctxt;
11852 xmlParserInputPtr inputStream;
11853 xmlParserInputBufferPtr buf;
11854 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11855
11856 /*
11857 * plug some encoding conversion routines
11858 */
11859 if ((chunk != NULL) && (size >= 4))
11860 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11861
11862 buf = xmlAllocParserInputBuffer(enc);
11863 if (buf == NULL) return(NULL);
11864
11865 ctxt = xmlNewParserCtxt();
11866 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011867 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011868 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011869 return(NULL);
11870 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011871 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011872 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11873 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011874 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011875 xmlFreeParserInputBuffer(buf);
11876 xmlFreeParserCtxt(ctxt);
11877 return(NULL);
11878 }
Owen Taylor3473f882001-02-23 17:55:21 +000011879 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011880#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011881 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011882#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011883 xmlFree(ctxt->sax);
11884 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11885 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011886 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011887 xmlFreeParserInputBuffer(buf);
11888 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011889 return(NULL);
11890 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011891 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11892 if (sax->initialized == XML_SAX2_MAGIC)
11893 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11894 else
11895 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011896 if (user_data != NULL)
11897 ctxt->userData = user_data;
11898 }
11899 if (filename == NULL) {
11900 ctxt->directory = NULL;
11901 } else {
11902 ctxt->directory = xmlParserGetDirectory(filename);
11903 }
11904
11905 inputStream = xmlNewInputStream(ctxt);
11906 if (inputStream == NULL) {
11907 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011908 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011909 return(NULL);
11910 }
11911
11912 if (filename == NULL)
11913 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011914 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011915 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011916 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011917 if (inputStream->filename == NULL) {
11918 xmlFreeParserCtxt(ctxt);
11919 xmlFreeParserInputBuffer(buf);
11920 return(NULL);
11921 }
11922 }
Owen Taylor3473f882001-02-23 17:55:21 +000011923 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080011924 xmlBufResetInput(inputStream->buf->buffer, inputStream);
Owen Taylor3473f882001-02-23 17:55:21 +000011925 inputPush(ctxt, inputStream);
11926
William M. Brack3a1cd212005-02-11 14:35:54 +000011927 /*
11928 * If the caller didn't provide an initial 'chunk' for determining
11929 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11930 * that it can be automatically determined later
11931 */
11932 if ((size == 0) || (chunk == NULL)) {
11933 ctxt->charset = XML_CHAR_ENCODING_NONE;
11934 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011935 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
11936 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011937
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011938 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011939
Daniel Veillard00ac0d32012-07-16 18:03:01 +080011940 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Owen Taylor3473f882001-02-23 17:55:21 +000011941#ifdef DEBUG_PUSH
11942 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11943#endif
11944 }
11945
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011946 if (enc != XML_CHAR_ENCODING_NONE) {
11947 xmlSwitchEncoding(ctxt, enc);
11948 }
11949
Owen Taylor3473f882001-02-23 17:55:21 +000011950 return(ctxt);
11951}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011952#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011953
11954/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011955 * xmlStopParser:
11956 * @ctxt: an XML parser context
11957 *
11958 * Blocks further parser processing
11959 */
11960void
11961xmlStopParser(xmlParserCtxtPtr ctxt) {
11962 if (ctxt == NULL)
11963 return;
11964 ctxt->instate = XML_PARSER_EOF;
11965 ctxt->disableSAX = 1;
11966 if (ctxt->input != NULL) {
11967 ctxt->input->cur = BAD_CAST"";
11968 ctxt->input->base = ctxt->input->cur;
11969 }
11970}
11971
11972/**
Owen Taylor3473f882001-02-23 17:55:21 +000011973 * xmlCreateIOParserCtxt:
11974 * @sax: a SAX handler
11975 * @user_data: The user data returned on SAX callbacks
11976 * @ioread: an I/O read function
11977 * @ioclose: an I/O close function
11978 * @ioctx: an I/O handler
11979 * @enc: the charset encoding if known
11980 *
11981 * Create a parser context for using the XML parser with an existing
11982 * I/O stream
11983 *
11984 * Returns the new parser context or NULL
11985 */
11986xmlParserCtxtPtr
11987xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11988 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11989 void *ioctx, xmlCharEncoding enc) {
11990 xmlParserCtxtPtr ctxt;
11991 xmlParserInputPtr inputStream;
11992 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080011993
Daniel Veillard42595322004-11-08 10:52:06 +000011994 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011995
11996 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080011997 if (buf == NULL) {
11998 if (ioclose != NULL)
11999 ioclose(ioctx);
12000 return (NULL);
12001 }
Owen Taylor3473f882001-02-23 17:55:21 +000012002
12003 ctxt = xmlNewParserCtxt();
12004 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012005 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012006 return(NULL);
12007 }
12008 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012009#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012010 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012011#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012012 xmlFree(ctxt->sax);
12013 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12014 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012015 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012016 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012017 return(NULL);
12018 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012019 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12020 if (sax->initialized == XML_SAX2_MAGIC)
12021 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12022 else
12023 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012024 if (user_data != NULL)
12025 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012026 }
Owen Taylor3473f882001-02-23 17:55:21 +000012027
12028 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12029 if (inputStream == NULL) {
12030 xmlFreeParserCtxt(ctxt);
12031 return(NULL);
12032 }
12033 inputPush(ctxt, inputStream);
12034
12035 return(ctxt);
12036}
12037
Daniel Veillard4432df22003-09-28 18:58:27 +000012038#ifdef LIBXML_VALID_ENABLED
/************************************************************************
 *									*
 *		Front ends when parsing a DTD				*
 *									*
 ************************************************************************/
12044
12045/**
12046 * xmlIOParseDTD:
12047 * @sax: the SAX handler block or NULL
12048 * @input: an Input Buffer
12049 * @enc: the charset encoding if known
12050 *
12051 * Load and parse a DTD
12052 *
12053 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012054 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012055 */
12056
12057xmlDtdPtr
12058xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12059 xmlCharEncoding enc) {
12060 xmlDtdPtr ret = NULL;
12061 xmlParserCtxtPtr ctxt;
12062 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012063 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012064
12065 if (input == NULL)
12066 return(NULL);
12067
12068 ctxt = xmlNewParserCtxt();
12069 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012070 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012071 return(NULL);
12072 }
12073
12074 /*
12075 * Set-up the SAX context
12076 */
12077 if (sax != NULL) {
12078 if (ctxt->sax != NULL)
12079 xmlFree(ctxt->sax);
12080 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012081 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012082 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012083 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012084
12085 /*
12086 * generate a parser input from the I/O handler
12087 */
12088
Daniel Veillard43caefb2003-12-07 19:32:22 +000012089 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012090 if (pinput == NULL) {
12091 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012092 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012093 xmlFreeParserCtxt(ctxt);
12094 return(NULL);
12095 }
12096
12097 /*
12098 * plug some encoding conversion routines here.
12099 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012100 if (xmlPushInput(ctxt, pinput) < 0) {
12101 if (sax != NULL) ctxt->sax = NULL;
12102 xmlFreeParserCtxt(ctxt);
12103 return(NULL);
12104 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012105 if (enc != XML_CHAR_ENCODING_NONE) {
12106 xmlSwitchEncoding(ctxt, enc);
12107 }
Owen Taylor3473f882001-02-23 17:55:21 +000012108
12109 pinput->filename = NULL;
12110 pinput->line = 1;
12111 pinput->col = 1;
12112 pinput->base = ctxt->input->cur;
12113 pinput->cur = ctxt->input->cur;
12114 pinput->free = NULL;
12115
12116 /*
12117 * let's parse that entity knowing it's an external subset.
12118 */
12119 ctxt->inSubset = 2;
12120 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012121 if (ctxt->myDoc == NULL) {
12122 xmlErrMemory(ctxt, "New Doc failed");
12123 return(NULL);
12124 }
12125 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012126 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12127 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012128
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012129 if ((enc == XML_CHAR_ENCODING_NONE) &&
12130 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000012131 /*
12132 * Get the 4 first bytes and decode the charset
12133 * if enc != XML_CHAR_ENCODING_NONE
12134 * plug some encoding conversion routines.
12135 */
12136 start[0] = RAW;
12137 start[1] = NXT(1);
12138 start[2] = NXT(2);
12139 start[3] = NXT(3);
12140 enc = xmlDetectCharEncoding(start, 4);
12141 if (enc != XML_CHAR_ENCODING_NONE) {
12142 xmlSwitchEncoding(ctxt, enc);
12143 }
12144 }
12145
Owen Taylor3473f882001-02-23 17:55:21 +000012146 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12147
12148 if (ctxt->myDoc != NULL) {
12149 if (ctxt->wellFormed) {
12150 ret = ctxt->myDoc->extSubset;
12151 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012152 if (ret != NULL) {
12153 xmlNodePtr tmp;
12154
12155 ret->doc = NULL;
12156 tmp = ret->children;
12157 while (tmp != NULL) {
12158 tmp->doc = NULL;
12159 tmp = tmp->next;
12160 }
12161 }
Owen Taylor3473f882001-02-23 17:55:21 +000012162 } else {
12163 ret = NULL;
12164 }
12165 xmlFreeDoc(ctxt->myDoc);
12166 ctxt->myDoc = NULL;
12167 }
12168 if (sax != NULL) ctxt->sax = NULL;
12169 xmlFreeParserCtxt(ctxt);
12170
12171 return(ret);
12172}
12173
12174/**
12175 * xmlSAXParseDTD:
12176 * @sax: the SAX handler block
12177 * @ExternalID: a NAME* containing the External ID of the DTD
12178 * @SystemID: a NAME* containing the URL to the DTD
12179 *
12180 * Load and parse an external subset.
12181 *
12182 * Returns the resulting xmlDtdPtr or NULL in case of error.
12183 */
12184
12185xmlDtdPtr
12186xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12187 const xmlChar *SystemID) {
12188 xmlDtdPtr ret = NULL;
12189 xmlParserCtxtPtr ctxt;
12190 xmlParserInputPtr input = NULL;
12191 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012192 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012193
12194 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12195
12196 ctxt = xmlNewParserCtxt();
12197 if (ctxt == NULL) {
12198 return(NULL);
12199 }
12200
12201 /*
12202 * Set-up the SAX context
12203 */
12204 if (sax != NULL) {
12205 if (ctxt->sax != NULL)
12206 xmlFree(ctxt->sax);
12207 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012208 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012209 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012210
12211 /*
12212 * Canonicalise the system ID
12213 */
12214 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012215 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012216 xmlFreeParserCtxt(ctxt);
12217 return(NULL);
12218 }
Owen Taylor3473f882001-02-23 17:55:21 +000012219
12220 /*
12221 * Ask the Entity resolver to load the damn thing
12222 */
12223
12224 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012225 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12226 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012227 if (input == NULL) {
12228 if (sax != NULL) ctxt->sax = NULL;
12229 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012230 if (systemIdCanonic != NULL)
12231 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012232 return(NULL);
12233 }
12234
12235 /*
12236 * plug some encoding conversion routines here.
12237 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012238 if (xmlPushInput(ctxt, input) < 0) {
12239 if (sax != NULL) ctxt->sax = NULL;
12240 xmlFreeParserCtxt(ctxt);
12241 if (systemIdCanonic != NULL)
12242 xmlFree(systemIdCanonic);
12243 return(NULL);
12244 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012245 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12246 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12247 xmlSwitchEncoding(ctxt, enc);
12248 }
Owen Taylor3473f882001-02-23 17:55:21 +000012249
12250 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012251 input->filename = (char *) systemIdCanonic;
12252 else
12253 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012254 input->line = 1;
12255 input->col = 1;
12256 input->base = ctxt->input->cur;
12257 input->cur = ctxt->input->cur;
12258 input->free = NULL;
12259
12260 /*
12261 * let's parse that entity knowing it's an external subset.
12262 */
12263 ctxt->inSubset = 2;
12264 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012265 if (ctxt->myDoc == NULL) {
12266 xmlErrMemory(ctxt, "New Doc failed");
12267 if (sax != NULL) ctxt->sax = NULL;
12268 xmlFreeParserCtxt(ctxt);
12269 return(NULL);
12270 }
12271 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012272 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12273 ExternalID, SystemID);
12274 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12275
12276 if (ctxt->myDoc != NULL) {
12277 if (ctxt->wellFormed) {
12278 ret = ctxt->myDoc->extSubset;
12279 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012280 if (ret != NULL) {
12281 xmlNodePtr tmp;
12282
12283 ret->doc = NULL;
12284 tmp = ret->children;
12285 while (tmp != NULL) {
12286 tmp->doc = NULL;
12287 tmp = tmp->next;
12288 }
12289 }
Owen Taylor3473f882001-02-23 17:55:21 +000012290 } else {
12291 ret = NULL;
12292 }
12293 xmlFreeDoc(ctxt->myDoc);
12294 ctxt->myDoc = NULL;
12295 }
12296 if (sax != NULL) ctxt->sax = NULL;
12297 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012298
Owen Taylor3473f882001-02-23 17:55:21 +000012299 return(ret);
12300}
12301
Daniel Veillard4432df22003-09-28 18:58:27 +000012302
Owen Taylor3473f882001-02-23 17:55:21 +000012303/**
12304 * xmlParseDTD:
12305 * @ExternalID: a NAME* containing the External ID of the DTD
12306 * @SystemID: a NAME* containing the URL to the DTD
12307 *
12308 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012309 *
Owen Taylor3473f882001-02-23 17:55:21 +000012310 * Returns the resulting xmlDtdPtr or NULL in case of error.
12311 */
12312
12313xmlDtdPtr
12314xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12315 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12316}
Daniel Veillard4432df22003-09-28 18:58:27 +000012317#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012318
/************************************************************************
 *									*
 *		Front ends when parsing an Entity			*
 *									*
 ************************************************************************/
12324
12325/**
Owen Taylor3473f882001-02-23 17:55:21 +000012326 * xmlParseCtxtExternalEntity:
12327 * @ctx: the existing parsing context
12328 * @URL: the URL for the entity to load
12329 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012330 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012331 *
12332 * Parse an external general entity within an existing parsing context
12333 * An external general parsed entity is well-formed if it matches the
12334 * production labeled extParsedEnt.
12335 *
12336 * [78] extParsedEnt ::= TextDecl? content
12337 *
12338 * Returns 0 if the entity is well formed, -1 in case of args problem and
12339 * the parser error code otherwise
12340 */
12341
12342int
12343xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012344 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012345 xmlParserCtxtPtr ctxt;
12346 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012347 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012348 xmlSAXHandlerPtr oldsax = NULL;
12349 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012350 xmlChar start[4];
12351 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012352
Daniel Veillardce682bc2004-11-05 17:22:25 +000012353 if (ctx == NULL) return(-1);
12354
Daniel Veillard0161e632008-08-28 15:36:32 +000012355 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12356 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012357 return(XML_ERR_ENTITY_LOOP);
12358 }
12359
Daniel Veillardcda96922001-08-21 10:56:31 +000012360 if (lst != NULL)
12361 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012362 if ((URL == NULL) && (ID == NULL))
12363 return(-1);
12364 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12365 return(-1);
12366
Rob Richards798743a2009-06-19 13:54:25 -040012367 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012368 if (ctxt == NULL) {
12369 return(-1);
12370 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012371
Owen Taylor3473f882001-02-23 17:55:21 +000012372 oldsax = ctxt->sax;
12373 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012374 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012375 newDoc = xmlNewDoc(BAD_CAST "1.0");
12376 if (newDoc == NULL) {
12377 xmlFreeParserCtxt(ctxt);
12378 return(-1);
12379 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012380 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012381 if (ctx->myDoc->dict) {
12382 newDoc->dict = ctx->myDoc->dict;
12383 xmlDictReference(newDoc->dict);
12384 }
Owen Taylor3473f882001-02-23 17:55:21 +000012385 if (ctx->myDoc != NULL) {
12386 newDoc->intSubset = ctx->myDoc->intSubset;
12387 newDoc->extSubset = ctx->myDoc->extSubset;
12388 }
12389 if (ctx->myDoc->URL != NULL) {
12390 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12391 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012392 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12393 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012394 ctxt->sax = oldsax;
12395 xmlFreeParserCtxt(ctxt);
12396 newDoc->intSubset = NULL;
12397 newDoc->extSubset = NULL;
12398 xmlFreeDoc(newDoc);
12399 return(-1);
12400 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012401 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012402 nodePush(ctxt, newDoc->children);
12403 if (ctx->myDoc == NULL) {
12404 ctxt->myDoc = newDoc;
12405 } else {
12406 ctxt->myDoc = ctx->myDoc;
12407 newDoc->children->doc = ctx->myDoc;
12408 }
12409
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012410 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012411 * Get the 4 first bytes and decode the charset
12412 * if enc != XML_CHAR_ENCODING_NONE
12413 * plug some encoding conversion routines.
12414 */
12415 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012416 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12417 start[0] = RAW;
12418 start[1] = NXT(1);
12419 start[2] = NXT(2);
12420 start[3] = NXT(3);
12421 enc = xmlDetectCharEncoding(start, 4);
12422 if (enc != XML_CHAR_ENCODING_NONE) {
12423 xmlSwitchEncoding(ctxt, enc);
12424 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012425 }
12426
Owen Taylor3473f882001-02-23 17:55:21 +000012427 /*
12428 * Parse a possible text declaration first
12429 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012430 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012431 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012432 /*
12433 * An XML-1.0 document can't reference an entity not XML-1.0
12434 */
12435 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12436 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12437 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12438 "Version mismatch between document and entity\n");
12439 }
Owen Taylor3473f882001-02-23 17:55:21 +000012440 }
12441
12442 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012443 * If the user provided its own SAX callbacks then reuse the
12444 * useData callback field, otherwise the expected setup in a
12445 * DOM builder is to have userData == ctxt
12446 */
12447 if (ctx->userData == ctx)
12448 ctxt->userData = ctxt;
12449 else
12450 ctxt->userData = ctx->userData;
12451
12452 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012453 * Doing validity checking on chunk doesn't make sense
12454 */
12455 ctxt->instate = XML_PARSER_CONTENT;
12456 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012457 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012458 ctxt->loadsubset = ctx->loadsubset;
12459 ctxt->depth = ctx->depth + 1;
12460 ctxt->replaceEntities = ctx->replaceEntities;
12461 if (ctxt->validate) {
12462 ctxt->vctxt.error = ctx->vctxt.error;
12463 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012464 } else {
12465 ctxt->vctxt.error = NULL;
12466 ctxt->vctxt.warning = NULL;
12467 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012468 ctxt->vctxt.nodeTab = NULL;
12469 ctxt->vctxt.nodeNr = 0;
12470 ctxt->vctxt.nodeMax = 0;
12471 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012472 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12473 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012474 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12475 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12476 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012477 ctxt->dictNames = ctx->dictNames;
12478 ctxt->attsDefault = ctx->attsDefault;
12479 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012480 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012481
12482 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012483
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012484 ctx->validate = ctxt->validate;
12485 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012486 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012487 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012488 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012489 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012490 }
12491 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012492 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012493 }
12494
12495 if (!ctxt->wellFormed) {
12496 if (ctxt->errNo == 0)
12497 ret = 1;
12498 else
12499 ret = ctxt->errNo;
12500 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012501 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012502 xmlNodePtr cur;
12503
12504 /*
12505 * Return the newly created nodeset after unlinking it from
12506 * they pseudo parent.
12507 */
12508 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012509 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012510 while (cur != NULL) {
12511 cur->parent = NULL;
12512 cur = cur->next;
12513 }
12514 newDoc->children->children = NULL;
12515 }
12516 ret = 0;
12517 }
12518 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012519 ctxt->dict = NULL;
12520 ctxt->attsDefault = NULL;
12521 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012522 xmlFreeParserCtxt(ctxt);
12523 newDoc->intSubset = NULL;
12524 newDoc->extSubset = NULL;
12525 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012526
Owen Taylor3473f882001-02-23 17:55:21 +000012527 return(ret);
12528}
12529
12530/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012531 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012532 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012533 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012534 * @sax: the SAX handler bloc (possibly NULL)
12535 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12536 * @depth: Used for loop detection, use 0
12537 * @URL: the URL for the entity to load
12538 * @ID: the System ID for the entity to load
12539 * @list: the return value for the set of parsed nodes
12540 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012541 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012542 *
12543 * Returns 0 if the entity is well formed, -1 in case of args problem and
12544 * the parser error code otherwise
12545 */
12546
Daniel Veillard7d515752003-09-26 19:12:37 +000012547static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012548xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12549 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012550 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012551 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012552 xmlParserCtxtPtr ctxt;
12553 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012554 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012555 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012556 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012557 xmlChar start[4];
12558 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012559
Daniel Veillard0161e632008-08-28 15:36:32 +000012560 if (((depth > 40) &&
12561 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12562 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012563 return(XML_ERR_ENTITY_LOOP);
12564 }
12565
Owen Taylor3473f882001-02-23 17:55:21 +000012566 if (list != NULL)
12567 *list = NULL;
12568 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012569 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012570 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012571 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012572
12573
Rob Richards9c0aa472009-03-26 18:10:19 +000012574 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012575 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012576 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012577 if (oldctxt != NULL) {
12578 ctxt->_private = oldctxt->_private;
12579 ctxt->loadsubset = oldctxt->loadsubset;
12580 ctxt->validate = oldctxt->validate;
12581 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012582 ctxt->record_info = oldctxt->record_info;
12583 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12584 ctxt->node_seq.length = oldctxt->node_seq.length;
12585 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012586 } else {
12587 /*
12588 * Doing validity checking on chunk without context
12589 * doesn't make sense
12590 */
12591 ctxt->_private = NULL;
12592 ctxt->validate = 0;
12593 ctxt->external = 2;
12594 ctxt->loadsubset = 0;
12595 }
Owen Taylor3473f882001-02-23 17:55:21 +000012596 if (sax != NULL) {
12597 oldsax = ctxt->sax;
12598 ctxt->sax = sax;
12599 if (user_data != NULL)
12600 ctxt->userData = user_data;
12601 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012602 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012603 newDoc = xmlNewDoc(BAD_CAST "1.0");
12604 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012605 ctxt->node_seq.maximum = 0;
12606 ctxt->node_seq.length = 0;
12607 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012608 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012609 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012610 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012611 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012612 newDoc->intSubset = doc->intSubset;
12613 newDoc->extSubset = doc->extSubset;
12614 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012615 xmlDictReference(newDoc->dict);
12616
Owen Taylor3473f882001-02-23 17:55:21 +000012617 if (doc->URL != NULL) {
12618 newDoc->URL = xmlStrdup(doc->URL);
12619 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012620 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12621 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012622 if (sax != NULL)
12623 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012624 ctxt->node_seq.maximum = 0;
12625 ctxt->node_seq.length = 0;
12626 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012627 xmlFreeParserCtxt(ctxt);
12628 newDoc->intSubset = NULL;
12629 newDoc->extSubset = NULL;
12630 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012631 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012632 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012633 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012634 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012635 ctxt->myDoc = doc;
12636 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012637
Daniel Veillard0161e632008-08-28 15:36:32 +000012638 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012639 * Get the 4 first bytes and decode the charset
12640 * if enc != XML_CHAR_ENCODING_NONE
12641 * plug some encoding conversion routines.
12642 */
12643 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012644 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12645 start[0] = RAW;
12646 start[1] = NXT(1);
12647 start[2] = NXT(2);
12648 start[3] = NXT(3);
12649 enc = xmlDetectCharEncoding(start, 4);
12650 if (enc != XML_CHAR_ENCODING_NONE) {
12651 xmlSwitchEncoding(ctxt, enc);
12652 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012653 }
12654
Owen Taylor3473f882001-02-23 17:55:21 +000012655 /*
12656 * Parse a possible text declaration first
12657 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012658 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012659 xmlParseTextDecl(ctxt);
12660 }
12661
Owen Taylor3473f882001-02-23 17:55:21 +000012662 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012663 ctxt->depth = depth;
12664
12665 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012666
Daniel Veillard561b7f82002-03-20 21:55:57 +000012667 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012668 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012669 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012670 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012671 }
12672 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012673 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012674 }
12675
12676 if (!ctxt->wellFormed) {
12677 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012678 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012679 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012680 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012681 } else {
12682 if (list != NULL) {
12683 xmlNodePtr cur;
12684
12685 /*
12686 * Return the newly created nodeset after unlinking it from
12687 * they pseudo parent.
12688 */
12689 cur = newDoc->children->children;
12690 *list = cur;
12691 while (cur != NULL) {
12692 cur->parent = NULL;
12693 cur = cur->next;
12694 }
12695 newDoc->children->children = NULL;
12696 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012697 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012698 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012699
12700 /*
12701 * Record in the parent context the number of entities replacement
12702 * done when parsing that reference.
12703 */
Daniel Veillard76d36452009-09-07 11:19:33 +020012704 if (oldctxt != NULL)
12705 oldctxt->nbentities += ctxt->nbentities;
12706
Daniel Veillard0161e632008-08-28 15:36:32 +000012707 /*
12708 * Also record the size of the entity parsed
12709 */
12710 if (ctxt->input != NULL) {
12711 oldctxt->sizeentities += ctxt->input->consumed;
12712 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12713 }
12714 /*
12715 * And record the last error if any
12716 */
12717 if (ctxt->lastError.code != XML_ERR_OK)
12718 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12719
Owen Taylor3473f882001-02-23 17:55:21 +000012720 if (sax != NULL)
12721 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012722 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12723 oldctxt->node_seq.length = ctxt->node_seq.length;
12724 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012725 ctxt->node_seq.maximum = 0;
12726 ctxt->node_seq.length = 0;
12727 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012728 xmlFreeParserCtxt(ctxt);
12729 newDoc->intSubset = NULL;
12730 newDoc->extSubset = NULL;
12731 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012732
Owen Taylor3473f882001-02-23 17:55:21 +000012733 return(ret);
12734}
12735
Daniel Veillard81273902003-09-30 00:43:48 +000012736#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012737/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012738 * xmlParseExternalEntity:
12739 * @doc: the document the chunk pertains to
12740 * @sax: the SAX handler bloc (possibly NULL)
12741 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12742 * @depth: Used for loop detection, use 0
12743 * @URL: the URL for the entity to load
12744 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012745 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012746 *
12747 * Parse an external general entity
12748 * An external general parsed entity is well-formed if it matches the
12749 * production labeled extParsedEnt.
12750 *
12751 * [78] extParsedEnt ::= TextDecl? content
12752 *
12753 * Returns 0 if the entity is well formed, -1 in case of args problem and
12754 * the parser error code otherwise
12755 */
12756
12757int
12758xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012759 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012760 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012761 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012762}
12763
12764/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012765 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012766 * @doc: the document the chunk pertains to
12767 * @sax: the SAX handler bloc (possibly NULL)
12768 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12769 * @depth: Used for loop detection, use 0
12770 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012771 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012772 *
12773 * Parse a well-balanced chunk of an XML document
12774 * called by the parser
12775 * The allowed sequence for the Well Balanced Chunk is the one defined by
12776 * the content production in the XML grammar:
12777 *
12778 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12779 *
12780 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12781 * the parser error code otherwise
12782 */
12783
12784int
12785xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012786 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012787 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12788 depth, string, lst, 0 );
12789}
Daniel Veillard81273902003-09-30 00:43:48 +000012790#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012791
12792/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012793 * xmlParseBalancedChunkMemoryInternal:
12794 * @oldctxt: the existing parsing context
12795 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12796 * @user_data: the user data field for the parser context
12797 * @lst: the return value for the set of parsed nodes
12798 *
12799 *
12800 * Parse a well-balanced chunk of an XML document
12801 * called by the parser
12802 * The allowed sequence for the Well Balanced Chunk is the one defined by
12803 * the content production in the XML grammar:
12804 *
12805 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12806 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012807 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12808 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012809 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012810 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012811 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012812 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012813static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012814xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12815 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12816 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012817 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012818 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012819 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012820 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012821 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012822 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012823 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020012824#ifdef SAX2
12825 int i;
12826#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000012827
Daniel Veillard0161e632008-08-28 15:36:32 +000012828 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12829 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012830 return(XML_ERR_ENTITY_LOOP);
12831 }
12832
12833
12834 if (lst != NULL)
12835 *lst = NULL;
12836 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012837 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012838
12839 size = xmlStrlen(string);
12840
12841 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012842 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012843 if (user_data != NULL)
12844 ctxt->userData = user_data;
12845 else
12846 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012847 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12848 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012849 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12850 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12851 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012852
Daniel Veillard74eaec12009-08-26 15:57:20 +020012853#ifdef SAX2
12854 /* propagate namespaces down the entity */
12855 for (i = 0;i < oldctxt->nsNr;i += 2) {
12856 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12857 }
12858#endif
12859
Daniel Veillard328f48c2002-11-15 15:24:34 +000012860 oldsax = ctxt->sax;
12861 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012862 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012863 ctxt->replaceEntities = oldctxt->replaceEntities;
12864 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012865
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012866 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012867 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012868 newDoc = xmlNewDoc(BAD_CAST "1.0");
12869 if (newDoc == NULL) {
12870 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012871 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012872 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012873 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012874 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012875 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012876 newDoc->dict = ctxt->dict;
12877 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012878 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012879 } else {
12880 ctxt->myDoc = oldctxt->myDoc;
12881 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012882 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012883 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012884 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12885 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012886 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012887 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012888 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012889 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012890 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012891 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012892 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012893 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012894 ctxt->myDoc->children = NULL;
12895 ctxt->myDoc->last = NULL;
12896 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012897 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012898 ctxt->instate = XML_PARSER_CONTENT;
12899 ctxt->depth = oldctxt->depth + 1;
12900
Daniel Veillard328f48c2002-11-15 15:24:34 +000012901 ctxt->validate = 0;
12902 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012903 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12904 /*
12905 * ID/IDREF registration will be done in xmlValidateElement below
12906 */
12907 ctxt->loadsubset |= XML_SKIP_IDS;
12908 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012909 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012910 ctxt->attsDefault = oldctxt->attsDefault;
12911 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012912
Daniel Veillard68e9e742002-11-16 15:35:11 +000012913 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012914 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012915 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012916 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012917 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012918 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012919 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012920 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012921 }
12922
12923 if (!ctxt->wellFormed) {
12924 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012925 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012926 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012927 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012928 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012929 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012930 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012931
William M. Brack7b9154b2003-09-27 19:23:50 +000012932 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012933 xmlNodePtr cur;
12934
12935 /*
12936 * Return the newly created nodeset after unlinking it from
12937 * they pseudo parent.
12938 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012939 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012940 *lst = cur;
12941 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012942#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012943 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12944 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12945 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012946 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12947 oldctxt->myDoc, cur);
12948 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012949#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012950 cur->parent = NULL;
12951 cur = cur->next;
12952 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012953 ctxt->myDoc->children->children = NULL;
12954 }
12955 if (ctxt->myDoc != NULL) {
12956 xmlFreeNode(ctxt->myDoc->children);
12957 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012958 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012959 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012960
12961 /*
12962 * Record in the parent context the number of entities replacement
12963 * done when parsing that reference.
12964 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020012965 if (oldctxt != NULL)
12966 oldctxt->nbentities += ctxt->nbentities;
12967
Daniel Veillard0161e632008-08-28 15:36:32 +000012968 /*
12969 * Also record the last error if any
12970 */
12971 if (ctxt->lastError.code != XML_ERR_OK)
12972 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12973
Daniel Veillard328f48c2002-11-15 15:24:34 +000012974 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012975 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012976 ctxt->attsDefault = NULL;
12977 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012978 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012979 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012980 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012981 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012982
Daniel Veillard328f48c2002-11-15 15:24:34 +000012983 return(ret);
12984}
12985
Daniel Veillard29b17482004-08-16 00:39:03 +000012986/**
12987 * xmlParseInNodeContext:
12988 * @node: the context node
12989 * @data: the input string
12990 * @datalen: the input string length in bytes
12991 * @options: a combination of xmlParserOption
12992 * @lst: the return value for the set of parsed nodes
12993 *
12994 * Parse a well-balanced chunk of an XML document
12995 * within the context (DTD, namespaces, etc ...) of the given node.
12996 *
12997 * The allowed sequence for the data is a Well Balanced Chunk defined by
12998 * the content production in the XML grammar:
12999 *
13000 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13001 *
13002 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13003 * error code otherwise
13004 */
13005xmlParserErrors
13006xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13007 int options, xmlNodePtr *lst) {
13008#ifdef SAX2
13009 xmlParserCtxtPtr ctxt;
13010 xmlDocPtr doc = NULL;
13011 xmlNodePtr fake, cur;
13012 int nsnr = 0;
13013
13014 xmlParserErrors ret = XML_ERR_OK;
13015
13016 /*
13017 * check all input parameters, grab the document
13018 */
13019 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13020 return(XML_ERR_INTERNAL_ERROR);
13021 switch (node->type) {
13022 case XML_ELEMENT_NODE:
13023 case XML_ATTRIBUTE_NODE:
13024 case XML_TEXT_NODE:
13025 case XML_CDATA_SECTION_NODE:
13026 case XML_ENTITY_REF_NODE:
13027 case XML_PI_NODE:
13028 case XML_COMMENT_NODE:
13029 case XML_DOCUMENT_NODE:
13030 case XML_HTML_DOCUMENT_NODE:
13031 break;
13032 default:
13033 return(XML_ERR_INTERNAL_ERROR);
13034
13035 }
13036 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13037 (node->type != XML_DOCUMENT_NODE) &&
13038 (node->type != XML_HTML_DOCUMENT_NODE))
13039 node = node->parent;
13040 if (node == NULL)
13041 return(XML_ERR_INTERNAL_ERROR);
13042 if (node->type == XML_ELEMENT_NODE)
13043 doc = node->doc;
13044 else
13045 doc = (xmlDocPtr) node;
13046 if (doc == NULL)
13047 return(XML_ERR_INTERNAL_ERROR);
13048
13049 /*
13050 * allocate a context and set-up everything not related to the
13051 * node position in the tree
13052 */
13053 if (doc->type == XML_DOCUMENT_NODE)
13054 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13055#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013056 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013057 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013058 /*
13059 * When parsing in context, it makes no sense to add implied
13060 * elements like html/body/etc...
13061 */
13062 options |= HTML_PARSE_NOIMPLIED;
13063 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013064#endif
13065 else
13066 return(XML_ERR_INTERNAL_ERROR);
13067
13068 if (ctxt == NULL)
13069 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013070
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013071 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013072 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13073 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13074 * we must wait until the last moment to free the original one.
13075 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013076 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013077 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013078 xmlDictFree(ctxt->dict);
13079 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013080 } else
13081 options |= XML_PARSE_NODICT;
13082
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013083 if (doc->encoding != NULL) {
13084 xmlCharEncodingHandlerPtr hdlr;
13085
13086 if (ctxt->encoding != NULL)
13087 xmlFree((xmlChar *) ctxt->encoding);
13088 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13089
13090 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13091 if (hdlr != NULL) {
13092 xmlSwitchToEncoding(ctxt, hdlr);
13093 } else {
13094 return(XML_ERR_UNSUPPORTED_ENCODING);
13095 }
13096 }
13097
Daniel Veillard37334572008-07-31 08:20:02 +000013098 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013099 xmlDetectSAX2(ctxt);
13100 ctxt->myDoc = doc;
13101
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013102 fake = xmlNewComment(NULL);
13103 if (fake == NULL) {
13104 xmlFreeParserCtxt(ctxt);
13105 return(XML_ERR_NO_MEMORY);
13106 }
13107 xmlAddChild(node, fake);
13108
Daniel Veillard29b17482004-08-16 00:39:03 +000013109 if (node->type == XML_ELEMENT_NODE) {
13110 nodePush(ctxt, node);
13111 /*
13112 * initialize the SAX2 namespaces stack
13113 */
13114 cur = node;
13115 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13116 xmlNsPtr ns = cur->nsDef;
13117 const xmlChar *iprefix, *ihref;
13118
13119 while (ns != NULL) {
13120 if (ctxt->dict) {
13121 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13122 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13123 } else {
13124 iprefix = ns->prefix;
13125 ihref = ns->href;
13126 }
13127
13128 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13129 nsPush(ctxt, iprefix, ihref);
13130 nsnr++;
13131 }
13132 ns = ns->next;
13133 }
13134 cur = cur->parent;
13135 }
13136 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000013137 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013138
13139 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13140 /*
13141 * ID/IDREF registration will be done in xmlValidateElement below
13142 */
13143 ctxt->loadsubset |= XML_SKIP_IDS;
13144 }
13145
Daniel Veillard499cc922006-01-18 17:22:35 +000013146#ifdef LIBXML_HTML_ENABLED
13147 if (doc->type == XML_HTML_DOCUMENT_NODE)
13148 __htmlParseContent(ctxt);
13149 else
13150#endif
13151 xmlParseContent(ctxt);
13152
Daniel Veillard29b17482004-08-16 00:39:03 +000013153 nsPop(ctxt, nsnr);
13154 if ((RAW == '<') && (NXT(1) == '/')) {
13155 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13156 } else if (RAW != 0) {
13157 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13158 }
13159 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13160 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13161 ctxt->wellFormed = 0;
13162 }
13163
13164 if (!ctxt->wellFormed) {
13165 if (ctxt->errNo == 0)
13166 ret = XML_ERR_INTERNAL_ERROR;
13167 else
13168 ret = (xmlParserErrors)ctxt->errNo;
13169 } else {
13170 ret = XML_ERR_OK;
13171 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013172
Daniel Veillard29b17482004-08-16 00:39:03 +000013173 /*
13174 * Return the newly created nodeset after unlinking it from
13175 * the pseudo sibling.
13176 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013177
Daniel Veillard29b17482004-08-16 00:39:03 +000013178 cur = fake->next;
13179 fake->next = NULL;
13180 node->last = fake;
13181
13182 if (cur != NULL) {
13183 cur->prev = NULL;
13184 }
13185
13186 *lst = cur;
13187
13188 while (cur != NULL) {
13189 cur->parent = NULL;
13190 cur = cur->next;
13191 }
13192
13193 xmlUnlinkNode(fake);
13194 xmlFreeNode(fake);
13195
13196
13197 if (ret != XML_ERR_OK) {
13198 xmlFreeNodeList(*lst);
13199 *lst = NULL;
13200 }
William M. Brackc3f81342004-10-03 01:22:44 +000013201
William M. Brackb7b54de2004-10-06 16:38:01 +000013202 if (doc->dict != NULL)
13203 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013204 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013205
Daniel Veillard29b17482004-08-16 00:39:03 +000013206 return(ret);
13207#else /* !SAX2 */
13208 return(XML_ERR_INTERNAL_ERROR);
13209#endif
13210}
13211
Daniel Veillard81273902003-09-30 00:43:48 +000013212#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013213/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013214 * xmlParseBalancedChunkMemoryRecover:
13215 * @doc: the document the chunk pertains to
13216 * @sax: the SAX handler bloc (possibly NULL)
13217 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13218 * @depth: Used for loop detection, use 0
13219 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13220 * @lst: the return value for the set of parsed nodes
13221 * @recover: return nodes even if the data is broken (use 0)
13222 *
13223 *
13224 * Parse a well-balanced chunk of an XML document
13225 * called by the parser
13226 * The allowed sequence for the Well Balanced Chunk is the one defined by
13227 * the content production in the XML grammar:
13228 *
13229 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13230 *
13231 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13232 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013233 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013234 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013235 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13236 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013237 */
13238int
13239xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013240 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013241 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013242 xmlParserCtxtPtr ctxt;
13243 xmlDocPtr newDoc;
13244 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013245 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013246 int size;
13247 int ret = 0;
13248
Daniel Veillard0161e632008-08-28 15:36:32 +000013249 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013250 return(XML_ERR_ENTITY_LOOP);
13251 }
13252
13253
Daniel Veillardcda96922001-08-21 10:56:31 +000013254 if (lst != NULL)
13255 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013256 if (string == NULL)
13257 return(-1);
13258
13259 size = xmlStrlen(string);
13260
13261 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13262 if (ctxt == NULL) return(-1);
13263 ctxt->userData = ctxt;
13264 if (sax != NULL) {
13265 oldsax = ctxt->sax;
13266 ctxt->sax = sax;
13267 if (user_data != NULL)
13268 ctxt->userData = user_data;
13269 }
13270 newDoc = xmlNewDoc(BAD_CAST "1.0");
13271 if (newDoc == NULL) {
13272 xmlFreeParserCtxt(ctxt);
13273 return(-1);
13274 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013275 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013276 if ((doc != NULL) && (doc->dict != NULL)) {
13277 xmlDictFree(ctxt->dict);
13278 ctxt->dict = doc->dict;
13279 xmlDictReference(ctxt->dict);
13280 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13281 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13282 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13283 ctxt->dictNames = 1;
13284 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013285 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013286 }
Owen Taylor3473f882001-02-23 17:55:21 +000013287 if (doc != NULL) {
13288 newDoc->intSubset = doc->intSubset;
13289 newDoc->extSubset = doc->extSubset;
13290 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013291 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13292 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013293 if (sax != NULL)
13294 ctxt->sax = oldsax;
13295 xmlFreeParserCtxt(ctxt);
13296 newDoc->intSubset = NULL;
13297 newDoc->extSubset = NULL;
13298 xmlFreeDoc(newDoc);
13299 return(-1);
13300 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013301 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13302 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013303 if (doc == NULL) {
13304 ctxt->myDoc = newDoc;
13305 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013306 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013307 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013308 /* Ensure that doc has XML spec namespace */
13309 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13310 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013311 }
13312 ctxt->instate = XML_PARSER_CONTENT;
13313 ctxt->depth = depth;
13314
13315 /*
13316 * Doing validity checking on chunk doesn't make sense
13317 */
13318 ctxt->validate = 0;
13319 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013320 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013321
Daniel Veillardb39bc392002-10-26 19:29:51 +000013322 if ( doc != NULL ){
13323 content = doc->children;
13324 doc->children = NULL;
13325 xmlParseContent(ctxt);
13326 doc->children = content;
13327 }
13328 else {
13329 xmlParseContent(ctxt);
13330 }
Owen Taylor3473f882001-02-23 17:55:21 +000013331 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013332 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013333 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013334 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013335 }
13336 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013337 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013338 }
13339
13340 if (!ctxt->wellFormed) {
13341 if (ctxt->errNo == 0)
13342 ret = 1;
13343 else
13344 ret = ctxt->errNo;
13345 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013346 ret = 0;
13347 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013348
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013349 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13350 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013351
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013352 /*
13353 * Return the newly created nodeset after unlinking it from
13354 * they pseudo parent.
13355 */
13356 cur = newDoc->children->children;
13357 *lst = cur;
13358 while (cur != NULL) {
13359 xmlSetTreeDoc(cur, doc);
13360 cur->parent = NULL;
13361 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013362 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013363 newDoc->children->children = NULL;
13364 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013365
13366 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013367 ctxt->sax = oldsax;
13368 xmlFreeParserCtxt(ctxt);
13369 newDoc->intSubset = NULL;
13370 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013371 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013372 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013373
Owen Taylor3473f882001-02-23 17:55:21 +000013374 return(ret);
13375}
13376
13377/**
13378 * xmlSAXParseEntity:
13379 * @sax: the SAX handler block
13380 * @filename: the filename
13381 *
13382 * parse an XML external entity out of context and build a tree.
13383 * It use the given SAX function block to handle the parsing callback.
13384 * If sax is NULL, fallback to the default DOM tree building routines.
13385 *
13386 * [78] extParsedEnt ::= TextDecl? content
13387 *
13388 * This correspond to a "Well Balanced" chunk
13389 *
13390 * Returns the resulting document tree
13391 */
13392
13393xmlDocPtr
13394xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13395 xmlDocPtr ret;
13396 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013397
13398 ctxt = xmlCreateFileParserCtxt(filename);
13399 if (ctxt == NULL) {
13400 return(NULL);
13401 }
13402 if (sax != NULL) {
13403 if (ctxt->sax != NULL)
13404 xmlFree(ctxt->sax);
13405 ctxt->sax = sax;
13406 ctxt->userData = NULL;
13407 }
13408
Owen Taylor3473f882001-02-23 17:55:21 +000013409 xmlParseExtParsedEnt(ctxt);
13410
13411 if (ctxt->wellFormed)
13412 ret = ctxt->myDoc;
13413 else {
13414 ret = NULL;
13415 xmlFreeDoc(ctxt->myDoc);
13416 ctxt->myDoc = NULL;
13417 }
13418 if (sax != NULL)
13419 ctxt->sax = NULL;
13420 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013421
Owen Taylor3473f882001-02-23 17:55:21 +000013422 return(ret);
13423}
13424
13425/**
13426 * xmlParseEntity:
13427 * @filename: the filename
13428 *
13429 * parse an XML external entity out of context and build a tree.
13430 *
13431 * [78] extParsedEnt ::= TextDecl? content
13432 *
13433 * This correspond to a "Well Balanced" chunk
13434 *
13435 * Returns the resulting document tree
13436 */
13437
13438xmlDocPtr
13439xmlParseEntity(const char *filename) {
13440 return(xmlSAXParseEntity(NULL, filename));
13441}
Daniel Veillard81273902003-09-30 00:43:48 +000013442#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013443
13444/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013445 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013446 * @URL: the entity URL
13447 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013448 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013449 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013450 *
13451 * Create a parser context for an external entity
13452 * Automatic support for ZLIB/Compress compressed document is provided
13453 * by default if found at compile-time.
13454 *
13455 * Returns the new parser context or NULL
13456 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013457static xmlParserCtxtPtr
13458xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13459 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013460 xmlParserCtxtPtr ctxt;
13461 xmlParserInputPtr inputStream;
13462 char *directory = NULL;
13463 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013464
Owen Taylor3473f882001-02-23 17:55:21 +000013465 ctxt = xmlNewParserCtxt();
13466 if (ctxt == NULL) {
13467 return(NULL);
13468 }
13469
Daniel Veillard48247b42009-07-10 16:12:46 +020013470 if (pctx != NULL) {
13471 ctxt->options = pctx->options;
13472 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013473 }
13474
Owen Taylor3473f882001-02-23 17:55:21 +000013475 uri = xmlBuildURI(URL, base);
13476
13477 if (uri == NULL) {
13478 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13479 if (inputStream == NULL) {
13480 xmlFreeParserCtxt(ctxt);
13481 return(NULL);
13482 }
13483
13484 inputPush(ctxt, inputStream);
13485
13486 if ((ctxt->directory == NULL) && (directory == NULL))
13487 directory = xmlParserGetDirectory((char *)URL);
13488 if ((ctxt->directory == NULL) && (directory != NULL))
13489 ctxt->directory = directory;
13490 } else {
13491 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13492 if (inputStream == NULL) {
13493 xmlFree(uri);
13494 xmlFreeParserCtxt(ctxt);
13495 return(NULL);
13496 }
13497
13498 inputPush(ctxt, inputStream);
13499
13500 if ((ctxt->directory == NULL) && (directory == NULL))
13501 directory = xmlParserGetDirectory((char *)uri);
13502 if ((ctxt->directory == NULL) && (directory != NULL))
13503 ctxt->directory = directory;
13504 xmlFree(uri);
13505 }
Owen Taylor3473f882001-02-23 17:55:21 +000013506 return(ctxt);
13507}
13508
Rob Richards9c0aa472009-03-26 18:10:19 +000013509/**
13510 * xmlCreateEntityParserCtxt:
13511 * @URL: the entity URL
13512 * @ID: the entity PUBLIC ID
13513 * @base: a possible base for the target URI
13514 *
13515 * Create a parser context for an external entity
13516 * Automatic support for ZLIB/Compress compressed document is provided
13517 * by default if found at compile-time.
13518 *
13519 * Returns the new parser context or NULL
13520 */
13521xmlParserCtxtPtr
13522xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13523 const xmlChar *base) {
13524 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13525
13526}
13527
Owen Taylor3473f882001-02-23 17:55:21 +000013528/************************************************************************
13529 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013530 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013531 * *
13532 ************************************************************************/
13533
13534/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013535 * xmlCreateURLParserCtxt:
13536 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013537 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013538 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013539 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013540 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013541 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013542 *
13543 * Returns the new parser context or NULL
13544 */
13545xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013546xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013547{
13548 xmlParserCtxtPtr ctxt;
13549 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013550 char *directory = NULL;
13551
Owen Taylor3473f882001-02-23 17:55:21 +000013552 ctxt = xmlNewParserCtxt();
13553 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013554 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013555 return(NULL);
13556 }
13557
Daniel Veillarddf292f72005-01-16 19:00:15 +000013558 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013559 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013560 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013561
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013562 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013563 if (inputStream == NULL) {
13564 xmlFreeParserCtxt(ctxt);
13565 return(NULL);
13566 }
13567
Owen Taylor3473f882001-02-23 17:55:21 +000013568 inputPush(ctxt, inputStream);
13569 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013570 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013571 if ((ctxt->directory == NULL) && (directory != NULL))
13572 ctxt->directory = directory;
13573
13574 return(ctxt);
13575}
13576
Daniel Veillard61b93382003-11-03 14:28:31 +000013577/**
13578 * xmlCreateFileParserCtxt:
13579 * @filename: the filename
13580 *
13581 * Create a parser context for a file content.
13582 * Automatic support for ZLIB/Compress compressed document is provided
13583 * by default if found at compile-time.
13584 *
13585 * Returns the new parser context or NULL
13586 */
13587xmlParserCtxtPtr
13588xmlCreateFileParserCtxt(const char *filename)
13589{
13590 return(xmlCreateURLParserCtxt(filename, 0));
13591}
13592
Daniel Veillard81273902003-09-30 00:43:48 +000013593#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013594/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013595 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013596 * @sax: the SAX handler block
13597 * @filename: the filename
13598 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13599 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013600 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013601 *
13602 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13603 * compressed document is provided by default if found at compile-time.
13604 * It use the given SAX function block to handle the parsing callback.
13605 * If sax is NULL, fallback to the default DOM tree building routines.
13606 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013607 * User data (void *) is stored within the parser context in the
13608 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013609 *
Owen Taylor3473f882001-02-23 17:55:21 +000013610 * Returns the resulting document tree
13611 */
13612
13613xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013614xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13615 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013616 xmlDocPtr ret;
13617 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013618
Daniel Veillard635ef722001-10-29 11:48:19 +000013619 xmlInitParser();
13620
Owen Taylor3473f882001-02-23 17:55:21 +000013621 ctxt = xmlCreateFileParserCtxt(filename);
13622 if (ctxt == NULL) {
13623 return(NULL);
13624 }
13625 if (sax != NULL) {
13626 if (ctxt->sax != NULL)
13627 xmlFree(ctxt->sax);
13628 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013629 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013630 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013631 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013632 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013633 }
Owen Taylor3473f882001-02-23 17:55:21 +000013634
Daniel Veillard37d2d162008-03-14 10:54:00 +000013635 if (ctxt->directory == NULL)
13636 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013637
Daniel Veillarddad3f682002-11-17 16:47:27 +000013638 ctxt->recovery = recovery;
13639
Owen Taylor3473f882001-02-23 17:55:21 +000013640 xmlParseDocument(ctxt);
13641
William M. Brackc07329e2003-09-08 01:57:30 +000013642 if ((ctxt->wellFormed) || recovery) {
13643 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013644 if (ret != NULL) {
13645 if (ctxt->input->buf->compressed > 0)
13646 ret->compression = 9;
13647 else
13648 ret->compression = ctxt->input->buf->compressed;
13649 }
William M. Brackc07329e2003-09-08 01:57:30 +000013650 }
Owen Taylor3473f882001-02-23 17:55:21 +000013651 else {
13652 ret = NULL;
13653 xmlFreeDoc(ctxt->myDoc);
13654 ctxt->myDoc = NULL;
13655 }
13656 if (sax != NULL)
13657 ctxt->sax = NULL;
13658 xmlFreeParserCtxt(ctxt);
13659
13660 return(ret);
13661}
13662
13663/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013664 * xmlSAXParseFile:
13665 * @sax: the SAX handler block
13666 * @filename: the filename
13667 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13668 * documents
13669 *
13670 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13671 * compressed document is provided by default if found at compile-time.
13672 * It use the given SAX function block to handle the parsing callback.
13673 * If sax is NULL, fallback to the default DOM tree building routines.
13674 *
13675 * Returns the resulting document tree
13676 */
13677
13678xmlDocPtr
13679xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13680 int recovery) {
13681 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13682}
13683
13684/**
Owen Taylor3473f882001-02-23 17:55:21 +000013685 * xmlRecoverDoc:
13686 * @cur: a pointer to an array of xmlChar
13687 *
13688 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013689 * In the case the document is not Well Formed, a attempt to build a
13690 * tree is tried anyway
13691 *
13692 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013693 */
13694
13695xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020013696xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013697 return(xmlSAXParseDoc(NULL, cur, 1));
13698}
13699
13700/**
13701 * xmlParseFile:
13702 * @filename: the filename
13703 *
13704 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13705 * compressed document is provided by default if found at compile-time.
13706 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013707 * Returns the resulting document tree if the file was wellformed,
13708 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013709 */
13710
13711xmlDocPtr
13712xmlParseFile(const char *filename) {
13713 return(xmlSAXParseFile(NULL, filename, 0));
13714}
13715
13716/**
13717 * xmlRecoverFile:
13718 * @filename: the filename
13719 *
13720 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13721 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013722 * In the case the document is not Well Formed, it attempts to build
13723 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013724 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013725 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013726 */
13727
13728xmlDocPtr
13729xmlRecoverFile(const char *filename) {
13730 return(xmlSAXParseFile(NULL, filename, 1));
13731}
13732
13733
13734/**
13735 * xmlSetupParserForBuffer:
13736 * @ctxt: an XML parser context
13737 * @buffer: a xmlChar * buffer
13738 * @filename: a file name
13739 *
13740 * Setup the parser context to parse a new buffer; Clears any prior
13741 * contents from the parser context. The buffer parameter must not be
13742 * NULL, but the filename parameter can be
13743 */
13744void
13745xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13746 const char* filename)
13747{
13748 xmlParserInputPtr input;
13749
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013750 if ((ctxt == NULL) || (buffer == NULL))
13751 return;
13752
Owen Taylor3473f882001-02-23 17:55:21 +000013753 input = xmlNewInputStream(ctxt);
13754 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013755 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013756 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013757 return;
13758 }
13759
13760 xmlClearParserCtxt(ctxt);
13761 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013762 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013763 input->base = buffer;
13764 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013765 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013766 inputPush(ctxt, input);
13767}
13768
13769/**
13770 * xmlSAXUserParseFile:
13771 * @sax: a SAX handler
13772 * @user_data: The user data returned on SAX callbacks
13773 * @filename: a file name
13774 *
13775 * parse an XML file and call the given SAX handler routines.
13776 * Automatic support for ZLIB/Compress compressed document is provided
13777 *
13778 * Returns 0 in case of success or a error number otherwise
13779 */
13780int
13781xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13782 const char *filename) {
13783 int ret = 0;
13784 xmlParserCtxtPtr ctxt;
13785
13786 ctxt = xmlCreateFileParserCtxt(filename);
13787 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013788 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013789 xmlFree(ctxt->sax);
13790 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013791 xmlDetectSAX2(ctxt);
13792
Owen Taylor3473f882001-02-23 17:55:21 +000013793 if (user_data != NULL)
13794 ctxt->userData = user_data;
13795
13796 xmlParseDocument(ctxt);
13797
13798 if (ctxt->wellFormed)
13799 ret = 0;
13800 else {
13801 if (ctxt->errNo != 0)
13802 ret = ctxt->errNo;
13803 else
13804 ret = -1;
13805 }
13806 if (sax != NULL)
13807 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013808 if (ctxt->myDoc != NULL) {
13809 xmlFreeDoc(ctxt->myDoc);
13810 ctxt->myDoc = NULL;
13811 }
Owen Taylor3473f882001-02-23 17:55:21 +000013812 xmlFreeParserCtxt(ctxt);
13813
13814 return ret;
13815}
Daniel Veillard81273902003-09-30 00:43:48 +000013816#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013817
13818/************************************************************************
13819 * *
13820 * Front ends when parsing from memory *
13821 * *
13822 ************************************************************************/
13823
13824/**
13825 * xmlCreateMemoryParserCtxt:
13826 * @buffer: a pointer to a char array
13827 * @size: the size of the array
13828 *
13829 * Create a parser context for an XML in-memory document.
13830 *
13831 * Returns the new parser context or NULL
13832 */
13833xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013834xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013835 xmlParserCtxtPtr ctxt;
13836 xmlParserInputPtr input;
13837 xmlParserInputBufferPtr buf;
13838
13839 if (buffer == NULL)
13840 return(NULL);
13841 if (size <= 0)
13842 return(NULL);
13843
13844 ctxt = xmlNewParserCtxt();
13845 if (ctxt == NULL)
13846 return(NULL);
13847
Daniel Veillard53350552003-09-18 13:35:51 +000013848 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013849 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013850 if (buf == NULL) {
13851 xmlFreeParserCtxt(ctxt);
13852 return(NULL);
13853 }
Owen Taylor3473f882001-02-23 17:55:21 +000013854
13855 input = xmlNewInputStream(ctxt);
13856 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013857 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013858 xmlFreeParserCtxt(ctxt);
13859 return(NULL);
13860 }
13861
13862 input->filename = NULL;
13863 input->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080013864 xmlBufResetInput(input->buf->buffer, input);
Owen Taylor3473f882001-02-23 17:55:21 +000013865
13866 inputPush(ctxt, input);
13867 return(ctxt);
13868}
13869
Daniel Veillard81273902003-09-30 00:43:48 +000013870#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013871/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013872 * xmlSAXParseMemoryWithData:
13873 * @sax: the SAX handler block
13874 * @buffer: an pointer to a char array
13875 * @size: the size of the array
13876 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13877 * documents
13878 * @data: the userdata
13879 *
13880 * parse an XML in-memory block and use the given SAX function block
13881 * to handle the parsing callback. If sax is NULL, fallback to the default
13882 * DOM tree building routines.
13883 *
13884 * User data (void *) is stored within the parser context in the
13885 * context's _private member, so it is available nearly everywhere in libxml
13886 *
13887 * Returns the resulting document tree
13888 */
13889
13890xmlDocPtr
13891xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13892 int size, int recovery, void *data) {
13893 xmlDocPtr ret;
13894 xmlParserCtxtPtr ctxt;
13895
Daniel Veillardab2a7632009-07-09 08:45:03 +020013896 xmlInitParser();
13897
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013898 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13899 if (ctxt == NULL) return(NULL);
13900 if (sax != NULL) {
13901 if (ctxt->sax != NULL)
13902 xmlFree(ctxt->sax);
13903 ctxt->sax = sax;
13904 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013905 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013906 if (data!=NULL) {
13907 ctxt->_private=data;
13908 }
13909
Daniel Veillardadba5f12003-04-04 16:09:01 +000013910 ctxt->recovery = recovery;
13911
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013912 xmlParseDocument(ctxt);
13913
13914 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13915 else {
13916 ret = NULL;
13917 xmlFreeDoc(ctxt->myDoc);
13918 ctxt->myDoc = NULL;
13919 }
13920 if (sax != NULL)
13921 ctxt->sax = NULL;
13922 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013923
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013924 return(ret);
13925}
13926
13927/**
Owen Taylor3473f882001-02-23 17:55:21 +000013928 * xmlSAXParseMemory:
13929 * @sax: the SAX handler block
13930 * @buffer: an pointer to a char array
13931 * @size: the size of the array
13932 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13933 * documents
13934 *
13935 * parse an XML in-memory block and use the given SAX function block
13936 * to handle the parsing callback. If sax is NULL, fallback to the default
13937 * DOM tree building routines.
13938 *
13939 * Returns the resulting document tree
13940 */
13941xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013942xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13943 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013944 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013945}
13946
13947/**
13948 * xmlParseMemory:
13949 * @buffer: an pointer to a char array
13950 * @size: the size of the array
13951 *
13952 * parse an XML in-memory block and build a tree.
13953 *
13954 * Returns the resulting document tree
13955 */
13956
Daniel Veillard50822cb2001-07-26 20:05:51 +000013957xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013958 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13959}
13960
13961/**
13962 * xmlRecoverMemory:
13963 * @buffer: an pointer to a char array
13964 * @size: the size of the array
13965 *
13966 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013967 * In the case the document is not Well Formed, an attempt to
13968 * build a tree is tried anyway
13969 *
13970 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013971 */
13972
Daniel Veillard50822cb2001-07-26 20:05:51 +000013973xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013974 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13975}
13976
13977/**
13978 * xmlSAXUserParseMemory:
13979 * @sax: a SAX handler
13980 * @user_data: The user data returned on SAX callbacks
13981 * @buffer: an in-memory XML document input
13982 * @size: the length of the XML document in bytes
13983 *
13984 * A better SAX parsing routine.
13985 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020013986 *
Owen Taylor3473f882001-02-23 17:55:21 +000013987 * Returns 0 in case of success or a error number otherwise
13988 */
13989int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013990 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013991 int ret = 0;
13992 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013993
13994 xmlInitParser();
13995
Owen Taylor3473f882001-02-23 17:55:21 +000013996 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13997 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013998 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13999 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014000 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014001 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014002
Daniel Veillard30211a02001-04-26 09:33:18 +000014003 if (user_data != NULL)
14004 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014005
Owen Taylor3473f882001-02-23 17:55:21 +000014006 xmlParseDocument(ctxt);
14007
14008 if (ctxt->wellFormed)
14009 ret = 0;
14010 else {
14011 if (ctxt->errNo != 0)
14012 ret = ctxt->errNo;
14013 else
14014 ret = -1;
14015 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014016 if (sax != NULL)
14017 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014018 if (ctxt->myDoc != NULL) {
14019 xmlFreeDoc(ctxt->myDoc);
14020 ctxt->myDoc = NULL;
14021 }
Owen Taylor3473f882001-02-23 17:55:21 +000014022 xmlFreeParserCtxt(ctxt);
14023
14024 return ret;
14025}
Daniel Veillard81273902003-09-30 00:43:48 +000014026#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014027
14028/**
14029 * xmlCreateDocParserCtxt:
14030 * @cur: a pointer to an array of xmlChar
14031 *
14032 * Creates a parser context for an XML in-memory document.
14033 *
14034 * Returns the new parser context or NULL
14035 */
14036xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014037xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014038 int len;
14039
14040 if (cur == NULL)
14041 return(NULL);
14042 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014043 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014044}
14045
Daniel Veillard81273902003-09-30 00:43:48 +000014046#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014047/**
14048 * xmlSAXParseDoc:
14049 * @sax: the SAX handler block
14050 * @cur: a pointer to an array of xmlChar
14051 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14052 * documents
14053 *
14054 * parse an XML in-memory document and build a tree.
14055 * It use the given SAX function block to handle the parsing callback.
14056 * If sax is NULL, fallback to the default DOM tree building routines.
14057 *
14058 * Returns the resulting document tree
14059 */
14060
14061xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014062xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014063 xmlDocPtr ret;
14064 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014065 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014066
Daniel Veillard38936062004-11-04 17:45:11 +000014067 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014068
14069
14070 ctxt = xmlCreateDocParserCtxt(cur);
14071 if (ctxt == NULL) return(NULL);
14072 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014073 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014074 ctxt->sax = sax;
14075 ctxt->userData = NULL;
14076 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014077 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014078
14079 xmlParseDocument(ctxt);
14080 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14081 else {
14082 ret = NULL;
14083 xmlFreeDoc(ctxt->myDoc);
14084 ctxt->myDoc = NULL;
14085 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014086 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014087 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014088 xmlFreeParserCtxt(ctxt);
14089
14090 return(ret);
14091}
14092
14093/**
14094 * xmlParseDoc:
14095 * @cur: a pointer to an array of xmlChar
14096 *
14097 * parse an XML in-memory document and build a tree.
14098 *
14099 * Returns the resulting document tree
14100 */
14101
14102xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014103xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014104 return(xmlSAXParseDoc(NULL, cur, 0));
14105}
Daniel Veillard81273902003-09-30 00:43:48 +000014106#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014107
Daniel Veillard81273902003-09-30 00:43:48 +000014108#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014109/************************************************************************
14110 * *
14111 * Specific function to keep track of entities references *
14112 * and used by the XSLT debugger *
14113 * *
14114 ************************************************************************/
14115
/*
 * Callback invoked by xmlAddEntityReference() and installed through
 * xmlSetEntityReferenceFunc(); NULL means no callback is registered.
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14117
14118/**
14119 * xmlAddEntityReference:
14120 * @ent : A valid entity
14121 * @firstNode : A valid first node for children of entity
14122 * @lastNode : A valid last node of children entity
14123 *
14124 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14125 */
14126static void
14127xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14128 xmlNodePtr lastNode)
14129{
14130 if (xmlEntityRefFunc != NULL) {
14131 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14132 }
14133}
14134
14135
14136/**
14137 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014138 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014139 *
14140 * Set the function to call call back when a xml reference has been made
14141 */
14142void
14143xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14144{
14145 xmlEntityRefFunc = func;
14146}
Daniel Veillard81273902003-09-30 00:43:48 +000014147#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014148
14149/************************************************************************
14150 * *
14151 * Miscellaneous *
14152 * *
14153 ************************************************************************/
14154
14155#ifdef LIBXML_XPATH_ENABLED
14156#include <libxml/xpath.h>
14157#endif
14158
Daniel Veillardffa3c742005-07-21 13:24:09 +000014159extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014160static int xmlParserInitialized = 0;
14161
14162/**
14163 * xmlInitParser:
14164 *
14165 * Initialization function for the XML parser.
14166 * This is not reentrant. Call once before processing in case of
14167 * use in multithreaded programs.
14168 */
14169
14170void
14171xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014172 if (xmlParserInitialized != 0)
14173 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014174
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014175#ifdef LIBXML_THREAD_ENABLED
14176 __xmlGlobalInitMutexLock();
14177 if (xmlParserInitialized == 0) {
14178#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014179 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014180 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014181 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14182 (xmlGenericError == NULL))
14183 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014184 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014185 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014186 xmlInitCharEncodingHandlers();
14187 xmlDefaultSAXHandlerInit();
14188 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014189#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014190 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014191#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014192#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014193 htmlInitAutoClose();
14194 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014195#endif
14196#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014197 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014198#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014199 xmlParserInitialized = 1;
14200#ifdef LIBXML_THREAD_ENABLED
14201 }
14202 __xmlGlobalInitMutexUnlock();
14203#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014204}
14205
14206/**
14207 * xmlCleanupParser:
14208 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014209 * This function name is somewhat misleading. It does not clean up
14210 * parser state, it cleans up memory allocated by the library itself.
14211 * It is a cleanup function for the XML library. It tries to reclaim all
14212 * related global memory allocated for the library processing.
14213 * It doesn't deallocate any document related memory. One should
14214 * call xmlCleanupParser() only when the process has finished using
14215 * the library and all XML/HTML documents built with it.
14216 * See also xmlInitParser() which has the opposite function of preparing
14217 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014218 *
14219 * WARNING: if your application is multithreaded or has plugin support
14220 * calling this may crash the application if another thread or
14221 * a plugin is still using libxml2. It's sometimes very hard to
14222 * guess if libxml2 is in use in the application, some libraries
14223 * or plugins may use it without notice. In case of doubt abstain
14224 * from calling this function or do it just before calling exit()
14225 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014226 */
14227
14228void
14229xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014230 if (!xmlParserInitialized)
14231 return;
14232
Owen Taylor3473f882001-02-23 17:55:21 +000014233 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014234#ifdef LIBXML_CATALOG_ENABLED
14235 xmlCatalogCleanup();
14236#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014237 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014238 xmlCleanupInputCallbacks();
14239#ifdef LIBXML_OUTPUT_ENABLED
14240 xmlCleanupOutputCallbacks();
14241#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014242#ifdef LIBXML_SCHEMAS_ENABLED
14243 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014244 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014245#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000014246 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014247 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014248 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014249 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014250 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014251}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014252
14253/************************************************************************
14254 * *
14255 * New set (2.6.0) of simpler and more flexible APIs *
14256 * *
14257 ************************************************************************/
14258
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored.
 *
 * The expansion is a single statement (do/while(0) idiom) so the macro
 * can safely be used as the body of an if/else; callers supply the
 * terminating semicolon. @str is evaluated more than once, so it must
 * not have side effects.
 */
#define DICT_FREE(str)						\
    do {							\
	if ((str) && ((!dict) ||				\
	    (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	    xmlFree((char *)(str));				\
    } while (0)
14270
14271/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014272 * xmlCtxtReset:
14273 * @ctxt: an XML parser context
14274 *
14275 * Reset a parser context
14276 */
14277void
14278xmlCtxtReset(xmlParserCtxtPtr ctxt)
14279{
14280 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014281 xmlDictPtr dict;
14282
14283 if (ctxt == NULL)
14284 return;
14285
14286 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014287
14288 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14289 xmlFreeInputStream(input);
14290 }
14291 ctxt->inputNr = 0;
14292 ctxt->input = NULL;
14293
14294 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014295 if (ctxt->spaceTab != NULL) {
14296 ctxt->spaceTab[0] = -1;
14297 ctxt->space = &ctxt->spaceTab[0];
14298 } else {
14299 ctxt->space = NULL;
14300 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014301
14302
14303 ctxt->nodeNr = 0;
14304 ctxt->node = NULL;
14305
14306 ctxt->nameNr = 0;
14307 ctxt->name = NULL;
14308
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014309 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014310 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014311 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014312 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014313 DICT_FREE(ctxt->directory);
14314 ctxt->directory = NULL;
14315 DICT_FREE(ctxt->extSubURI);
14316 ctxt->extSubURI = NULL;
14317 DICT_FREE(ctxt->extSubSystem);
14318 ctxt->extSubSystem = NULL;
14319 if (ctxt->myDoc != NULL)
14320 xmlFreeDoc(ctxt->myDoc);
14321 ctxt->myDoc = NULL;
14322
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014323 ctxt->standalone = -1;
14324 ctxt->hasExternalSubset = 0;
14325 ctxt->hasPErefs = 0;
14326 ctxt->html = 0;
14327 ctxt->external = 0;
14328 ctxt->instate = XML_PARSER_START;
14329 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014330
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014331 ctxt->wellFormed = 1;
14332 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014333 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014334 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014335#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014336 ctxt->vctxt.userData = ctxt;
14337 ctxt->vctxt.error = xmlParserValidityError;
14338 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014339#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014340 ctxt->record_info = 0;
14341 ctxt->nbChars = 0;
14342 ctxt->checkIndex = 0;
14343 ctxt->inSubset = 0;
14344 ctxt->errNo = XML_ERR_OK;
14345 ctxt->depth = 0;
14346 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14347 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014348 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014349 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014350 xmlInitNodeInfoSeq(&ctxt->node_seq);
14351
14352 if (ctxt->attsDefault != NULL) {
14353 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14354 ctxt->attsDefault = NULL;
14355 }
14356 if (ctxt->attsSpecial != NULL) {
14357 xmlHashFree(ctxt->attsSpecial, NULL);
14358 ctxt->attsSpecial = NULL;
14359 }
14360
Daniel Veillard4432df22003-09-28 18:58:27 +000014361#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014362 if (ctxt->catalogs != NULL)
14363 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014364#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014365 if (ctxt->lastError.code != XML_ERR_OK)
14366 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014367}
14368
14369/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014370 * xmlCtxtResetPush:
14371 * @ctxt: an XML parser context
14372 * @chunk: a pointer to an array of chars
14373 * @size: number of chars in the array
14374 * @filename: an optional file name or URI
14375 * @encoding: the document encoding, or NULL
14376 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014377 * Reset a push parser context
14378 *
14379 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014380 */
14381int
14382xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14383 int size, const char *filename, const char *encoding)
14384{
14385 xmlParserInputPtr inputStream;
14386 xmlParserInputBufferPtr buf;
14387 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14388
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014389 if (ctxt == NULL)
14390 return(1);
14391
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014392 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14393 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14394
14395 buf = xmlAllocParserInputBuffer(enc);
14396 if (buf == NULL)
14397 return(1);
14398
14399 if (ctxt == NULL) {
14400 xmlFreeParserInputBuffer(buf);
14401 return(1);
14402 }
14403
14404 xmlCtxtReset(ctxt);
14405
14406 if (ctxt->pushTab == NULL) {
14407 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14408 sizeof(xmlChar *));
14409 if (ctxt->pushTab == NULL) {
14410 xmlErrMemory(ctxt, NULL);
14411 xmlFreeParserInputBuffer(buf);
14412 return(1);
14413 }
14414 }
14415
14416 if (filename == NULL) {
14417 ctxt->directory = NULL;
14418 } else {
14419 ctxt->directory = xmlParserGetDirectory(filename);
14420 }
14421
14422 inputStream = xmlNewInputStream(ctxt);
14423 if (inputStream == NULL) {
14424 xmlFreeParserInputBuffer(buf);
14425 return(1);
14426 }
14427
14428 if (filename == NULL)
14429 inputStream->filename = NULL;
14430 else
14431 inputStream->filename = (char *)
14432 xmlCanonicPath((const xmlChar *) filename);
14433 inputStream->buf = buf;
Daniel Veillard61551a12012-07-16 16:28:47 +080014434 xmlBufResetInput(buf->buffer, inputStream);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014435
14436 inputPush(ctxt, inputStream);
14437
14438 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14439 (ctxt->input->buf != NULL)) {
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014440 size_t base = xmlBufGetInputBase(ctxt->input->buf->buffer, ctxt->input);
14441 size_t cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014442
14443 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14444
Daniel Veillard00ac0d32012-07-16 18:03:01 +080014445 xmlBufSetInputBaseCur(ctxt->input->buf->buffer, ctxt->input, base, cur);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014446#ifdef DEBUG_PUSH
14447 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14448#endif
14449 }
14450
14451 if (encoding != NULL) {
14452 xmlCharEncodingHandlerPtr hdlr;
14453
Daniel Veillard37334572008-07-31 08:20:02 +000014454 if (ctxt->encoding != NULL)
14455 xmlFree((xmlChar *) ctxt->encoding);
14456 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14457
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014458 hdlr = xmlFindCharEncodingHandler(encoding);
14459 if (hdlr != NULL) {
14460 xmlSwitchToEncoding(ctxt, hdlr);
14461 } else {
14462 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14463 "Unsupported encoding %s\n", BAD_CAST encoding);
14464 }
14465 } else if (enc != XML_CHAR_ENCODING_NONE) {
14466 xmlSwitchEncoding(ctxt, enc);
14467 }
14468
14469 return(0);
14470}
14471
Daniel Veillard37334572008-07-31 08:20:02 +000014472
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014473/**
Daniel Veillard37334572008-07-31 08:20:02 +000014474 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014475 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014476 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014477 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014478 *
14479 * Applies the options to the parser context
14480 *
14481 * Returns 0 in case of success, the set of unknown or unimplemented options
14482 * in case of error.
14483 */
Daniel Veillard37334572008-07-31 08:20:02 +000014484static int
14485xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014486{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014487 if (ctxt == NULL)
14488 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014489 if (encoding != NULL) {
14490 if (ctxt->encoding != NULL)
14491 xmlFree((xmlChar *) ctxt->encoding);
14492 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14493 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014494 if (options & XML_PARSE_RECOVER) {
14495 ctxt->recovery = 1;
14496 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014497 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014498 } else
14499 ctxt->recovery = 0;
14500 if (options & XML_PARSE_DTDLOAD) {
14501 ctxt->loadsubset = XML_DETECT_IDS;
14502 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014503 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014504 } else
14505 ctxt->loadsubset = 0;
14506 if (options & XML_PARSE_DTDATTR) {
14507 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14508 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014509 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014510 }
14511 if (options & XML_PARSE_NOENT) {
14512 ctxt->replaceEntities = 1;
14513 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14514 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014515 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014516 } else
14517 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014518 if (options & XML_PARSE_PEDANTIC) {
14519 ctxt->pedantic = 1;
14520 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014521 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014522 } else
14523 ctxt->pedantic = 0;
14524 if (options & XML_PARSE_NOBLANKS) {
14525 ctxt->keepBlanks = 0;
14526 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14527 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014528 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014529 } else
14530 ctxt->keepBlanks = 1;
14531 if (options & XML_PARSE_DTDVALID) {
14532 ctxt->validate = 1;
14533 if (options & XML_PARSE_NOWARNING)
14534 ctxt->vctxt.warning = NULL;
14535 if (options & XML_PARSE_NOERROR)
14536 ctxt->vctxt.error = NULL;
14537 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014538 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014539 } else
14540 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014541 if (options & XML_PARSE_NOWARNING) {
14542 ctxt->sax->warning = NULL;
14543 options -= XML_PARSE_NOWARNING;
14544 }
14545 if (options & XML_PARSE_NOERROR) {
14546 ctxt->sax->error = NULL;
14547 ctxt->sax->fatalError = NULL;
14548 options -= XML_PARSE_NOERROR;
14549 }
Daniel Veillard81273902003-09-30 00:43:48 +000014550#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014551 if (options & XML_PARSE_SAX1) {
14552 ctxt->sax->startElement = xmlSAX2StartElement;
14553 ctxt->sax->endElement = xmlSAX2EndElement;
14554 ctxt->sax->startElementNs = NULL;
14555 ctxt->sax->endElementNs = NULL;
14556 ctxt->sax->initialized = 1;
14557 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014558 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014559 }
Daniel Veillard81273902003-09-30 00:43:48 +000014560#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014561 if (options & XML_PARSE_NODICT) {
14562 ctxt->dictNames = 0;
14563 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014564 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014565 } else {
14566 ctxt->dictNames = 1;
14567 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014568 if (options & XML_PARSE_NOCDATA) {
14569 ctxt->sax->cdataBlock = NULL;
14570 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014571 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014572 }
14573 if (options & XML_PARSE_NSCLEAN) {
14574 ctxt->options |= XML_PARSE_NSCLEAN;
14575 options -= XML_PARSE_NSCLEAN;
14576 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014577 if (options & XML_PARSE_NONET) {
14578 ctxt->options |= XML_PARSE_NONET;
14579 options -= XML_PARSE_NONET;
14580 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014581 if (options & XML_PARSE_COMPACT) {
14582 ctxt->options |= XML_PARSE_COMPACT;
14583 options -= XML_PARSE_COMPACT;
14584 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014585 if (options & XML_PARSE_OLD10) {
14586 ctxt->options |= XML_PARSE_OLD10;
14587 options -= XML_PARSE_OLD10;
14588 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014589 if (options & XML_PARSE_NOBASEFIX) {
14590 ctxt->options |= XML_PARSE_NOBASEFIX;
14591 options -= XML_PARSE_NOBASEFIX;
14592 }
14593 if (options & XML_PARSE_HUGE) {
14594 ctxt->options |= XML_PARSE_HUGE;
14595 options -= XML_PARSE_HUGE;
14596 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014597 if (options & XML_PARSE_OLDSAX) {
14598 ctxt->options |= XML_PARSE_OLDSAX;
14599 options -= XML_PARSE_OLDSAX;
14600 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080014601 if (options & XML_PARSE_IGNORE_ENC) {
14602 ctxt->options |= XML_PARSE_IGNORE_ENC;
14603 options -= XML_PARSE_IGNORE_ENC;
14604 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014605 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014606 return (options);
14607}
14608
14609/**
Daniel Veillard37334572008-07-31 08:20:02 +000014610 * xmlCtxtUseOptions:
14611 * @ctxt: an XML parser context
14612 * @options: a combination of xmlParserOption
14613 *
14614 * Applies the options to the parser context
14615 *
14616 * Returns 0 in case of success, the set of unknown or unimplemented options
14617 * in case of error.
14618 */
14619int
14620xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14621{
14622 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14623}
14624
14625/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014626 * xmlDoRead:
14627 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014628 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014629 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014630 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014631 * @reuse: keep the context for reuse
14632 *
14633 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014634 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014635 * Returns the resulting document tree or NULL
14636 */
14637static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014638xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14639 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014640{
14641 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014642
14643 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014644 if (encoding != NULL) {
14645 xmlCharEncodingHandlerPtr hdlr;
14646
14647 hdlr = xmlFindCharEncodingHandler(encoding);
14648 if (hdlr != NULL)
14649 xmlSwitchToEncoding(ctxt, hdlr);
14650 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014651 if ((URL != NULL) && (ctxt->input != NULL) &&
14652 (ctxt->input->filename == NULL))
14653 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014654 xmlParseDocument(ctxt);
14655 if ((ctxt->wellFormed) || ctxt->recovery)
14656 ret = ctxt->myDoc;
14657 else {
14658 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014659 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014660 xmlFreeDoc(ctxt->myDoc);
14661 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014662 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014663 ctxt->myDoc = NULL;
14664 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014665 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014666 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014667
14668 return (ret);
14669}
14670
14671/**
14672 * xmlReadDoc:
14673 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014674 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014675 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014676 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014677 *
14678 * parse an XML in-memory document and build a tree.
14679 *
14680 * Returns the resulting document tree
14681 */
14682xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014683xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014684{
14685 xmlParserCtxtPtr ctxt;
14686
14687 if (cur == NULL)
14688 return (NULL);
14689
14690 ctxt = xmlCreateDocParserCtxt(cur);
14691 if (ctxt == NULL)
14692 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014693 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014694}
14695
14696/**
14697 * xmlReadFile:
14698 * @filename: a file or URL
14699 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014700 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014701 *
14702 * parse an XML file from the filesystem or the network.
14703 *
14704 * Returns the resulting document tree
14705 */
14706xmlDocPtr
14707xmlReadFile(const char *filename, const char *encoding, int options)
14708{
14709 xmlParserCtxtPtr ctxt;
14710
Daniel Veillard61b93382003-11-03 14:28:31 +000014711 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014712 if (ctxt == NULL)
14713 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014714 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014715}
14716
14717/**
14718 * xmlReadMemory:
14719 * @buffer: a pointer to a char array
14720 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014721 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014722 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014723 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014724 *
14725 * parse an XML in-memory document and build a tree.
14726 *
14727 * Returns the resulting document tree
14728 */
14729xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014730xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014731{
14732 xmlParserCtxtPtr ctxt;
14733
14734 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14735 if (ctxt == NULL)
14736 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014737 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014738}
14739
14740/**
14741 * xmlReadFd:
14742 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014743 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014744 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014745 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014746 *
14747 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014748 * NOTE that the file descriptor will not be closed when the
14749 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014750 *
14751 * Returns the resulting document tree
14752 */
14753xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014754xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014755{
14756 xmlParserCtxtPtr ctxt;
14757 xmlParserInputBufferPtr input;
14758 xmlParserInputPtr stream;
14759
14760 if (fd < 0)
14761 return (NULL);
14762
14763 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14764 if (input == NULL)
14765 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014766 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014767 ctxt = xmlNewParserCtxt();
14768 if (ctxt == NULL) {
14769 xmlFreeParserInputBuffer(input);
14770 return (NULL);
14771 }
14772 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14773 if (stream == NULL) {
14774 xmlFreeParserInputBuffer(input);
14775 xmlFreeParserCtxt(ctxt);
14776 return (NULL);
14777 }
14778 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014779 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014780}
14781
14782/**
14783 * xmlReadIO:
14784 * @ioread: an I/O read function
14785 * @ioclose: an I/O close function
14786 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014787 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014788 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014789 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014790 *
14791 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080014792 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014793 * Returns the resulting document tree
14794 */
14795xmlDocPtr
14796xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014797 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014798{
14799 xmlParserCtxtPtr ctxt;
14800 xmlParserInputBufferPtr input;
14801 xmlParserInputPtr stream;
14802
14803 if (ioread == NULL)
14804 return (NULL);
14805
14806 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14807 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080014808 if (input == NULL) {
14809 if (ioclose != NULL)
14810 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014811 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080014812 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014813 ctxt = xmlNewParserCtxt();
14814 if (ctxt == NULL) {
14815 xmlFreeParserInputBuffer(input);
14816 return (NULL);
14817 }
14818 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14819 if (stream == NULL) {
14820 xmlFreeParserInputBuffer(input);
14821 xmlFreeParserCtxt(ctxt);
14822 return (NULL);
14823 }
14824 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014825 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014826}
14827
14828/**
14829 * xmlCtxtReadDoc:
14830 * @ctxt: an XML parser context
14831 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014832 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014833 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014834 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014835 *
14836 * parse an XML in-memory document and build a tree.
14837 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080014838 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014839 * Returns the resulting document tree
14840 */
14841xmlDocPtr
14842xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014843 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014844{
14845 xmlParserInputPtr stream;
14846
14847 if (cur == NULL)
14848 return (NULL);
14849 if (ctxt == NULL)
14850 return (NULL);
14851
14852 xmlCtxtReset(ctxt);
14853
14854 stream = xmlNewStringInputStream(ctxt, cur);
14855 if (stream == NULL) {
14856 return (NULL);
14857 }
14858 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014859 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014860}
14861
14862/**
14863 * xmlCtxtReadFile:
14864 * @ctxt: an XML parser context
14865 * @filename: a file or URL
14866 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014867 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014868 *
14869 * parse an XML file from the filesystem or the network.
14870 * This reuses the existing @ctxt parser context
14871 *
14872 * Returns the resulting document tree
14873 */
14874xmlDocPtr
14875xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14876 const char *encoding, int options)
14877{
14878 xmlParserInputPtr stream;
14879
14880 if (filename == NULL)
14881 return (NULL);
14882 if (ctxt == NULL)
14883 return (NULL);
14884
14885 xmlCtxtReset(ctxt);
14886
Daniel Veillard29614c72004-11-26 10:47:26 +000014887 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014888 if (stream == NULL) {
14889 return (NULL);
14890 }
14891 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014892 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014893}
14894
14895/**
14896 * xmlCtxtReadMemory:
14897 * @ctxt: an XML parser context
14898 * @buffer: a pointer to a char array
14899 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014900 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014901 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014902 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014903 *
14904 * parse an XML in-memory document and build a tree.
14905 * This reuses the existing @ctxt parser context
14906 *
14907 * Returns the resulting document tree
14908 */
14909xmlDocPtr
14910xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014911 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014912{
14913 xmlParserInputBufferPtr input;
14914 xmlParserInputPtr stream;
14915
14916 if (ctxt == NULL)
14917 return (NULL);
14918 if (buffer == NULL)
14919 return (NULL);
14920
14921 xmlCtxtReset(ctxt);
14922
14923 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14924 if (input == NULL) {
14925 return(NULL);
14926 }
14927
14928 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14929 if (stream == NULL) {
14930 xmlFreeParserInputBuffer(input);
14931 return(NULL);
14932 }
14933
14934 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014935 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014936}
14937
14938/**
14939 * xmlCtxtReadFd:
14940 * @ctxt: an XML parser context
14941 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014942 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014943 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014944 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014945 *
14946 * parse an XML from a file descriptor and build a tree.
14947 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014948 * NOTE that the file descriptor will not be closed when the
14949 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014950 *
14951 * Returns the resulting document tree
14952 */
14953xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014954xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14955 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014956{
14957 xmlParserInputBufferPtr input;
14958 xmlParserInputPtr stream;
14959
14960 if (fd < 0)
14961 return (NULL);
14962 if (ctxt == NULL)
14963 return (NULL);
14964
14965 xmlCtxtReset(ctxt);
14966
14967
14968 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14969 if (input == NULL)
14970 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014971 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014972 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14973 if (stream == NULL) {
14974 xmlFreeParserInputBuffer(input);
14975 return (NULL);
14976 }
14977 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014978 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014979}
14980
14981/**
14982 * xmlCtxtReadIO:
14983 * @ctxt: an XML parser context
14984 * @ioread: an I/O read function
14985 * @ioclose: an I/O close function
14986 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014987 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014988 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014989 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014990 *
14991 * parse an XML document from I/O functions and source and build a tree.
14992 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080014993 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014994 * Returns the resulting document tree
14995 */
14996xmlDocPtr
14997xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14998 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014999 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015000 const char *encoding, int options)
15001{
15002 xmlParserInputBufferPtr input;
15003 xmlParserInputPtr stream;
15004
15005 if (ioread == NULL)
15006 return (NULL);
15007 if (ctxt == NULL)
15008 return (NULL);
15009
15010 xmlCtxtReset(ctxt);
15011
15012 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15013 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015014 if (input == NULL) {
15015 if (ioclose != NULL)
15016 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015017 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015018 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015019 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15020 if (stream == NULL) {
15021 xmlFreeParserInputBuffer(input);
15022 return (NULL);
15023 }
15024 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015025 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015026}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015027
15028#define bottom_parser
15029#include "elfgcchack.h"