blob: a0183f24d447f229377e4cfc56b55fc56a07f6a2 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard0161e632008-08-28 15:36:32 +000087static void
88xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
89
Rob Richards9c0aa472009-03-26 18:10:19 +000090static xmlParserCtxtPtr
91xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
92 const xmlChar *base, xmlParserCtxtPtr pctx);
93
Daniel Veillard0161e632008-08-28 15:36:32 +000094/************************************************************************
95 * *
96 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
97 * *
98 ************************************************************************/
99
/*
 * Entity replacement sizes below XML_PARSER_BIG_ENTITY are accepted by
 * xmlParserEntityCheck() without any ratio check.
 */
100#define XML_PARSER_BIG_ENTITY 1000
/* NOTE(review): no use of this limit is visible in this part of the file. */
101#define XML_PARSER_LOT_ENTITY 5000
102

103/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
109#define XML_PARSER_NON_LINEAR 10
110
111/*
112 * xmlParserEntityCheck
113 *
114 * Function to check non-linear entity expansion behaviour
115 * This is here to detect and stop exponential linear entity expansion
116 * This is not a limitation of the parser but a safety
117 * boundary feature. It can be disabled with the XML_PARSE_HUGE
118 * parser option.
119 */
120static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800121xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard0161e632008-08-28 15:36:32 +0000122 xmlEntityPtr ent)
123{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800124 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000125
126 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
127 return (0);
128 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
129 return (1);
130 if (size != 0) {
131 /*
132 * Do the check based on the replacement size of the entity
133 */
134 if (size < XML_PARSER_BIG_ENTITY)
135 return(0);
136
137 /*
138 * A limit on the amount of text data reasonably used
139 */
140 if (ctxt->input != NULL) {
141 consumed = ctxt->input->consumed +
142 (ctxt->input->cur - ctxt->input->base);
143 }
144 consumed += ctxt->sizeentities;
145
146 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
147 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
148 return (0);
149 } else if (ent != NULL) {
150 /*
151 * use the number of parsed entities in the replacement
152 */
153 size = ent->checked;
154
155 /*
156 * The amount of data parsed counting entities size only once
157 */
158 if (ctxt->input != NULL) {
159 consumed = ctxt->input->consumed +
160 (ctxt->input->cur - ctxt->input->base);
161 }
162 consumed += ctxt->sizeentities;
163
164 /*
165 * Check the density of entities for the amount of data
166 * knowing an entity reference will take at least 3 bytes
167 */
168 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
169 return (0);
170 } else {
171 /*
172 * strange we got no data for checking just return
173 */
174 return (0);
175 }
176
177 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
178 return (1);
179}
180
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000181/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 *
 * The variable is not static and is initialised to 256.
 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000189unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000190
Daniel Veillard0fb18932003-09-07 09:14:37 +0000191
Daniel Veillard0161e632008-08-28 15:36:32 +0000192
/*
 * Miscellaneous compile-time constants.
 * NOTE(review): none of these is referenced in the visible part of the
 * file; the *_BUFFER_SIZE names suggest buffer sizes - confirm against
 * their users before changing them.
 */
193#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000194#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000195#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000196#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
197
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is NULL terminated and read-only: both the strings and the
 * array slots are const so it cannot be modified by accident.
 */

static const char *const xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
207
Daniel Veillarda07050d2003-10-19 14:46:32 +0000208
Owen Taylor3473f882001-02-23 17:55:21 +0000209/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200210static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
211 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000212
Daniel Veillard7d515752003-09-26 19:12:37 +0000213static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000214xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
215 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000216 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000217 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000218
Daniel Veillard37334572008-07-31 08:20:02 +0000219static int
220xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
221 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000222#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000223static void
224xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
225 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000226#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000227
Daniel Veillard7d515752003-09-26 19:12:37 +0000228static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000229xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
230 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000231
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000232static int
233xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
234
Daniel Veillarde57ec792003-09-10 10:50:59 +0000235/************************************************************************
236 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 * Some factorized error routines *
238 * *
239 ************************************************************************/
240
241/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000242 * xmlErrAttributeDup:
243 * @ctxt: an XML parser context
244 * @prefix: the attribute prefix
245 * @localname: the attribute localname
246 *
247 * Handle a redefinition of attribute error
248 */
249static void
250xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
251 const xmlChar * localname)
252{
Daniel Veillard157fee02003-10-31 10:36:03 +0000253 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
254 (ctxt->instate == XML_PARSER_EOF))
255 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000256 if (ctxt != NULL)
257 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200258
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000259 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000260 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200261 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000262 (const char *) localname, NULL, NULL, 0, 0,
263 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000264 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000265 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200266 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000267 (const char *) prefix, (const char *) localname,
268 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
269 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000270 if (ctxt != NULL) {
271 ctxt->wellFormed = 0;
272 if (ctxt->recovery == 0)
273 ctxt->disableSAX = 1;
274 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000275}
276
277/**
278 * xmlFatalErr:
279 * @ctxt: an XML parser context
280 * @error: the error number
281 * @extra: extra information string
282 *
283 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
284 */
285static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000286xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287{
288 const char *errmsg;
289
Daniel Veillard157fee02003-10-31 10:36:03 +0000290 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
291 (ctxt->instate == XML_PARSER_EOF))
292 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 switch (error) {
294 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000295 errmsg = "CharRef: invalid hexadecimal value\n";
296 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000297 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000298 errmsg = "CharRef: invalid decimal value\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "CharRef: invalid value\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "internal error";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "PEReference at end of document\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "PEReference in prolog\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "PEReference in epilog\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "PEReference: no name\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "PEReference: expecting ';'\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "Detected an entity reference loop\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EntityValue: \" or ' expected\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "PEReferences forbidden in internal subset\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "EntityValue: \" or ' expected\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "AttValue: \" or ' expected\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Unescaped '<' not allowed in attributes values\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "SystemLiteral \" or ' expected\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Unfinished System or Public ID \" or ' expected\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Sequence ']]>' not allowed in content\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "PUBLIC, the Public Identifier is missing\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Comment must not contain '--' (double-hyphen)\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "xmlParsePI : no target name\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 errmsg = "Invalid PI name\n";
362 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 errmsg = "NOTATION: Name expected here\n";
365 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 errmsg = "'>' required to close NOTATION declaration\n";
368 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000369 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370 errmsg = "Entity value required\n";
371 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000372 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 errmsg = "Fragment not allowed";
374 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 errmsg = "'(' required to start ATTLIST enumeration\n";
377 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 errmsg = "NmToken expected in ATTLIST enumeration\n";
380 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000381 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 errmsg = "')' required to finish ATTLIST enumeration\n";
383 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000384 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
386 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000387 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 errmsg = "ContentDecl : Name or '(' expected\n";
392 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000393 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000394 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
395 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000396 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000397 errmsg =
398 "PEReference: forbidden within markup decl in internal subset\n";
399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000401 errmsg = "expected '>'\n";
402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "XML conditional section '[' expected\n";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000407 errmsg = "Content error in the external subset\n";
408 break;
409 case XML_ERR_CONDSEC_INVALID_KEYWORD:
410 errmsg =
411 "conditional section INCLUDE or IGNORE keyword expected\n";
412 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000413 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 errmsg = "XML conditional section not closed\n";
415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 errmsg = "Text declaration '<?xml' required\n";
418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 errmsg = "parsing XML declaration: '?>' expected\n";
421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 errmsg = "external parsed entities cannot be standalone\n";
424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 errmsg = "EntityRef: expecting ';'\n";
427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 errmsg = "DOCTYPE improperly terminated\n";
430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 errmsg = "EndTag: '</' not found\n";
433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 errmsg = "expected '='\n";
436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 errmsg = "String not closed expecting \" or '\n";
439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 errmsg = "String not started expecting ' or \"\n";
442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 errmsg = "Invalid XML encoding name\n";
445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 errmsg = "standalone accepts only 'yes' or 'no'\n";
448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 errmsg = "Document is empty\n";
451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 errmsg = "Extra content at the end of the document\n";
454 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000455 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 errmsg = "chunk is not well balanced\n";
457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 errmsg = "extra content at the end of well balanced chunk\n";
460 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000461 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000462 errmsg = "Malformed declaration expecting version\n";
463 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000464#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 case:
466 errmsg = "\n";
467 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000468#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 default:
470 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000472 if (ctxt != NULL)
473 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000474 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000475 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
476 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000477 if (ctxt != NULL) {
478 ctxt->wellFormed = 0;
479 if (ctxt->recovery == 0)
480 ctxt->disableSAX = 1;
481 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000482}
483
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000484/**
485 * xmlFatalErrMsg:
486 * @ctxt: an XML parser context
487 * @error: the error number
488 * @msg: the error message
489 *
490 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
491 */
492static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000493xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
494 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000495{
Daniel Veillard157fee02003-10-31 10:36:03 +0000496 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
497 (ctxt->instate == XML_PARSER_EOF))
498 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000499 if (ctxt != NULL)
500 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000501 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200502 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000503 if (ctxt != NULL) {
504 ctxt->wellFormed = 0;
505 if (ctxt->recovery == 0)
506 ctxt->disableSAX = 1;
507 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000508}
509
510/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000511 * xmlWarningMsg:
512 * @ctxt: an XML parser context
513 * @error: the error number
514 * @msg: the error message
515 * @str1: extra data
516 * @str2: extra data
517 *
518 * Handle a warning.
519 */
520static void
521xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
522 const char *msg, const xmlChar *str1, const xmlChar *str2)
523{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000524 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000525
Daniel Veillard157fee02003-10-31 10:36:03 +0000526 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
527 (ctxt->instate == XML_PARSER_EOF))
528 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000529 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
530 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000531 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200532 if (ctxt != NULL) {
533 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000534 (ctxt->sax) ? ctxt->sax->warning : NULL,
535 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000536 ctxt, NULL, XML_FROM_PARSER, error,
537 XML_ERR_WARNING, NULL, 0,
538 (const char *) str1, (const char *) str2, NULL, 0, 0,
539 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200540 } else {
541 __xmlRaiseError(schannel, NULL, NULL,
542 ctxt, NULL, XML_FROM_PARSER, error,
543 XML_ERR_WARNING, NULL, 0,
544 (const char *) str1, (const char *) str2, NULL, 0, 0,
545 msg, (const char *) str1, (const char *) str2);
546 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000547}
548
549/**
550 * xmlValidityError:
551 * @ctxt: an XML parser context
552 * @error: the error number
553 * @msg: the error message
554 * @str1: extra data
555 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000556 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000557 */
558static void
559xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000560 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000561{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000562 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000563
564 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565 (ctxt->instate == XML_PARSER_EOF))
566 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000567 if (ctxt != NULL) {
568 ctxt->errNo = error;
569 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
570 schannel = ctxt->sax->serror;
571 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200572 if (ctxt != NULL) {
573 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000574 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000575 ctxt, NULL, XML_FROM_DTD, error,
576 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000577 (const char *) str2, NULL, 0, 0,
578 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000579 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200580 } else {
581 __xmlRaiseError(schannel, NULL, NULL,
582 ctxt, NULL, XML_FROM_DTD, error,
583 XML_ERR_ERROR, NULL, 0, (const char *) str1,
584 (const char *) str2, NULL, 0, 0,
585 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000586 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000587}
588
589/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000590 * xmlFatalErrMsgInt:
591 * @ctxt: an XML parser context
592 * @error: the error number
593 * @msg: the error message
594 * @val: an integer value
595 *
596 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
597 */
598static void
599xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000600 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000601{
Daniel Veillard157fee02003-10-31 10:36:03 +0000602 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603 (ctxt->instate == XML_PARSER_EOF))
604 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000605 if (ctxt != NULL)
606 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000607 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000608 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
609 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000610 if (ctxt != NULL) {
611 ctxt->wellFormed = 0;
612 if (ctxt->recovery == 0)
613 ctxt->disableSAX = 1;
614 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000615}
616
617/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000618 * xmlFatalErrMsgStrIntStr:
619 * @ctxt: an XML parser context
620 * @error: the error number
621 * @msg: the error message
622 * @str1: an string info
623 * @val: an integer value
624 * @str2: an string info
625 *
626 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
627 */
628static void
629xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
630 const char *msg, const xmlChar *str1, int val,
631 const xmlChar *str2)
632{
Daniel Veillard157fee02003-10-31 10:36:03 +0000633 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634 (ctxt->instate == XML_PARSER_EOF))
635 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000636 if (ctxt != NULL)
637 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000638 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000639 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
640 NULL, 0, (const char *) str1, (const char *) str2,
641 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000642 if (ctxt != NULL) {
643 ctxt->wellFormed = 0;
644 if (ctxt->recovery == 0)
645 ctxt->disableSAX = 1;
646 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000647}
648
649/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000650 * xmlFatalErrMsgStr:
651 * @ctxt: an XML parser context
652 * @error: the error number
653 * @msg: the error message
654 * @val: a string value
655 *
656 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
657 */
658static void
659xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000660 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000661{
Daniel Veillard157fee02003-10-31 10:36:03 +0000662 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663 (ctxt->instate == XML_PARSER_EOF))
664 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000665 if (ctxt != NULL)
666 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000667 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000668 XML_FROM_PARSER, error, XML_ERR_FATAL,
669 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000671 if (ctxt != NULL) {
672 ctxt->wellFormed = 0;
673 if (ctxt->recovery == 0)
674 ctxt->disableSAX = 1;
675 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000676}
677
678/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000679 * xmlErrMsgStr:
680 * @ctxt: an XML parser context
681 * @error: the error number
682 * @msg: the error message
683 * @val: a string value
684 *
685 * Handle a non fatal parser error
686 */
687static void
688xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
689 const char *msg, const xmlChar * val)
690{
Daniel Veillard157fee02003-10-31 10:36:03 +0000691 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
692 (ctxt->instate == XML_PARSER_EOF))
693 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000694 if (ctxt != NULL)
695 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000696 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000697 XML_FROM_PARSER, error, XML_ERR_ERROR,
698 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
699 val);
700}
701
702/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000703 * xmlNsErr:
704 * @ctxt: an XML parser context
705 * @error: the error number
706 * @msg: the message
707 * @info1: extra information string
708 * @info2: extra information string
709 *
710 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
711 */
712static void
713xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000715 const xmlChar * info1, const xmlChar * info2,
716 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000717{
Daniel Veillard157fee02003-10-31 10:36:03 +0000718 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719 (ctxt->instate == XML_PARSER_EOF))
720 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000721 if (ctxt != NULL)
722 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000723 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000724 XML_ERR_ERROR, NULL, 0, (const char *) info1,
725 (const char *) info2, (const char *) info3, 0, 0, msg,
726 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000727 if (ctxt != NULL)
728 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000729}
730
Daniel Veillard37334572008-07-31 08:20:02 +0000731/**
732 * xmlNsWarn
733 * @ctxt: an XML parser context
734 * @error: the error number
735 * @msg: the message
736 * @info1: extra information string
737 * @info2: extra information string
738 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800739 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000740 */
741static void
742xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
743 const char *msg,
744 const xmlChar * info1, const xmlChar * info2,
745 const xmlChar * info3)
746{
747 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
748 (ctxt->instate == XML_PARSER_EOF))
749 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000750 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
751 XML_ERR_WARNING, NULL, 0, (const char *) info1,
752 (const char *) info2, (const char *) info3, 0, 0, msg,
753 info1, info2, info3);
754}
755
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000756/************************************************************************
757 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000758 * Library wide options *
759 * *
760 ************************************************************************/
761
762/**
763 * xmlHasFeature:
764 * @feature: the feature to be examined
765 *
766 * Examines if the library has been compiled with a given feature.
767 *
768 * Returns a non-zero value if the feature exist, otherwise zero.
769 * Returns zero (0) if the feature does not exist or an unknown
770 * unknown feature is requested, non-zero otherwise.
771 */
772int
773xmlHasFeature(xmlFeature feature)
774{
775 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000776 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000777#ifdef LIBXML_THREAD_ENABLED
778 return(1);
779#else
780 return(0);
781#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000782 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000783#ifdef LIBXML_TREE_ENABLED
784 return(1);
785#else
786 return(0);
787#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000788 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000789#ifdef LIBXML_OUTPUT_ENABLED
790 return(1);
791#else
792 return(0);
793#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000794 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000795#ifdef LIBXML_PUSH_ENABLED
796 return(1);
797#else
798 return(0);
799#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000800 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000801#ifdef LIBXML_READER_ENABLED
802 return(1);
803#else
804 return(0);
805#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000806 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000807#ifdef LIBXML_PATTERN_ENABLED
808 return(1);
809#else
810 return(0);
811#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000812 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000813#ifdef LIBXML_WRITER_ENABLED
814 return(1);
815#else
816 return(0);
817#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000818 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000819#ifdef LIBXML_SAX1_ENABLED
820 return(1);
821#else
822 return(0);
823#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000824 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000825#ifdef LIBXML_FTP_ENABLED
826 return(1);
827#else
828 return(0);
829#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000830 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000831#ifdef LIBXML_HTTP_ENABLED
832 return(1);
833#else
834 return(0);
835#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000836 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000837#ifdef LIBXML_VALID_ENABLED
838 return(1);
839#else
840 return(0);
841#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000842 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000843#ifdef LIBXML_HTML_ENABLED
844 return(1);
845#else
846 return(0);
847#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000848 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000849#ifdef LIBXML_LEGACY_ENABLED
850 return(1);
851#else
852 return(0);
853#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000854 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000855#ifdef LIBXML_C14N_ENABLED
856 return(1);
857#else
858 return(0);
859#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000860 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000861#ifdef LIBXML_CATALOG_ENABLED
862 return(1);
863#else
864 return(0);
865#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000866 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000867#ifdef LIBXML_XPATH_ENABLED
868 return(1);
869#else
870 return(0);
871#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000872 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000873#ifdef LIBXML_XPTR_ENABLED
874 return(1);
875#else
876 return(0);
877#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000878 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000879#ifdef LIBXML_XINCLUDE_ENABLED
880 return(1);
881#else
882 return(0);
883#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000884 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000885#ifdef LIBXML_ICONV_ENABLED
886 return(1);
887#else
888 return(0);
889#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000890 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000891#ifdef LIBXML_ISO8859X_ENABLED
892 return(1);
893#else
894 return(0);
895#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000896 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000897#ifdef LIBXML_UNICODE_ENABLED
898 return(1);
899#else
900 return(0);
901#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000902 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000903#ifdef LIBXML_REGEXP_ENABLED
904 return(1);
905#else
906 return(0);
907#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000908 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000909#ifdef LIBXML_AUTOMATA_ENABLED
910 return(1);
911#else
912 return(0);
913#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000914 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000915#ifdef LIBXML_EXPR_ENABLED
916 return(1);
917#else
918 return(0);
919#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000920 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000921#ifdef LIBXML_SCHEMAS_ENABLED
922 return(1);
923#else
924 return(0);
925#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000926 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000927#ifdef LIBXML_SCHEMATRON_ENABLED
928 return(1);
929#else
930 return(0);
931#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000932 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000933#ifdef LIBXML_MODULES_ENABLED
934 return(1);
935#else
936 return(0);
937#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000938 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000939#ifdef LIBXML_DEBUG_ENABLED
940 return(1);
941#else
942 return(0);
943#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000944 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000945#ifdef DEBUG_MEMORY_LOCATION
946 return(1);
947#else
948 return(0);
949#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000950 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000951#ifdef LIBXML_DEBUG_RUNTIME
952 return(1);
953#else
954 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000955#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000956 case XML_WITH_ZLIB:
957#ifdef LIBXML_ZLIB_ENABLED
958 return(1);
959#else
960 return(0);
961#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +0200962 case XML_WITH_LZMA:
963#ifdef LIBXML_LZMA_ENABLED
964 return(1);
965#else
966 return(0);
967#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100968 case XML_WITH_ICU:
969#ifdef LIBXML_ICU_ENABLED
970 return(1);
971#else
972 return(0);
973#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000974 default:
975 break;
976 }
977 return(0);
978}
979
980/************************************************************************
981 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000982 * SAX2 defaulted attributes handling *
983 * *
984 ************************************************************************/
985
986/**
987 * xmlDetectSAX2:
988 * @ctxt: an XML parser context
989 *
990 * Do the SAX2 detection and specific intialization
991 */
992static void
993xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
994 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000995#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000996 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
997 ((ctxt->sax->startElementNs != NULL) ||
998 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000999#else
1000 ctxt->sax2 = 1;
1001#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001002
1003 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1004 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1005 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +00001006 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1007 (ctxt->str_xml_ns == NULL)) {
1008 xmlErrMemory(ctxt, NULL);
1009 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001010}
1011
/*
 * Table of the defaulted attributes of one element. Each attribute uses
 * five consecutive entries of values[]: local name, prefix, start of the
 * value, end of the value and a marker set for external declarations.
 * The structure is allocated with extra room after it so that values[]
 * can hold maxAttrs attributes.
 */
typedef struct _xmlDefAttrs {
    int nbAttrs;                /* number of defaulted attributes on that element */
    int maxAttrs;               /* the size of the array */
    const xmlChar *values[5];   /* array of localname/prefix/values/external */
} xmlDefAttrs;
typedef xmlDefAttrs *xmlDefAttrsPtr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001019
/**
 * xmlAttrNormalizeSpace:
 * @src:  the source string
 * @dst:  the target string
 *
 * Normalize the space in non CDATA attribute values:
 * If the attribute type is not CDATA, then the XML processor MUST further
 * process the normalized attribute value by discarding any leading and
 * trailing space (#x20) characters, and by replacing sequences of space
 * (#x20) characters by a single space (#x20) character.
 * Note that the size of dst need to be at least src, and if one doesn't need
 * to preserve dst (and it doesn't come from a dictionary or read-only) then
 * passing src as dst is just fine.
 *
 * Returns a pointer to the normalized value (the start of @dst), or NULL
 *         if @src or @dst is NULL or if, when normalizing in place, the
 *         value did not need any change.
 */
static xmlChar *
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
{
    xmlChar *out;

    if ((src == NULL) || (dst == NULL))
        return(NULL);

    out = dst;

    /* drop the leading spaces */
    while (*src == 0x20)
        src++;

    while (*src != 0) {
        if (*src == 0x20) {
            /* collapse the run, and drop it entirely if it is trailing */
            while (*src == 0x20)
                src++;
            if (*src != 0)
                *out++ = 0x20;
        } else {
            *out++ = *src++;
        }
    }
    *out = 0;

    /*
     * Both cursors can only meet at the end when working in place on a
     * value that had nothing to remove.
     */
    if (out == src)
        return(NULL);

    /* hand back the value itself, not the cursor left on its terminator */
    return(dst);
}
1058
1059/**
1060 * xmlAttrNormalizeSpace2:
1061 * @src: the source string
1062 *
1063 * Normalize the space in non CDATA attribute values, a slightly more complex
1064 * front end to avoid allocation problems when running on attribute values
1065 * coming from the input.
1066 *
1067 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1068 * is needed.
1069 */
1070static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001071xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001072{
1073 int i;
1074 int remove_head = 0;
1075 int need_realloc = 0;
1076 const xmlChar *cur;
1077
1078 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1079 return(NULL);
1080 i = *len;
1081 if (i <= 0)
1082 return(NULL);
1083
1084 cur = src;
1085 while (*cur == 0x20) {
1086 cur++;
1087 remove_head++;
1088 }
1089 while (*cur != 0) {
1090 if (*cur == 0x20) {
1091 cur++;
1092 if ((*cur == 0x20) || (*cur == 0)) {
1093 need_realloc = 1;
1094 break;
1095 }
1096 } else
1097 cur++;
1098 }
1099 if (need_realloc) {
1100 xmlChar *ret;
1101
1102 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1103 if (ret == NULL) {
1104 xmlErrMemory(ctxt, NULL);
1105 return(NULL);
1106 }
1107 xmlAttrNormalizeSpace(ret, ret);
1108 *len = (int) strlen((const char *)ret);
1109 return(ret);
1110 } else if (remove_head) {
1111 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001112 memmove(src, src + remove_head, 1 + *len);
1113 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001114 }
1115 return(NULL);
1116}
1117
1118/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001119 * xmlAddDefAttrs:
1120 * @ctxt: an XML parser context
1121 * @fullname: the element fullname
1122 * @fullattr: the attribute fullname
1123 * @value: the attribute value
1124 *
1125 * Add a defaulted attribute for an element
1126 */
1127static void
1128xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1129 const xmlChar *fullname,
1130 const xmlChar *fullattr,
1131 const xmlChar *value) {
1132 xmlDefAttrsPtr defaults;
1133 int len;
1134 const xmlChar *name;
1135 const xmlChar *prefix;
1136
Daniel Veillard6a31b832008-03-26 14:06:44 +00001137 /*
1138 * Allows to detect attribute redefinitions
1139 */
1140 if (ctxt->attsSpecial != NULL) {
1141 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1142 return;
1143 }
1144
Daniel Veillarde57ec792003-09-10 10:50:59 +00001145 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001146 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001147 if (ctxt->attsDefault == NULL)
1148 goto mem_error;
1149 }
1150
1151 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001152 * split the element name into prefix:localname , the string found
1153 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 */
1155 name = xmlSplitQName3(fullname, &len);
1156 if (name == NULL) {
1157 name = xmlDictLookup(ctxt->dict, fullname, -1);
1158 prefix = NULL;
1159 } else {
1160 name = xmlDictLookup(ctxt->dict, name, -1);
1161 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1162 }
1163
1164 /*
1165 * make sure there is some storage
1166 */
1167 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1168 if (defaults == NULL) {
1169 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001170 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001171 if (defaults == NULL)
1172 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001173 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001174 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001175 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1176 defaults, NULL) < 0) {
1177 xmlFree(defaults);
1178 goto mem_error;
1179 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001180 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001181 xmlDefAttrsPtr temp;
1182
1183 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001184 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001185 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001186 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001187 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001188 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001189 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1190 defaults, NULL) < 0) {
1191 xmlFree(defaults);
1192 goto mem_error;
1193 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001194 }
1195
1196 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001197 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001198 * are within the DTD and hen not associated to namespace names.
1199 */
1200 name = xmlSplitQName3(fullattr, &len);
1201 if (name == NULL) {
1202 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1203 prefix = NULL;
1204 } else {
1205 name = xmlDictLookup(ctxt->dict, name, -1);
1206 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1207 }
1208
Daniel Veillardae0765b2008-07-31 19:54:59 +00001209 defaults->values[5 * defaults->nbAttrs] = name;
1210 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001211 /* intern the string and precompute the end */
1212 len = xmlStrlen(value);
1213 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001214 defaults->values[5 * defaults->nbAttrs + 2] = value;
1215 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1216 if (ctxt->external)
1217 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1218 else
1219 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001220 defaults->nbAttrs++;
1221
1222 return;
1223
1224mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001225 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001226 return;
1227}
1228
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001229/**
1230 * xmlAddSpecialAttr:
1231 * @ctxt: an XML parser context
1232 * @fullname: the element fullname
1233 * @fullattr: the attribute fullname
1234 * @type: the attribute type
1235 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001236 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001237 */
1238static void
1239xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1240 const xmlChar *fullname,
1241 const xmlChar *fullattr,
1242 int type)
1243{
1244 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001245 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001246 if (ctxt->attsSpecial == NULL)
1247 goto mem_error;
1248 }
1249
Daniel Veillardac4118d2008-01-11 05:27:32 +00001250 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1251 return;
1252
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001253 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1254 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001255 return;
1256
1257mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001258 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001259 return;
1260}
1261
Daniel Veillard4432df22003-09-28 18:58:27 +00001262/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001263 * xmlCleanSpecialAttrCallback:
1264 *
1265 * Removes CDATA attributes from the special attribute table
1266 */
1267static void
1268xmlCleanSpecialAttrCallback(void *payload, void *data,
1269 const xmlChar *fullname, const xmlChar *fullattr,
1270 const xmlChar *unused ATTRIBUTE_UNUSED) {
1271 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1272
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001273 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001274 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1275 }
1276}
1277
1278/**
1279 * xmlCleanSpecialAttr:
1280 * @ctxt: an XML parser context
1281 *
1282 * Trim the list of attributes defined to remove all those of type
1283 * CDATA as they are not special. This call should be done when finishing
1284 * to parse the DTD and before starting to parse the document root.
1285 */
1286static void
1287xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1288{
1289 if (ctxt->attsSpecial == NULL)
1290 return;
1291
1292 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1293
1294 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1295 xmlHashFree(ctxt->attsSpecial, NULL);
1296 ctxt->attsSpecial = NULL;
1297 }
1298 return;
1299}
1300
/*
 * Kinds of subtags which may follow the primary language subtag, listed
 * in the order in which the langtag production allows them.
 */
enum {
    XML_LANG_SUBTAG_NONE = 0,   /* not a recognized subtag */
    XML_LANG_SUBTAG_EXTLANG,    /* 3ALPHA */
    XML_LANG_SUBTAG_SCRIPT,     /* 4ALPHA */
    XML_LANG_SUBTAG_REGION,     /* 2ALPHA / 3DIGIT */
    XML_LANG_SUBTAG_VARIANT     /* 5*8alphanum / (DIGIT 3alphanum) */
};

/* Tells whether @c is an ASCII letter */
static int
xmlLangIsAlpha(xmlChar c)
{
    return(((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/* Tells whether @c is an ASCII digit */
static int
xmlLangIsDigit(xmlChar c)
{
    return((c >= '0') && (c <= '9'));
}

/*
 * Scans the run of ASCII letters and digits starting at @tag, stores its
 * length in @plen and returns the XML_LANG_SUBTAG_xxx kind it forms.
 */
static int
xmlLangClassifySubtag(const xmlChar *tag, size_t *plen)
{
    size_t len = 0;
    size_t nalpha = 0;

    for (;;) {
        if (xmlLangIsAlpha(tag[len]))
            nalpha++;
        else if (!xmlLangIsDigit(tag[len]))
            break;
        len++;
    }
    *plen = len;

    if ((len >= 5) && (len <= 8))
        return(XML_LANG_SUBTAG_VARIANT);
    if (len == 4) {
        if (nalpha == 4)
            return(XML_LANG_SUBTAG_SCRIPT);
        if (xmlLangIsDigit(tag[0]))
            return(XML_LANG_SUBTAG_VARIANT);
        return(XML_LANG_SUBTAG_NONE);
    }
    if (len == 3) {
        if (nalpha == 3)
            return(XML_LANG_SUBTAG_EXTLANG);
        if (nalpha == 0)
            return(XML_LANG_SUBTAG_REGION);
        return(XML_LANG_SUBTAG_NONE);
    }
    if ((len == 2) && (nalpha == 2))
        return(XML_LANG_SUBTAG_REGION);
    return(XML_LANG_SUBTAG_NONE);
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC references the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway: once a variant
 * has been seen, whatever follows a '-' is accepted unchecked. Only one
 * extlang is handled, and a 4 to 8 letters language must stand alone.
 * Variants are matched as alphanumeric subtags, so digit-led ones such
 * as the "1996" of "de-1996" or "de-DE-1996" are accepted.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    const xmlChar *cur = lang;
    size_t len;
    int stage;
    int kind;

    if (cur == NULL)
        return (0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        while (xmlLangIsAlpha(cur[0]))
            cur++;
        return(cur[0] == 0);
    }

    /* primary language subtag */
    len = 0;
    while (xmlLangIsAlpha(cur[len]))
        len++;
    if (len >= 4) {
        /*
         * Reserved or registered language, nothing may follow it
         */
        return((len <= 8) && (cur[len] == 0));
    }
    if (len < 2)
        return(0);
    cur += len;

    /*
     * we got an ISO 639 code, now we can have extlang, script, region
     * and variant, each at most once and only in that order
     */
    stage = XML_LANG_SUBTAG_NONE;
    for (;;) {
        if (cur[0] == 0)
            return(1);
        if (cur[0] != '-')
            return(0);
        /* extensions and private use subtags not checked */
        if (stage == XML_LANG_SUBTAG_VARIANT)
            return(1);

        cur++;
        kind = xmlLangClassifySubtag(cur, &len);
        /* unknown subtag, or one coming too late in the tag */
        if (kind <= stage)
            return(0);
        stage = kind;
        cur += len;
    }
}
1494
Owen Taylor3473f882001-02-23 17:55:21 +00001495/************************************************************************
1496 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001497 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001498 * *
1499 ************************************************************************/
1500
Daniel Veillard8ed10722009-08-20 19:17:36 +02001501static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1502 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001503
Daniel Veillard0fb18932003-09-07 09:14:37 +00001504#ifdef SAX2
1505/**
1506 * nsPush:
1507 * @ctxt: an XML parser context
1508 * @prefix: the namespace prefix or NULL
1509 * @URL: the namespace name
1510 *
1511 * Pushes a new parser namespace on top of the ns stack
1512 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001513 * Returns -1 in case of error, -2 if the namespace should be discarded
1514 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001515 */
1516static int
1517nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1518{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001519 if (ctxt->options & XML_PARSE_NSCLEAN) {
1520 int i;
1521 for (i = 0;i < ctxt->nsNr;i += 2) {
1522 if (ctxt->nsTab[i] == prefix) {
1523 /* in scope */
1524 if (ctxt->nsTab[i + 1] == URL)
1525 return(-2);
1526 /* out of scope keep it */
1527 break;
1528 }
1529 }
1530 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001531 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1532 ctxt->nsMax = 10;
1533 ctxt->nsNr = 0;
1534 ctxt->nsTab = (const xmlChar **)
1535 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1536 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001537 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001538 ctxt->nsMax = 0;
1539 return (-1);
1540 }
1541 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001542 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001543 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001544 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1545 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1546 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001547 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001548 ctxt->nsMax /= 2;
1549 return (-1);
1550 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001551 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001552 }
1553 ctxt->nsTab[ctxt->nsNr++] = prefix;
1554 ctxt->nsTab[ctxt->nsNr++] = URL;
1555 return (ctxt->nsNr);
1556}
1557/**
1558 * nsPop:
1559 * @ctxt: an XML parser context
1560 * @nr: the number to pop
1561 *
1562 * Pops the top @nr parser prefix/namespace from the ns stack
1563 *
1564 * Returns the number of namespaces removed
1565 */
1566static int
1567nsPop(xmlParserCtxtPtr ctxt, int nr)
1568{
1569 int i;
1570
1571 if (ctxt->nsTab == NULL) return(0);
1572 if (ctxt->nsNr < nr) {
1573 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1574 nr = ctxt->nsNr;
1575 }
1576 if (ctxt->nsNr <= 0)
1577 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001578
Daniel Veillard0fb18932003-09-07 09:14:37 +00001579 for (i = 0;i < nr;i++) {
1580 ctxt->nsNr--;
1581 ctxt->nsTab[ctxt->nsNr] = NULL;
1582 }
1583 return(nr);
1584}
1585#endif
1586
1587static int
1588xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1589 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001590 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001591 int maxatts;
1592
1593 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001594 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001595 atts = (const xmlChar **)
1596 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001597 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001598 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001599 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1600 if (attallocs == NULL) goto mem_error;
1601 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001602 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001603 } else if (nr + 5 > ctxt->maxatts) {
1604 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001605 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1606 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001607 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001608 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001609 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1610 (maxatts / 5) * sizeof(int));
1611 if (attallocs == NULL) goto mem_error;
1612 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001613 ctxt->maxatts = maxatts;
1614 }
1615 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001616mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001617 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001618 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001619}
1620
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001621/**
1622 * inputPush:
1623 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001624 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001625 *
1626 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001627 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001628 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001629 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001630int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001631inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1632{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001633 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001634 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001635 if (ctxt->inputNr >= ctxt->inputMax) {
1636 ctxt->inputMax *= 2;
1637 ctxt->inputTab =
1638 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1639 ctxt->inputMax *
1640 sizeof(ctxt->inputTab[0]));
1641 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001642 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001643 xmlFreeInputStream(value);
1644 ctxt->inputMax /= 2;
1645 value = NULL;
1646 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001647 }
1648 }
1649 ctxt->inputTab[ctxt->inputNr] = value;
1650 ctxt->input = value;
1651 return (ctxt->inputNr++);
1652}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001653/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001654 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001655 * @ctxt: an XML parser context
1656 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001657 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001658 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001659 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001660 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001661xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001662inputPop(xmlParserCtxtPtr ctxt)
1663{
1664 xmlParserInputPtr ret;
1665
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001666 if (ctxt == NULL)
1667 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001668 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001669 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001670 ctxt->inputNr--;
1671 if (ctxt->inputNr > 0)
1672 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1673 else
1674 ctxt->input = NULL;
1675 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001676 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001677 return (ret);
1678}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001679/**
1680 * nodePush:
1681 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001682 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001683 *
1684 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001685 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001686 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001687 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001688int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001689nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1690{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001691 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001693 xmlNodePtr *tmp;
1694
1695 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1696 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001697 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001698 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001699 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001700 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001701 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001702 ctxt->nodeTab = tmp;
1703 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001704 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001705 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1706 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001707 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001708 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001709 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001710 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001711 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001712 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001713 ctxt->nodeTab[ctxt->nodeNr] = value;
1714 ctxt->node = value;
1715 return (ctxt->nodeNr++);
1716}
Daniel Veillard8915c152008-08-26 13:05:34 +00001717
Daniel Veillard1c732d22002-11-30 11:22:59 +00001718/**
1719 * nodePop:
1720 * @ctxt: an XML parser context
1721 *
1722 * Pops the top element node from the node stack
1723 *
1724 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001725 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001726xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001727nodePop(xmlParserCtxtPtr ctxt)
1728{
1729 xmlNodePtr ret;
1730
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001731 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001732 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001733 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001734 ctxt->nodeNr--;
1735 if (ctxt->nodeNr > 0)
1736 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1737 else
1738 ctxt->node = NULL;
1739 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001740 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001741 return (ret);
1742}
Daniel Veillarda2351322004-06-27 12:08:10 +00001743
1744#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001745/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001746 * nameNsPush:
1747 * @ctxt: an XML parser context
1748 * @value: the element name
1749 * @prefix: the element prefix
1750 * @URI: the element namespace name
1751 *
1752 * Pushes a new element name/prefix/URL on top of the name stack
1753 *
1754 * Returns -1 in case of error, the index in the stack otherwise
1755 */
1756static int
1757nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1758 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1759{
1760 if (ctxt->nameNr >= ctxt->nameMax) {
1761 const xmlChar * *tmp;
1762 void **tmp2;
1763 ctxt->nameMax *= 2;
1764 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1765 ctxt->nameMax *
1766 sizeof(ctxt->nameTab[0]));
1767 if (tmp == NULL) {
1768 ctxt->nameMax /= 2;
1769 goto mem_error;
1770 }
1771 ctxt->nameTab = tmp;
1772 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1773 ctxt->nameMax * 3 *
1774 sizeof(ctxt->pushTab[0]));
1775 if (tmp2 == NULL) {
1776 ctxt->nameMax /= 2;
1777 goto mem_error;
1778 }
1779 ctxt->pushTab = tmp2;
1780 }
1781 ctxt->nameTab[ctxt->nameNr] = value;
1782 ctxt->name = value;
1783 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1784 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001785 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001786 return (ctxt->nameNr++);
1787mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001788 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001789 return (-1);
1790}
1791/**
1792 * nameNsPop:
1793 * @ctxt: an XML parser context
1794 *
1795 * Pops the top element/prefix/URI name from the name stack
1796 *
1797 * Returns the name just removed
1798 */
1799static const xmlChar *
1800nameNsPop(xmlParserCtxtPtr ctxt)
1801{
1802 const xmlChar *ret;
1803
1804 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001805 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001806 ctxt->nameNr--;
1807 if (ctxt->nameNr > 0)
1808 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1809 else
1810 ctxt->name = NULL;
1811 ret = ctxt->nameTab[ctxt->nameNr];
1812 ctxt->nameTab[ctxt->nameNr] = NULL;
1813 return (ret);
1814}
Daniel Veillarda2351322004-06-27 12:08:10 +00001815#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001816
1817/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001818 * namePush:
1819 * @ctxt: an XML parser context
1820 * @value: the element name
1821 *
1822 * Pushes a new element name on top of the name stack
1823 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001824 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001825 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001826int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001827namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001828{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001829 if (ctxt == NULL) return (-1);
1830
Daniel Veillard1c732d22002-11-30 11:22:59 +00001831 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001832 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001833 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001834 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001835 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001836 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001837 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001838 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001839 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001840 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001841 }
1842 ctxt->nameTab[ctxt->nameNr] = value;
1843 ctxt->name = value;
1844 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001845mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001846 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001847 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001848}
1849/**
1850 * namePop:
1851 * @ctxt: an XML parser context
1852 *
1853 * Pops the top element name from the name stack
1854 *
1855 * Returns the name just removed
1856 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001857const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001858namePop(xmlParserCtxtPtr ctxt)
1859{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001860 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001861
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001862 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1863 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001864 ctxt->nameNr--;
1865 if (ctxt->nameNr > 0)
1866 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1867 else
1868 ctxt->name = NULL;
1869 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001870 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001871 return (ret);
1872}
Owen Taylor3473f882001-02-23 17:55:21 +00001873
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001874static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001875 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001876 int *tmp;
1877
Owen Taylor3473f882001-02-23 17:55:21 +00001878 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001879 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1880 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1881 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001882 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001883 ctxt->spaceMax /=2;
1884 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001885 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001886 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001887 }
1888 ctxt->spaceTab[ctxt->spaceNr] = val;
1889 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1890 return(ctxt->spaceNr++);
1891}
1892
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001893static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001894 int ret;
1895 if (ctxt->spaceNr <= 0) return(0);
1896 ctxt->spaceNr--;
1897 if (ctxt->spaceNr > 0)
1898 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1899 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001900 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001901 ret = ctxt->spaceTab[ctxt->spaceNr];
1902 ctxt->spaceTab[ctxt->spaceNr] = -1;
1903 return(ret);
1904}
1905
1906/*
1907 * Macros for accessing the content. Those should be used only by the parser,
1908 * and not exported.
1909 *
1910 * Dirty macros, i.e. one often need to make assumption on the context to
1911 * use them
1912 *
1913 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1914 * To be used with extreme caution since operations consuming
1915 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
1920 * RAW same as CUR but in the input buffer, bypass any token
1921 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
1924 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001925 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001928 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1929 *
1930 * NEXT Skip to the next character, this does the proper decoding
1931 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001932 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001933 * CUR_CHAR(l) returns the current unicode character (int), set l
1934 * to the number of xmlChars used for the encoding [0-5].
1935 * CUR_SCHAR same but operate on a string instead of the context
1936 * COPY_BUF copy the current unicode char to the target buffer, increment
1937 * the index
1938 * GROW, SHRINK handling of input buffers
1939 */
1940
/*
 * Raw accessors on the current input. They all expect a variable named
 * ctxt to be in scope at the point of use.
 */
/* current byte of the input buffer */
#define RAW         (*ctxt->input->cur)
/* same expansion as RAW */
#define CUR         (*ctxt->input->cur)
/* byte located val positions after the current one */
#define NXT(val)    ctxt->input->cur[(val)]
/* the current pointer itself, usable as an lvalue */
#define CUR_PTR     ctxt->input->cur
1945
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, equal c1..cn in order. The comparison stops at the first
 * mismatch, so bytes after it are not read.
 * Every argument is parenthesized so that a compound expression passed
 * as s (pointer arithmetic, a conditional, ...) is cast as a whole.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1963
/*
 * SKIP(val): advance the current input by val bytes, adding val to
 * nbChars and col. Line tracking is not touched. Afterwards a '%' at the
 * new position is handed to xmlParserHandlePEReference(), and an
 * exhausted input that cannot be grown is popped.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks byte by byte so that a '\n'
 * increments line and resets col to 1. The argument is parenthesized
 * so that an expression can safely be passed as val.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1986
Daniel Veillarda880b122003-04-21 21:36:41 +00001987#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001988 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1989 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001990 xmlSHRINK (ctxt);
1991
1992static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1993 xmlParserInputShrink(ctxt->input);
1994 if ((*ctxt->input->cur == 0) &&
1995 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1996 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001997 }
Owen Taylor3473f882001-02-23 17:55:21 +00001998
Daniel Veillarda880b122003-04-21 21:36:41 +00001999#define GROW if ((ctxt->progressive == 0) && \
2000 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002001 xmlGROW (ctxt);
2002
2003static void xmlGROW (xmlParserCtxtPtr ctxt) {
2004 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002005 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002006 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2007 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002008}
Owen Taylor3473f882001-02-23 17:55:21 +00002009
/* Skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, updating col and nbChars, and try
 * to grow the input when the new current byte is 0. No line tracking.
 * This expands to a plain brace block rather than do { } while (0).
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance by l bytes, counting them as one column, or as a new
 * line when the byte being left is '\n'. A '%' at the new position is
 * handed to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: its address is passed to receive the length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a length l of 1 is stored directly as a single xmlChar,
 * anything else goes through xmlCopyCharMultiByte().
 * The expansion is an unterminated if/else: the caller supplies the ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002036
2037/**
2038 * xmlSkipBlankChars:
2039 * @ctxt: the XML parser context
2040 *
2041 * skip all blanks character found at that point in the input streams.
2042 * It pops up finished entities in the process if allowable at that point.
2043 *
2044 * Returns the number of space chars skipped
2045 */
2046
2047int
2048xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002049 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002050
2051 /*
2052 * It's Okay to use CUR/NEXT here since all the blanks are on
2053 * the ASCII range.
2054 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002055 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2056 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002057 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002058 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002059 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002060 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002061 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002062 if (*cur == '\n') {
2063 ctxt->input->line++; ctxt->input->col = 1;
2064 }
2065 cur++;
2066 res++;
2067 if (*cur == 0) {
2068 ctxt->input->cur = cur;
2069 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2070 cur = ctxt->input->cur;
2071 }
2072 }
2073 ctxt->input->cur = cur;
2074 } else {
2075 int cur;
2076 do {
2077 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002078 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002079 NEXT;
2080 cur = CUR;
2081 res++;
2082 }
2083 while ((cur == 0) && (ctxt->inputNr > 1) &&
2084 (ctxt->instate != XML_PARSER_COMMENT)) {
2085 xmlPopInput(ctxt);
2086 cur = CUR;
2087 }
2088 /*
2089 * Need to handle support of entities branching here
2090 */
2091 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2092 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2093 }
Owen Taylor3473f882001-02-23 17:55:21 +00002094 return(res);
2095}
2096
2097/************************************************************************
2098 * *
2099 * Commodity functions to handle entities *
2100 * *
2101 ************************************************************************/
2102
2103/**
2104 * xmlPopInput:
2105 * @ctxt: an XML parser context
2106 *
2107 * xmlPopInput: the current input pointed by ctxt->input came to an end
2108 * pop it and return the next char.
2109 *
2110 * Returns the current xmlChar in the parser context
2111 */
2112xmlChar
2113xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002114 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002115 if (xmlParserDebugEntities)
2116 xmlGenericError(xmlGenericErrorContext,
2117 "Popping input %d\n", ctxt->inputNr);
2118 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002119 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002120 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2121 return(xmlPopInput(ctxt));
2122 return(CUR);
2123}
2124
2125/**
2126 * xmlPushInput:
2127 * @ctxt: an XML parser context
2128 * @input: an XML parser input fragment (entity, XML fragment ...).
2129 *
2130 * xmlPushInput: switch to a new input stream which is stacked on top
2131 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002132 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002133 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002134int
Owen Taylor3473f882001-02-23 17:55:21 +00002135xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002136 int ret;
2137 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002138
2139 if (xmlParserDebugEntities) {
2140 if ((ctxt->input != NULL) && (ctxt->input->filename))
2141 xmlGenericError(xmlGenericErrorContext,
2142 "%s(%d): ", ctxt->input->filename,
2143 ctxt->input->line);
2144 xmlGenericError(xmlGenericErrorContext,
2145 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2146 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002147 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002148 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002149 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002150}
2151
2152/**
2153 * xmlParseCharRef:
2154 * @ctxt: an XML parser context
2155 *
2156 * parse Reference declarations
2157 *
2158 * [66] CharRef ::= '&#' [0-9]+ ';' |
2159 * '&#x' [0-9a-fA-F]+ ';'
2160 *
2161 * [ WFC: Legal Character ]
2162 * Characters referred to using character references must match the
2163 * production for Char.
2164 *
2165 * Returns the value parsed (as an int), 0 in case of error
2166 */
2167int
2168xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002169 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002170 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002171 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002172
Owen Taylor3473f882001-02-23 17:55:21 +00002173 /*
2174 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2175 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002176 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002177 (NXT(2) == 'x')) {
2178 SKIP(3);
2179 GROW;
2180 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002181 if (count++ > 20) {
2182 count = 0;
2183 GROW;
2184 }
2185 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002186 val = val * 16 + (CUR - '0');
2187 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2188 val = val * 16 + (CUR - 'a') + 10;
2189 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2190 val = val * 16 + (CUR - 'A') + 10;
2191 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002192 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002193 val = 0;
2194 break;
2195 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002196 if (val > 0x10FFFF)
2197 outofrange = val;
2198
Owen Taylor3473f882001-02-23 17:55:21 +00002199 NEXT;
2200 count++;
2201 }
2202 if (RAW == ';') {
2203 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002204 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002205 ctxt->nbChars ++;
2206 ctxt->input->cur++;
2207 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002208 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002209 SKIP(2);
2210 GROW;
2211 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002212 if (count++ > 20) {
2213 count = 0;
2214 GROW;
2215 }
2216 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002217 val = val * 10 + (CUR - '0');
2218 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002219 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002220 val = 0;
2221 break;
2222 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002223 if (val > 0x10FFFF)
2224 outofrange = val;
2225
Owen Taylor3473f882001-02-23 17:55:21 +00002226 NEXT;
2227 count++;
2228 }
2229 if (RAW == ';') {
2230 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002231 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002232 ctxt->nbChars ++;
2233 ctxt->input->cur++;
2234 }
2235 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002236 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002237 }
2238
2239 /*
2240 * [ WFC: Legal Character ]
2241 * Characters referred to using character references must match the
2242 * production for Char.
2243 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002244 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002245 return(val);
2246 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002247 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2248 "xmlParseCharRef: invalid xmlChar value %d\n",
2249 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002250 }
2251 return(0);
2252}
2253
2254/**
2255 * xmlParseStringCharRef:
2256 * @ctxt: an XML parser context
2257 * @str: a pointer to an index in the string
2258 *
2259 * parse Reference declarations, variant parsing from a string rather
2260 * than an an input flow.
2261 *
2262 * [66] CharRef ::= '&#' [0-9]+ ';' |
2263 * '&#x' [0-9a-fA-F]+ ';'
2264 *
2265 * [ WFC: Legal Character ]
2266 * Characters referred to using character references must match the
2267 * production for Char.
2268 *
2269 * Returns the value parsed (as an int), 0 in case of error, str will be
2270 * updated to the current value of the index
2271 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002272static int
Owen Taylor3473f882001-02-23 17:55:21 +00002273xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2274 const xmlChar *ptr;
2275 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002276 unsigned int val = 0;
2277 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002278
2279 if ((str == NULL) || (*str == NULL)) return(0);
2280 ptr = *str;
2281 cur = *ptr;
2282 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2283 ptr += 3;
2284 cur = *ptr;
2285 while (cur != ';') { /* Non input consuming loop */
2286 if ((cur >= '0') && (cur <= '9'))
2287 val = val * 16 + (cur - '0');
2288 else if ((cur >= 'a') && (cur <= 'f'))
2289 val = val * 16 + (cur - 'a') + 10;
2290 else if ((cur >= 'A') && (cur <= 'F'))
2291 val = val * 16 + (cur - 'A') + 10;
2292 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002293 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002294 val = 0;
2295 break;
2296 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002297 if (val > 0x10FFFF)
2298 outofrange = val;
2299
Owen Taylor3473f882001-02-23 17:55:21 +00002300 ptr++;
2301 cur = *ptr;
2302 }
2303 if (cur == ';')
2304 ptr++;
2305 } else if ((cur == '&') && (ptr[1] == '#')){
2306 ptr += 2;
2307 cur = *ptr;
2308 while (cur != ';') { /* Non input consuming loops */
2309 if ((cur >= '0') && (cur <= '9'))
2310 val = val * 10 + (cur - '0');
2311 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002312 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002313 val = 0;
2314 break;
2315 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002316 if (val > 0x10FFFF)
2317 outofrange = val;
2318
Owen Taylor3473f882001-02-23 17:55:21 +00002319 ptr++;
2320 cur = *ptr;
2321 }
2322 if (cur == ';')
2323 ptr++;
2324 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002325 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002326 return(0);
2327 }
2328 *str = ptr;
2329
2330 /*
2331 * [ WFC: Legal Character ]
2332 * Characters referred to using character references must match the
2333 * production for Char.
2334 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002335 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002336 return(val);
2337 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002338 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2339 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2340 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002341 }
2342 return(0);
2343}
2344
2345/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002346 * xmlNewBlanksWrapperInputStream:
2347 * @ctxt: an XML parser context
2348 * @entity: an Entity pointer
2349 *
2350 * Create a new input stream for wrapping
2351 * blanks around a PEReference
2352 *
2353 * Returns the new input stream or NULL
2354 */
2355
2356static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2357
Daniel Veillardf4862f02002-09-10 11:13:43 +00002358static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002359xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2360 xmlParserInputPtr input;
2361 xmlChar *buffer;
2362 size_t length;
2363 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002364 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2365 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002366 return(NULL);
2367 }
2368 if (xmlParserDebugEntities)
2369 xmlGenericError(xmlGenericErrorContext,
2370 "new blanks wrapper for entity: %s\n", entity->name);
2371 input = xmlNewInputStream(ctxt);
2372 if (input == NULL) {
2373 return(NULL);
2374 }
2375 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002376 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002377 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002378 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002379 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002380 return(NULL);
2381 }
2382 buffer [0] = ' ';
2383 buffer [1] = '%';
2384 buffer [length-3] = ';';
2385 buffer [length-2] = ' ';
2386 buffer [length-1] = 0;
2387 memcpy(buffer + 2, entity->name, length - 5);
2388 input->free = deallocblankswrapper;
2389 input->base = buffer;
2390 input->cur = buffer;
2391 input->length = length;
2392 input->end = &buffer[length];
2393 return(input);
2394}
2395
2396/**
Owen Taylor3473f882001-02-23 17:55:21 +00002397 * xmlParserHandlePEReference:
2398 * @ctxt: the parser context
2399 *
2400 * [69] PEReference ::= '%' Name ';'
2401 *
2402 * [ WFC: No Recursion ]
2403 * A parsed entity must not contain a recursive
2404 * reference to itself, either directly or indirectly.
2405 *
2406 * [ WFC: Entity Declared ]
2407 * In a document without any DTD, a document with only an internal DTD
2408 * subset which contains no parameter entity references, or a document
2409 * with "standalone='yes'", ... ... The declaration of a parameter
2410 * entity must precede any reference to it...
2411 *
2412 * [ VC: Entity Declared ]
2413 * In a document with an external subset or external parameter entities
2414 * with "standalone='no'", ... ... The declaration of a parameter entity
2415 * must precede any reference to it...
2416 *
2417 * [ WFC: In DTD ]
2418 * Parameter-entity references may only appear in the DTD.
2419 * NOTE: misleading but this is handled.
2420 *
2421 * A PEReference may have been detected in the current input stream
2422 * the handling is done accordingly to
2423 * http://www.w3.org/TR/REC-xml#entproc
2424 * i.e.
2425 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002426 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002427 */
2428void
2429xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002430 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002431 xmlEntityPtr entity = NULL;
2432 xmlParserInputPtr input;
2433
Owen Taylor3473f882001-02-23 17:55:21 +00002434 if (RAW != '%') return;
2435 switch(ctxt->instate) {
2436 case XML_PARSER_CDATA_SECTION:
2437 return;
2438 case XML_PARSER_COMMENT:
2439 return;
2440 case XML_PARSER_START_TAG:
2441 return;
2442 case XML_PARSER_END_TAG:
2443 return;
2444 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002445 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002446 return;
2447 case XML_PARSER_PROLOG:
2448 case XML_PARSER_START:
2449 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002450 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002451 return;
2452 case XML_PARSER_ENTITY_DECL:
2453 case XML_PARSER_CONTENT:
2454 case XML_PARSER_ATTRIBUTE_VALUE:
2455 case XML_PARSER_PI:
2456 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002457 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002458 /* we just ignore it there */
2459 return;
2460 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002461 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002462 return;
2463 case XML_PARSER_ENTITY_VALUE:
2464 /*
2465 * NOTE: in the case of entity values, we don't do the
2466 * substitution here since we need the literal
2467 * entity value to be able to save the internal
2468 * subset of the document.
2469 * This will be handled by xmlStringDecodeEntities
2470 */
2471 return;
2472 case XML_PARSER_DTD:
2473 /*
2474 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2475 * In the internal DTD subset, parameter-entity references
2476 * can occur only where markup declarations can occur, not
2477 * within markup declarations.
2478 * In that case this is handled in xmlParseMarkupDecl
2479 */
2480 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2481 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002482 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002483 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002484 break;
2485 case XML_PARSER_IGNORE:
2486 return;
2487 }
2488
2489 NEXT;
2490 name = xmlParseName(ctxt);
2491 if (xmlParserDebugEntities)
2492 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002493 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002494 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002495 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002496 } else {
2497 if (RAW == ';') {
2498 NEXT;
2499 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2500 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2501 if (entity == NULL) {
2502
2503 /*
2504 * [ WFC: Entity Declared ]
2505 * In a document without any DTD, a document with only an
2506 * internal DTD subset which contains no parameter entity
2507 * references, or a document with "standalone='yes'", ...
2508 * ... The declaration of a parameter entity must precede
2509 * any reference to it...
2510 */
2511 if ((ctxt->standalone == 1) ||
2512 ((ctxt->hasExternalSubset == 0) &&
2513 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002514 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002515 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002516 } else {
2517 /*
2518 * [ VC: Entity Declared ]
2519 * In a document with an external subset or external
2520 * parameter entities with "standalone='no'", ...
2521 * ... The declaration of a parameter entity must precede
2522 * any reference to it...
2523 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002524 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2525 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2526 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002527 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002528 } else
2529 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2530 "PEReference: %%%s; not found\n",
2531 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002532 ctxt->valid = 0;
2533 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002534 } else if (ctxt->input->free != deallocblankswrapper) {
2535 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002536 if (xmlPushInput(ctxt, input) < 0)
2537 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002538 } else {
2539 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2540 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002541 xmlChar start[4];
2542 xmlCharEncoding enc;
2543
Owen Taylor3473f882001-02-23 17:55:21 +00002544 /*
2545 * handle the extra spaces added before and after
2546 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002547 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002548 */
2549 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002550 if (xmlPushInput(ctxt, input) < 0)
2551 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002552
2553 /*
2554 * Get the 4 first bytes and decode the charset
2555 * if enc != XML_CHAR_ENCODING_NONE
2556 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002557 * Note that, since we may have some non-UTF8
2558 * encoding (like UTF16, bug 135229), the 'length'
2559 * is not known, but we can calculate based upon
2560 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002561 */
2562 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002563 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002564 start[0] = RAW;
2565 start[1] = NXT(1);
2566 start[2] = NXT(2);
2567 start[3] = NXT(3);
2568 enc = xmlDetectCharEncoding(start, 4);
2569 if (enc != XML_CHAR_ENCODING_NONE) {
2570 xmlSwitchEncoding(ctxt, enc);
2571 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002572 }
2573
Owen Taylor3473f882001-02-23 17:55:21 +00002574 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002575 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2576 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002577 xmlParseTextDecl(ctxt);
2578 }
Owen Taylor3473f882001-02-23 17:55:21 +00002579 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002580 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2581 "PEReference: %s is not a parameter entity\n",
2582 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002583 }
2584 }
2585 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002586 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002587 }
Owen Taylor3473f882001-02-23 17:55:21 +00002588 }
2589}
2590
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current one plus n. The macro jumps to
 * mem_error, leaving buffer and buffer##_size untouched, when the new
 * size wraps around or when the reallocation fails.
 * n is parenthesized in the expansion so that any expression can be
 * passed, including ones using operators binding less tightly than '+'.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = buffer##_size * 2 + (n);				\
    if (new_size < buffer##_size) goto mem_error;			\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);			\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = new_size;						\
}
2605
2606/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002607 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002608 * @ctxt: the parser context
2609 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002610 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002611 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2612 * @end: an end marker xmlChar, 0 if none
2613 * @end2: an end marker xmlChar, 0 if none
2614 * @end3: an end marker xmlChar, 0 if none
2615 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002616 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002617 *
2618 * [67] Reference ::= EntityRef | CharRef
2619 *
2620 * [69] PEReference ::= '%' Name ';'
2621 *
2622 * Returns A newly allocated string with the substitution done. The caller
2623 * must deallocate it !
2624 */
2625xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002626xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2627 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002628 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002629 size_t buffer_size = 0;
2630 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002631
2632 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002633 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002634 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002635 xmlEntityPtr ent;
2636 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002637
Daniel Veillarda82b1822004-11-08 16:24:57 +00002638 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002639 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002640 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002641
Daniel Veillard0161e632008-08-28 15:36:32 +00002642 if (((ctxt->depth > 40) &&
2643 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2644 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002645 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002646 return(NULL);
2647 }
2648
2649 /*
2650 * allocate a translation buffer.
2651 */
2652 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002653 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002654 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002655
2656 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002657 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002658 * we are operating on already parsed values.
2659 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002660 if (str < last)
2661 c = CUR_SCHAR(str, l);
2662 else
2663 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002664 while ((c != 0) && (c != end) && /* non input consuming loop */
2665 (c != end2) && (c != end3)) {
2666
2667 if (c == 0) break;
2668 if ((c == '&') && (str[1] == '#')) {
2669 int val = xmlParseStringCharRef(ctxt, &str);
2670 if (val != 0) {
2671 COPY_BUF(0,buffer,nbchars,val);
2672 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002673 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002674 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002675 }
Owen Taylor3473f882001-02-23 17:55:21 +00002676 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2677 if (xmlParserDebugEntities)
2678 xmlGenericError(xmlGenericErrorContext,
2679 "String decoding Entity Reference: %.30s\n",
2680 str);
2681 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002682 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2683 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002684 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002685 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002686 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002687 if ((ent != NULL) &&
2688 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2689 if (ent->content != NULL) {
2690 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002691 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002692 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002693 }
Owen Taylor3473f882001-02-23 17:55:21 +00002694 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002695 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2696 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002697 }
2698 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002699 ctxt->depth++;
2700 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2701 0, 0, 0);
2702 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002703
Owen Taylor3473f882001-02-23 17:55:21 +00002704 if (rep != NULL) {
2705 current = rep;
2706 while (*current != 0) { /* non input consuming loop */
2707 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002708 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002709 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2710 goto int_error;
2711 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002712 }
2713 }
2714 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002715 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002716 }
2717 } else if (ent != NULL) {
2718 int i = xmlStrlen(ent->name);
2719 const xmlChar *cur = ent->name;
2720
2721 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002722 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002723 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002724 }
2725 for (;i > 0;i--)
2726 buffer[nbchars++] = *cur++;
2727 buffer[nbchars++] = ';';
2728 }
2729 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730 if (xmlParserDebugEntities)
2731 xmlGenericError(xmlGenericErrorContext,
2732 "String decoding PE Reference: %.30s\n", str);
2733 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002734 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2735 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002736 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002737 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002738 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002739 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002740 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002741 }
Owen Taylor3473f882001-02-23 17:55:21 +00002742 ctxt->depth++;
2743 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2744 0, 0, 0);
2745 ctxt->depth--;
2746 if (rep != NULL) {
2747 current = rep;
2748 while (*current != 0) { /* non input consuming loop */
2749 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002750 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002751 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2752 goto int_error;
2753 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002754 }
2755 }
2756 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002757 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002758 }
2759 }
2760 } else {
2761 COPY_BUF(l,buffer,nbchars,c);
2762 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002763 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2764 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002765 }
2766 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002767 if (str < last)
2768 c = CUR_SCHAR(str, l);
2769 else
2770 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002771 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002772 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002773 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002774
2775mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002776 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002777int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002778 if (rep != NULL)
2779 xmlFree(rep);
2780 if (buffer != NULL)
2781 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002782 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002783}
2784
Daniel Veillarde57ec792003-09-10 10:50:59 +00002785/**
2786 * xmlStringDecodeEntities:
2787 * @ctxt: the parser context
2788 * @str: the input string
2789 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2790 * @end: an end marker xmlChar, 0 if none
2791 * @end2: an end marker xmlChar, 0 if none
2792 * @end3: an end marker xmlChar, 0 if none
2793 *
2794 * Takes a entity string content and process to do the adequate substitutions.
2795 *
2796 * [67] Reference ::= EntityRef | CharRef
2797 *
2798 * [69] PEReference ::= '%' Name ';'
2799 *
2800 * Returns A newly allocated string with the substitution done. The caller
2801 * must deallocate it !
2802 */
2803xmlChar *
2804xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2805 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002806 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002807 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2808 end, end2, end3));
2809}
Owen Taylor3473f882001-02-23 17:55:21 +00002810
2811/************************************************************************
2812 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002813 * Commodity functions, cleanup needed ? *
2814 * *
2815 ************************************************************************/
2816
2817/**
2818 * areBlanks:
2819 * @ctxt: an XML parser context
2820 * @str: a xmlChar *
2821 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002822 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002823 *
2824 * Is this a sequence of blank chars that one can ignore ?
2825 *
2826 * Returns 1 if ignorable 0 otherwise.
2827 */
2828
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002829static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2830 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002831 int i, ret;
2832 xmlNodePtr lastChild;
2833
Daniel Veillard05c13a22001-09-09 08:38:09 +00002834 /*
2835 * Don't spend time trying to differentiate them, the same callback is
2836 * used !
2837 */
2838 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002839 return(0);
2840
Owen Taylor3473f882001-02-23 17:55:21 +00002841 /*
2842 * Check for xml:space value.
2843 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002844 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2845 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002846 return(0);
2847
2848 /*
2849 * Check that the string is made of blanks
2850 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002851 if (blank_chars == 0) {
2852 for (i = 0;i < len;i++)
2853 if (!(IS_BLANK_CH(str[i]))) return(0);
2854 }
Owen Taylor3473f882001-02-23 17:55:21 +00002855
2856 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002857 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002858 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002859 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002860 if (ctxt->myDoc != NULL) {
2861 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2862 if (ret == 0) return(1);
2863 if (ret == 1) return(0);
2864 }
2865
2866 /*
2867 * Otherwise, heuristic :-\
2868 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002869 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002870 if ((ctxt->node->children == NULL) &&
2871 (RAW == '<') && (NXT(1) == '/')) return(0);
2872
2873 lastChild = xmlGetLastChild(ctxt->node);
2874 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002875 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2876 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002877 } else if (xmlNodeIsText(lastChild))
2878 return(0);
2879 else if ((ctxt->node->children != NULL) &&
2880 (xmlNodeIsText(ctxt->node->children)))
2881 return(0);
2882 return(1);
2883}
2884
Owen Taylor3473f882001-02-23 17:55:21 +00002885/************************************************************************
2886 * *
2887 * Extra stuff for namespace support *
2888 * Relates to http://www.w3.org/TR/WD-xml-names *
2889 * *
2890 ************************************************************************/
2891
2892/**
2893 * xmlSplitQName:
2894 * @ctxt: an XML parser context
2895 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002896 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002897 *
2898 * parse an UTF8 encoded XML qualified name string
2899 *
2900 * [NS 5] QName ::= (Prefix ':')? LocalPart
2901 *
2902 * [NS 6] Prefix ::= NCName
2903 *
2904 * [NS 7] LocalPart ::= NCName
2905 *
2906 * Returns the local part, and prefix is updated
2907 * to get the Prefix if any.
2908 */
2909
2910xmlChar *
2911xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2912 xmlChar buf[XML_MAX_NAMELEN + 5];
2913 xmlChar *buffer = NULL;
2914 int len = 0;
2915 int max = XML_MAX_NAMELEN;
2916 xmlChar *ret = NULL;
2917 const xmlChar *cur = name;
2918 int c;
2919
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002920 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002921 *prefix = NULL;
2922
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002923 if (cur == NULL) return(NULL);
2924
Owen Taylor3473f882001-02-23 17:55:21 +00002925#ifndef XML_XML_NAMESPACE
2926 /* xml: prefix is not really a namespace */
2927 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2928 (cur[2] == 'l') && (cur[3] == ':'))
2929 return(xmlStrdup(name));
2930#endif
2931
Daniel Veillard597bc482003-07-24 16:08:28 +00002932 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002933 if (cur[0] == ':')
2934 return(xmlStrdup(name));
2935
2936 c = *cur++;
2937 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2938 buf[len++] = c;
2939 c = *cur++;
2940 }
2941 if (len >= max) {
2942 /*
2943 * Okay someone managed to make a huge name, so he's ready to pay
2944 * for the processing speed.
2945 */
2946 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002947
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002948 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002949 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002950 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002951 return(NULL);
2952 }
2953 memcpy(buffer, buf, len);
2954 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2955 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002956 xmlChar *tmp;
2957
Owen Taylor3473f882001-02-23 17:55:21 +00002958 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002959 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002960 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002961 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002962 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002963 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002964 return(NULL);
2965 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002966 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002967 }
2968 buffer[len++] = c;
2969 c = *cur++;
2970 }
2971 buffer[len] = 0;
2972 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002973
Daniel Veillard597bc482003-07-24 16:08:28 +00002974 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002975 if (buffer != NULL)
2976 xmlFree(buffer);
2977 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002978 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002979 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002980
Owen Taylor3473f882001-02-23 17:55:21 +00002981 if (buffer == NULL)
2982 ret = xmlStrndup(buf, len);
2983 else {
2984 ret = buffer;
2985 buffer = NULL;
2986 max = XML_MAX_NAMELEN;
2987 }
2988
2989
2990 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002991 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002992 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002993 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002994 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002995 }
Owen Taylor3473f882001-02-23 17:55:21 +00002996 len = 0;
2997
Daniel Veillardbb284f42002-10-16 18:02:47 +00002998 /*
2999 * Check that the first character is proper to start
3000 * a new name
3001 */
3002 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3003 ((c >= 0x41) && (c <= 0x5A)) ||
3004 (c == '_') || (c == ':'))) {
3005 int l;
3006 int first = CUR_SCHAR(cur, l);
3007
3008 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003009 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003010 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003011 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003012 }
3013 }
3014 cur++;
3015
Owen Taylor3473f882001-02-23 17:55:21 +00003016 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3017 buf[len++] = c;
3018 c = *cur++;
3019 }
3020 if (len >= max) {
3021 /*
3022 * Okay someone managed to make a huge name, so he's ready to pay
3023 * for the processing speed.
3024 */
3025 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003026
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003027 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003028 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003029 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003030 return(NULL);
3031 }
3032 memcpy(buffer, buf, len);
3033 while (c != 0) { /* tested bigname2.xml */
3034 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003035 xmlChar *tmp;
3036
Owen Taylor3473f882001-02-23 17:55:21 +00003037 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003038 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003039 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003040 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003041 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003042 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003043 return(NULL);
3044 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003045 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 }
3047 buffer[len++] = c;
3048 c = *cur++;
3049 }
3050 buffer[len] = 0;
3051 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003052
Owen Taylor3473f882001-02-23 17:55:21 +00003053 if (buffer == NULL)
3054 ret = xmlStrndup(buf, len);
3055 else {
3056 ret = buffer;
3057 }
3058 }
3059
3060 return(ret);
3061}
3062
3063/************************************************************************
3064 * *
3065 * The parser itself *
3066 * Relates to http://www.w3.org/TR/REC-xml *
3067 * *
3068 ************************************************************************/
3069
Daniel Veillard34e3f642008-07-29 09:02:27 +00003070/************************************************************************
3071 * *
3072 * Routines to parse Name, NCName and NmToken *
3073 * *
3074 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled when DEBUG
 * is defined. Objects with static storage duration start at zero.
 */
static unsigned long nbParseName;          /* calls to xmlParseName */
static unsigned long nbParseNameComplex;   /* calls to xmlParseNameComplex */
static unsigned long nbParseNCName;        /* calls to xmlParseNCName */
static unsigned long nbParseNCNameComplex; /* calls to xmlParseNCNameComplex */
static unsigned long nbParseStringName;    /* calls to xmlParseStringName */
static unsigned long nbParseNmToken;       /* calls to xmlParseNmtoken */
#endif
3083
Daniel Veillard34e3f642008-07-29 09:02:27 +00003084/*
3085 * The two following functions are related to the change of accepted
3086 * characters for Name and NmToken in the Revision 5 of XML-1.0
3087 * They correspond to the modified production [4] and the new production [4a]
3088 * changes in that revision. Also note that the macros used for the
3089 * productions Letter, Digit, CombiningChar and Extender are not needed
3090 * anymore.
3091 * We still keep compatibility to pre-revision5 parsing semantic if the
3092 * new XML_PARSE_OLD10 option is given to the parser.
3093 */
3094static int
3095xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3096 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3097 /*
3098 * Use the new checks of production [4] [4a] amd [5] of the
3099 * Update 5 of XML-1.0
3100 */
3101 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3102 (((c >= 'a') && (c <= 'z')) ||
3103 ((c >= 'A') && (c <= 'Z')) ||
3104 (c == '_') || (c == ':') ||
3105 ((c >= 0xC0) && (c <= 0xD6)) ||
3106 ((c >= 0xD8) && (c <= 0xF6)) ||
3107 ((c >= 0xF8) && (c <= 0x2FF)) ||
3108 ((c >= 0x370) && (c <= 0x37D)) ||
3109 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3110 ((c >= 0x200C) && (c <= 0x200D)) ||
3111 ((c >= 0x2070) && (c <= 0x218F)) ||
3112 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3113 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3114 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3115 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3116 ((c >= 0x10000) && (c <= 0xEFFFF))))
3117 return(1);
3118 } else {
3119 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3120 return(1);
3121 }
3122 return(0);
3123}
3124
3125static int
3126xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3127 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3128 /*
3129 * Use the new checks of production [4] [4a] amd [5] of the
3130 * Update 5 of XML-1.0
3131 */
3132 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3133 (((c >= 'a') && (c <= 'z')) ||
3134 ((c >= 'A') && (c <= 'Z')) ||
3135 ((c >= '0') && (c <= '9')) || /* !start */
3136 (c == '_') || (c == ':') ||
3137 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3138 ((c >= 0xC0) && (c <= 0xD6)) ||
3139 ((c >= 0xD8) && (c <= 0xF6)) ||
3140 ((c >= 0xF8) && (c <= 0x2FF)) ||
3141 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3142 ((c >= 0x370) && (c <= 0x37D)) ||
3143 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3144 ((c >= 0x200C) && (c <= 0x200D)) ||
3145 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3146 ((c >= 0x2070) && (c <= 0x218F)) ||
3147 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3148 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3149 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3150 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3151 ((c >= 0x10000) && (c <= 0xEFFFF))))
3152 return(1);
3153 } else {
3154 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3155 (c == '.') || (c == '-') ||
3156 (c == '_') || (c == ':') ||
3157 (IS_COMBINING(c)) ||
3158 (IS_EXTENDER(c)))
3159 return(1);
3160 }
3161 return(0);
3162}
3163
Daniel Veillarde57ec792003-09-10 10:50:59 +00003164static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003165 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003166
Daniel Veillard34e3f642008-07-29 09:02:27 +00003167static const xmlChar *
3168xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3169 int len = 0, l;
3170 int c;
3171 int count = 0;
3172
Daniel Veillardc6561462009-03-25 10:22:31 +00003173#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003174 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003175#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003176
3177 /*
3178 * Handler for more complex cases
3179 */
3180 GROW;
3181 c = CUR_CHAR(l);
3182 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3183 /*
3184 * Use the new checks of production [4] [4a] amd [5] of the
3185 * Update 5 of XML-1.0
3186 */
3187 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3188 (!(((c >= 'a') && (c <= 'z')) ||
3189 ((c >= 'A') && (c <= 'Z')) ||
3190 (c == '_') || (c == ':') ||
3191 ((c >= 0xC0) && (c <= 0xD6)) ||
3192 ((c >= 0xD8) && (c <= 0xF6)) ||
3193 ((c >= 0xF8) && (c <= 0x2FF)) ||
3194 ((c >= 0x370) && (c <= 0x37D)) ||
3195 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3196 ((c >= 0x200C) && (c <= 0x200D)) ||
3197 ((c >= 0x2070) && (c <= 0x218F)) ||
3198 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3199 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3200 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3201 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3202 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3203 return(NULL);
3204 }
3205 len += l;
3206 NEXTL(l);
3207 c = CUR_CHAR(l);
3208 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3209 (((c >= 'a') && (c <= 'z')) ||
3210 ((c >= 'A') && (c <= 'Z')) ||
3211 ((c >= '0') && (c <= '9')) || /* !start */
3212 (c == '_') || (c == ':') ||
3213 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3214 ((c >= 0xC0) && (c <= 0xD6)) ||
3215 ((c >= 0xD8) && (c <= 0xF6)) ||
3216 ((c >= 0xF8) && (c <= 0x2FF)) ||
3217 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3218 ((c >= 0x370) && (c <= 0x37D)) ||
3219 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3220 ((c >= 0x200C) && (c <= 0x200D)) ||
3221 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3222 ((c >= 0x2070) && (c <= 0x218F)) ||
3223 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3224 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3225 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3226 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3227 ((c >= 0x10000) && (c <= 0xEFFFF))
3228 )) {
3229 if (count++ > 100) {
3230 count = 0;
3231 GROW;
3232 }
3233 len += l;
3234 NEXTL(l);
3235 c = CUR_CHAR(l);
3236 }
3237 } else {
3238 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3239 (!IS_LETTER(c) && (c != '_') &&
3240 (c != ':'))) {
3241 return(NULL);
3242 }
3243 len += l;
3244 NEXTL(l);
3245 c = CUR_CHAR(l);
3246
3247 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3248 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3249 (c == '.') || (c == '-') ||
3250 (c == '_') || (c == ':') ||
3251 (IS_COMBINING(c)) ||
3252 (IS_EXTENDER(c)))) {
3253 if (count++ > 100) {
3254 count = 0;
3255 GROW;
3256 }
3257 len += l;
3258 NEXTL(l);
3259 c = CUR_CHAR(l);
3260 }
3261 }
3262 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3263 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3264 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3265}
3266
Owen Taylor3473f882001-02-23 17:55:21 +00003267/**
3268 * xmlParseName:
3269 * @ctxt: an XML parser context
3270 *
3271 * parse an XML name.
3272 *
3273 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3274 * CombiningChar | Extender
3275 *
3276 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3277 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003278 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003279 *
3280 * Returns the Name parsed or NULL
3281 */
3282
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003283const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003284xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003285 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003286 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003287 int count = 0;
3288
3289 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003290
Daniel Veillardc6561462009-03-25 10:22:31 +00003291#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003292 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003293#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003294
Daniel Veillard48b2f892001-02-25 16:11:03 +00003295 /*
3296 * Accelerator for simple ASCII names
3297 */
3298 in = ctxt->input->cur;
3299 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3300 ((*in >= 0x41) && (*in <= 0x5A)) ||
3301 (*in == '_') || (*in == ':')) {
3302 in++;
3303 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3304 ((*in >= 0x41) && (*in <= 0x5A)) ||
3305 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003306 (*in == '_') || (*in == '-') ||
3307 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003308 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003309 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003310 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003311 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003312 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003313 ctxt->nbChars += count;
3314 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003315 if (ret == NULL)
3316 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003317 return(ret);
3318 }
3319 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003320 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003321 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003322}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003323
Daniel Veillard34e3f642008-07-29 09:02:27 +00003324static const xmlChar *
3325xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3326 int len = 0, l;
3327 int c;
3328 int count = 0;
3329
Daniel Veillardc6561462009-03-25 10:22:31 +00003330#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003331 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003332#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003333
3334 /*
3335 * Handler for more complex cases
3336 */
3337 GROW;
3338 c = CUR_CHAR(l);
3339 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3340 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3341 return(NULL);
3342 }
3343
3344 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3345 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3346 if (count++ > 100) {
3347 count = 0;
3348 GROW;
3349 }
3350 len += l;
3351 NEXTL(l);
3352 c = CUR_CHAR(l);
3353 }
3354 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3355}
3356
3357/**
3358 * xmlParseNCName:
3359 * @ctxt: an XML parser context
3360 * @len: lenght of the string parsed
3361 *
3362 * parse an XML name.
3363 *
3364 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3365 * CombiningChar | Extender
3366 *
3367 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3368 *
3369 * Returns the Name parsed or NULL
3370 */
3371
3372static const xmlChar *
3373xmlParseNCName(xmlParserCtxtPtr ctxt) {
3374 const xmlChar *in;
3375 const xmlChar *ret;
3376 int count = 0;
3377
Daniel Veillardc6561462009-03-25 10:22:31 +00003378#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003379 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003380#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003381
3382 /*
3383 * Accelerator for simple ASCII names
3384 */
3385 in = ctxt->input->cur;
3386 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387 ((*in >= 0x41) && (*in <= 0x5A)) ||
3388 (*in == '_')) {
3389 in++;
3390 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391 ((*in >= 0x41) && (*in <= 0x5A)) ||
3392 ((*in >= 0x30) && (*in <= 0x39)) ||
3393 (*in == '_') || (*in == '-') ||
3394 (*in == '.'))
3395 in++;
3396 if ((*in > 0) && (*in < 0x80)) {
3397 count = in - ctxt->input->cur;
3398 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3399 ctxt->input->cur = in;
3400 ctxt->nbChars += count;
3401 ctxt->input->col += count;
3402 if (ret == NULL) {
3403 xmlErrMemory(ctxt, NULL);
3404 }
3405 return(ret);
3406 }
3407 }
3408 return(xmlParseNCNameComplex(ctxt));
3409}
3410
Daniel Veillard46de64e2002-05-29 08:21:33 +00003411/**
3412 * xmlParseNameAndCompare:
3413 * @ctxt: an XML parser context
3414 *
3415 * parse an XML name and compares for match
3416 * (specialized for endtag parsing)
3417 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003418 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3419 * and the name for mismatch
3420 */
3421
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003422static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003423xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003424 register const xmlChar *cmp = other;
3425 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003426 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003427
3428 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003429
Daniel Veillard46de64e2002-05-29 08:21:33 +00003430 in = ctxt->input->cur;
3431 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003432 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003433 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003434 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003435 }
William M. Brack76e95df2003-10-18 16:20:14 +00003436 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003437 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003438 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003439 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003440 }
3441 /* failure (or end of input buffer), check with full function */
3442 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003443 /* strings coming from the dictionnary direct compare possible */
3444 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003445 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003446 }
3447 return ret;
3448}
3449
Owen Taylor3473f882001-02-23 17:55:21 +00003450/**
3451 * xmlParseStringName:
3452 * @ctxt: an XML parser context
3453 * @str: a pointer to the string pointer (IN/OUT)
3454 *
3455 * parse an XML name.
3456 *
3457 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3458 * CombiningChar | Extender
3459 *
3460 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3461 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003462 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003463 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003464 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003465 * is updated to the current location in the string.
3466 */
3467
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003468static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003469xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3470 xmlChar buf[XML_MAX_NAMELEN + 5];
3471 const xmlChar *cur = *str;
3472 int len = 0, l;
3473 int c;
3474
Daniel Veillardc6561462009-03-25 10:22:31 +00003475#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003476 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003477#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003478
Owen Taylor3473f882001-02-23 17:55:21 +00003479 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003480 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003481 return(NULL);
3482 }
3483
Daniel Veillard34e3f642008-07-29 09:02:27 +00003484 COPY_BUF(l,buf,len,c);
3485 cur += l;
3486 c = CUR_SCHAR(cur, l);
3487 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003488 COPY_BUF(l,buf,len,c);
3489 cur += l;
3490 c = CUR_SCHAR(cur, l);
3491 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3492 /*
3493 * Okay someone managed to make a huge name, so he's ready to pay
3494 * for the processing speed.
3495 */
3496 xmlChar *buffer;
3497 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003498
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003499 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003500 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003501 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003502 return(NULL);
3503 }
3504 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003505 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003506 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003507 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003508 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003509 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003510 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003511 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003512 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003513 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003514 return(NULL);
3515 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003516 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003517 }
3518 COPY_BUF(l,buffer,len,c);
3519 cur += l;
3520 c = CUR_SCHAR(cur, l);
3521 }
3522 buffer[len] = 0;
3523 *str = cur;
3524 return(buffer);
3525 }
3526 }
3527 *str = cur;
3528 return(xmlStrndup(buf, len));
3529}
3530
3531/**
3532 * xmlParseNmtoken:
3533 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003534 *
Owen Taylor3473f882001-02-23 17:55:21 +00003535 * parse an XML Nmtoken.
3536 *
3537 * [7] Nmtoken ::= (NameChar)+
3538 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003539 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003540 *
3541 * Returns the Nmtoken parsed or NULL
3542 */
3543
3544xmlChar *
3545xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3546 xmlChar buf[XML_MAX_NAMELEN + 5];
3547 int len = 0, l;
3548 int c;
3549 int count = 0;
3550
Daniel Veillardc6561462009-03-25 10:22:31 +00003551#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003552 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003553#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003554
Owen Taylor3473f882001-02-23 17:55:21 +00003555 GROW;
3556 c = CUR_CHAR(l);
3557
Daniel Veillard34e3f642008-07-29 09:02:27 +00003558 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003559 if (count++ > 100) {
3560 count = 0;
3561 GROW;
3562 }
3563 COPY_BUF(l,buf,len,c);
3564 NEXTL(l);
3565 c = CUR_CHAR(l);
3566 if (len >= XML_MAX_NAMELEN) {
3567 /*
3568 * Okay someone managed to make a huge token, so he's ready to pay
3569 * for the processing speed.
3570 */
3571 xmlChar *buffer;
3572 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003573
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003574 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003575 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003576 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003577 return(NULL);
3578 }
3579 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003580 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003581 if (count++ > 100) {
3582 count = 0;
3583 GROW;
3584 }
3585 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003586 xmlChar *tmp;
3587
Owen Taylor3473f882001-02-23 17:55:21 +00003588 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003589 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003590 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003591 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003592 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003593 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003594 return(NULL);
3595 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003596 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003597 }
3598 COPY_BUF(l,buffer,len,c);
3599 NEXTL(l);
3600 c = CUR_CHAR(l);
3601 }
3602 buffer[len] = 0;
3603 return(buffer);
3604 }
3605 }
3606 if (len == 0)
3607 return(NULL);
3608 return(xmlStrndup(buf, len));
3609}
3610
3611/**
3612 * xmlParseEntityValue:
3613 * @ctxt: an XML parser context
3614 * @orig: if non-NULL store a copy of the original entity value
3615 *
3616 * parse a value for ENTITY declarations
3617 *
3618 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3619 * "'" ([^%&'] | PEReference | Reference)* "'"
3620 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003621 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003622 */
3623
3624xmlChar *
3625xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3626 xmlChar *buf = NULL;
3627 int len = 0;
3628 int size = XML_PARSER_BUFFER_SIZE;
3629 int c, l;
3630 xmlChar stop;
3631 xmlChar *ret = NULL;
3632 const xmlChar *cur = NULL;
3633 xmlParserInputPtr input;
3634
3635 if (RAW == '"') stop = '"';
3636 else if (RAW == '\'') stop = '\'';
3637 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003638 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003639 return(NULL);
3640 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003641 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003642 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003643 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003644 return(NULL);
3645 }
3646
3647 /*
3648 * The content of the entity definition is copied in a buffer.
3649 */
3650
3651 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3652 input = ctxt->input;
3653 GROW;
3654 NEXT;
3655 c = CUR_CHAR(l);
3656 /*
3657 * NOTE: 4.4.5 Included in Literal
3658 * When a parameter entity reference appears in a literal entity
3659 * value, ... a single or double quote character in the replacement
3660 * text is always treated as a normal data character and will not
3661 * terminate the literal.
3662 * In practice it means we stop the loop only when back at parsing
3663 * the initial entity and the quote is found
3664 */
William M. Brack871611b2003-10-18 04:53:14 +00003665 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003666 (ctxt->input != input))) {
3667 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003668 xmlChar *tmp;
3669
Owen Taylor3473f882001-02-23 17:55:21 +00003670 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003671 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3672 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003673 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003674 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003675 return(NULL);
3676 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003677 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003678 }
3679 COPY_BUF(l,buf,len,c);
3680 NEXTL(l);
3681 /*
3682 * Pop-up of finished entities.
3683 */
3684 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3685 xmlPopInput(ctxt);
3686
3687 GROW;
3688 c = CUR_CHAR(l);
3689 if (c == 0) {
3690 GROW;
3691 c = CUR_CHAR(l);
3692 }
3693 }
3694 buf[len] = 0;
3695
3696 /*
3697 * Raise problem w.r.t. '&' and '%' being used in non-entities
3698 * reference constructs. Note Charref will be handled in
3699 * xmlStringDecodeEntities()
3700 */
3701 cur = buf;
3702 while (*cur != 0) { /* non input consuming */
3703 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3704 xmlChar *name;
3705 xmlChar tmp = *cur;
3706
3707 cur++;
3708 name = xmlParseStringName(ctxt, &cur);
3709 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003710 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003711 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003712 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003713 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003714 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3715 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003716 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003717 }
3718 if (name != NULL)
3719 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003720 if (*cur == 0)
3721 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003722 }
3723 cur++;
3724 }
3725
3726 /*
3727 * Then PEReference entities are substituted.
3728 */
3729 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003730 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003731 xmlFree(buf);
3732 } else {
3733 NEXT;
3734 /*
3735 * NOTE: 4.4.7 Bypassed
3736 * When a general entity reference appears in the EntityValue in
3737 * an entity declaration, it is bypassed and left as is.
3738 * so XML_SUBSTITUTE_REF is not set here.
3739 */
3740 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3741 0, 0, 0);
3742 if (orig != NULL)
3743 *orig = buf;
3744 else
3745 xmlFree(buf);
3746 }
3747
3748 return(ret);
3749}
3750
3751/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003752 * xmlParseAttValueComplex:
3753 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003754 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003755 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003756 *
3757 * parse a value for an attribute, this is the fallback function
3758 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003759 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003760 *
3761 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3762 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003763static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003764xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003765 xmlChar limit = 0;
3766 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003767 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003768 size_t len = 0;
3769 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003770 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003771 xmlChar *current = NULL;
3772 xmlEntityPtr ent;
3773
Owen Taylor3473f882001-02-23 17:55:21 +00003774 if (NXT(0) == '"') {
3775 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3776 limit = '"';
3777 NEXT;
3778 } else if (NXT(0) == '\'') {
3779 limit = '\'';
3780 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3781 NEXT;
3782 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003783 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003784 return(NULL);
3785 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003786
Owen Taylor3473f882001-02-23 17:55:21 +00003787 /*
3788 * allocate a translation buffer.
3789 */
3790 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003791 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003792 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003793
3794 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003795 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003796 */
3797 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003798 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003799 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003800 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003801 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003802 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003803 if (NXT(1) == '#') {
3804 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003805
Owen Taylor3473f882001-02-23 17:55:21 +00003806 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003807 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003808 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003809 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003810 }
3811 buf[len++] = '&';
3812 } else {
3813 /*
3814 * The reparsing will be done in xmlStringGetNodeList()
3815 * called by the attribute() function in SAX.c
3816 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003817 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003818 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003819 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003820 buf[len++] = '&';
3821 buf[len++] = '#';
3822 buf[len++] = '3';
3823 buf[len++] = '8';
3824 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003825 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003826 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003827 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003828 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003829 }
Owen Taylor3473f882001-02-23 17:55:21 +00003830 len += xmlCopyChar(0, &buf[len], val);
3831 }
3832 } else {
3833 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003834 ctxt->nbentities++;
3835 if (ent != NULL)
3836 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003837 if ((ent != NULL) &&
3838 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003839 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003840 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003841 }
3842 if ((ctxt->replaceEntities == 0) &&
3843 (ent->content[0] == '&')) {
3844 buf[len++] = '&';
3845 buf[len++] = '#';
3846 buf[len++] = '3';
3847 buf[len++] = '8';
3848 buf[len++] = ';';
3849 } else {
3850 buf[len++] = ent->content[0];
3851 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003852 } else if ((ent != NULL) &&
3853 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003854 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3855 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003856 XML_SUBSTITUTE_REF,
3857 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003858 if (rep != NULL) {
3859 current = rep;
3860 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003861 if ((*current == 0xD) || (*current == 0xA) ||
3862 (*current == 0x9)) {
3863 buf[len++] = 0x20;
3864 current++;
3865 } else
3866 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003867 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003868 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003869 }
3870 }
3871 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003872 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003873 }
3874 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003875 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003876 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003877 }
Owen Taylor3473f882001-02-23 17:55:21 +00003878 if (ent->content != NULL)
3879 buf[len++] = ent->content[0];
3880 }
3881 } else if (ent != NULL) {
3882 int i = xmlStrlen(ent->name);
3883 const xmlChar *cur = ent->name;
3884
3885 /*
3886 * This may look absurd but is needed to detect
3887 * entities problems
3888 */
3889 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3890 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003891 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003892 XML_SUBSTITUTE_REF, 0, 0, 0);
3893 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003894 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003895 rep = NULL;
3896 }
Owen Taylor3473f882001-02-23 17:55:21 +00003897 }
3898
3899 /*
3900 * Just output the reference
3901 */
3902 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08003903 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003904 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003905 }
3906 for (;i > 0;i--)
3907 buf[len++] = *cur++;
3908 buf[len++] = ';';
3909 }
3910 }
3911 } else {
3912 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003913 if ((len != 0) || (!normalize)) {
3914 if ((!normalize) || (!in_space)) {
3915 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08003916 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003917 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003918 }
3919 }
3920 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003921 }
3922 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003923 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003924 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08003925 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003926 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003927 }
3928 }
3929 NEXTL(l);
3930 }
3931 GROW;
3932 c = CUR_CHAR(l);
3933 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003934 if ((in_space) && (normalize)) {
3935 while (buf[len - 1] == 0x20) len--;
3936 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003937 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003938 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003939 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003940 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003941 if ((c != 0) && (!IS_CHAR(c))) {
3942 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3943 "invalid character in attribute value\n");
3944 } else {
3945 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3946 "AttValue: ' expected\n");
3947 }
Owen Taylor3473f882001-02-23 17:55:21 +00003948 } else
3949 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003950
3951 /*
3952 * There we potentially risk an overflow, don't allow attribute value of
3953 * lenght more than INT_MAX it is a very reasonnable assumption !
3954 */
3955 if (len >= INT_MAX) {
3956 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3957 "AttValue lenght too long\n");
3958 goto mem_error;
3959 }
3960
3961 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00003962 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003963
3964mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003965 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003966 if (buf != NULL)
3967 xmlFree(buf);
3968 if (rep != NULL)
3969 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003970 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003971}
3972
3973/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003974 * xmlParseAttValue:
3975 * @ctxt: an XML parser context
3976 *
3977 * parse a value for an attribute
3978 * Note: the parser won't do substitution of entities here, this
3979 * will be handled later in xmlStringGetNodeList
3980 *
3981 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3982 * "'" ([^<&'] | Reference)* "'"
3983 *
3984 * 3.3.3 Attribute-Value Normalization:
3985 * Before the value of an attribute is passed to the application or
3986 * checked for validity, the XML processor must normalize it as follows:
3987 * - a character reference is processed by appending the referenced
3988 * character to the attribute value
3989 * - an entity reference is processed by recursively processing the
3990 * replacement text of the entity
3991 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3992 * appending #x20 to the normalized value, except that only a single
3993 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3994 * parsed entity or the literal entity value of an internal parsed entity
3995 * - other characters are processed by appending them to the normalized value
3996 * If the declared value is not CDATA, then the XML processor must further
3997 * process the normalized attribute value by discarding any leading and
3998 * trailing space (#x20) characters, and by replacing sequences of space
3999 * (#x20) characters by a single space (#x20) character.
4000 * All attributes for which no declaration has been read should be treated
4001 * by a non-validating parser as if declared CDATA.
4002 *
4003 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4004 */
4005
4006
4007xmlChar *
4008xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004009 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004010 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004011}
4012
4013/**
Owen Taylor3473f882001-02-23 17:55:21 +00004014 * xmlParseSystemLiteral:
4015 * @ctxt: an XML parser context
4016 *
4017 * parse an XML Literal
4018 *
4019 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4020 *
4021 * Returns the SystemLiteral parsed or NULL
4022 */
4023
4024xmlChar *
4025xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4026 xmlChar *buf = NULL;
4027 int len = 0;
4028 int size = XML_PARSER_BUFFER_SIZE;
4029 int cur, l;
4030 xmlChar stop;
4031 int state = ctxt->instate;
4032 int count = 0;
4033
4034 SHRINK;
4035 if (RAW == '"') {
4036 NEXT;
4037 stop = '"';
4038 } else if (RAW == '\'') {
4039 NEXT;
4040 stop = '\'';
4041 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004042 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004043 return(NULL);
4044 }
4045
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004046 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004047 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004048 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004049 return(NULL);
4050 }
4051 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4052 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004053 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004054 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004055 xmlChar *tmp;
4056
Owen Taylor3473f882001-02-23 17:55:21 +00004057 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004058 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4059 if (tmp == NULL) {
4060 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004061 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004062 ctxt->instate = (xmlParserInputState) state;
4063 return(NULL);
4064 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004065 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004066 }
4067 count++;
4068 if (count > 50) {
4069 GROW;
4070 count = 0;
4071 }
4072 COPY_BUF(l,buf,len,cur);
4073 NEXTL(l);
4074 cur = CUR_CHAR(l);
4075 if (cur == 0) {
4076 GROW;
4077 SHRINK;
4078 cur = CUR_CHAR(l);
4079 }
4080 }
4081 buf[len] = 0;
4082 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004083 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004084 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004085 } else {
4086 NEXT;
4087 }
4088 return(buf);
4089}
4090
4091/**
4092 * xmlParsePubidLiteral:
4093 * @ctxt: an XML parser context
4094 *
4095 * parse an XML public literal
4096 *
4097 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4098 *
4099 * Returns the PubidLiteral parsed or NULL.
4100 */
4101
4102xmlChar *
4103xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4104 xmlChar *buf = NULL;
4105 int len = 0;
4106 int size = XML_PARSER_BUFFER_SIZE;
4107 xmlChar cur;
4108 xmlChar stop;
4109 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004110 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004111
4112 SHRINK;
4113 if (RAW == '"') {
4114 NEXT;
4115 stop = '"';
4116 } else if (RAW == '\'') {
4117 NEXT;
4118 stop = '\'';
4119 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004120 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004121 return(NULL);
4122 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004123 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004124 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004125 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004126 return(NULL);
4127 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004128 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004129 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004130 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004131 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004132 xmlChar *tmp;
4133
Owen Taylor3473f882001-02-23 17:55:21 +00004134 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004135 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4136 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004137 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004138 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004139 return(NULL);
4140 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004141 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004142 }
4143 buf[len++] = cur;
4144 count++;
4145 if (count > 50) {
4146 GROW;
4147 count = 0;
4148 }
4149 NEXT;
4150 cur = CUR;
4151 if (cur == 0) {
4152 GROW;
4153 SHRINK;
4154 cur = CUR;
4155 }
4156 }
4157 buf[len] = 0;
4158 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004159 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004160 } else {
4161 NEXT;
4162 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004163 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004164 return(buf);
4165}
4166
Daniel Veillard8ed10722009-08-20 19:17:36 +02004167static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004168
/*
 * Lookup table used for the test in the inner loop of the char data
 * scanning: the entry for a byte is non-zero when that byte can simply
 * be stepped over. Entries are zero for every byte below 0x20 except
 * TAB (0x09), for '&' (0x26), '<' (0x3C) and ']' (0x5D), and for every
 * byte from 0x80 upward (non-ascii).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 is kept, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' is zeroed */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' is zeroed */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' is zeroed */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4206
Owen Taylor3473f882001-02-23 17:55:21 +00004207/**
4208 * xmlParseCharData:
4209 * @ctxt: an XML parser context
4210 * @cdata: int indicating whether we are within a CDATA section
4211 *
4212 * parse a CharData section.
4213 * if we are within a CDATA section ']]>' marks an end of section.
4214 *
4215 * The right angle bracket (>) may be represented using the string "&gt;",
4216 * and must, for compatibility, be escaped using "&gt;" or a character
4217 * reference when it appears in the string "]]>" in content, when that
4218 * string is not marking the end of a CDATA section.
4219 *
4220 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4221 */
4222
4223void
4224xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004225 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004226 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004227 int line = ctxt->input->line;
4228 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004229 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004230
4231 SHRINK;
4232 GROW;
4233 /*
4234 * Accelerated common case where input don't need to be
4235 * modified before passing it to the handler.
4236 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004237 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004238 in = ctxt->input->cur;
4239 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004240get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004241 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004242 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004243 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004244 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004245 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004246 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004247 goto get_more_space;
4248 }
4249 if (*in == '<') {
4250 nbchar = in - ctxt->input->cur;
4251 if (nbchar > 0) {
4252 const xmlChar *tmp = ctxt->input->cur;
4253 ctxt->input->cur = in;
4254
Daniel Veillard34099b42004-11-04 17:34:35 +00004255 if ((ctxt->sax != NULL) &&
4256 (ctxt->sax->ignorableWhitespace !=
4257 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004258 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004259 if (ctxt->sax->ignorableWhitespace != NULL)
4260 ctxt->sax->ignorableWhitespace(ctxt->userData,
4261 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004262 } else {
4263 if (ctxt->sax->characters != NULL)
4264 ctxt->sax->characters(ctxt->userData,
4265 tmp, nbchar);
4266 if (*ctxt->space == -1)
4267 *ctxt->space = -2;
4268 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004269 } else if ((ctxt->sax != NULL) &&
4270 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004271 ctxt->sax->characters(ctxt->userData,
4272 tmp, nbchar);
4273 }
4274 }
4275 return;
4276 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004277
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004278get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004279 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004280 while (test_char_data[*in]) {
4281 in++;
4282 ccol++;
4283 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004284 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004285 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004286 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004287 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004288 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004289 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004290 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004291 }
4292 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004293 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004294 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004295 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004296 return;
4297 }
4298 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004299 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004300 goto get_more;
4301 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004302 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004303 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004304 if ((ctxt->sax != NULL) &&
4305 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004306 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004307 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004308 const xmlChar *tmp = ctxt->input->cur;
4309 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004310
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004311 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004312 if (ctxt->sax->ignorableWhitespace != NULL)
4313 ctxt->sax->ignorableWhitespace(ctxt->userData,
4314 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004315 } else {
4316 if (ctxt->sax->characters != NULL)
4317 ctxt->sax->characters(ctxt->userData,
4318 tmp, nbchar);
4319 if (*ctxt->space == -1)
4320 *ctxt->space = -2;
4321 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004322 line = ctxt->input->line;
4323 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004324 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004325 if (ctxt->sax->characters != NULL)
4326 ctxt->sax->characters(ctxt->userData,
4327 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004328 line = ctxt->input->line;
4329 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004330 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004331 /* something really bad happened in the SAX callback */
4332 if (ctxt->instate != XML_PARSER_CONTENT)
4333 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004334 }
4335 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004336 if (*in == 0xD) {
4337 in++;
4338 if (*in == 0xA) {
4339 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004340 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004341 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004342 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004343 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004344 in--;
4345 }
4346 if (*in == '<') {
4347 return;
4348 }
4349 if (*in == '&') {
4350 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004351 }
4352 SHRINK;
4353 GROW;
4354 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004355 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004356 nbchar = 0;
4357 }
Daniel Veillard50582112001-03-26 22:52:16 +00004358 ctxt->input->line = line;
4359 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004360 xmlParseCharDataComplex(ctxt, cdata);
4361}
4362
Daniel Veillard01c13b52002-12-10 15:19:08 +00004363/**
4364 * xmlParseCharDataComplex:
4365 * @ctxt: an XML parser context
4366 * @cdata: int indicating whether we are within a CDATA section
4367 *
4368 * parse a CharData section.this is the fallback function
4369 * of xmlParseCharData() when the parsing requires handling
4370 * of non-ASCII characters.
4371 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004372static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004373xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004374 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4375 int nbchar = 0;
4376 int cur, l;
4377 int count = 0;
4378
4379 SHRINK;
4380 GROW;
4381 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004382 while ((cur != '<') && /* checked */
4383 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004384 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004385 if ((cur == ']') && (NXT(1) == ']') &&
4386 (NXT(2) == '>')) {
4387 if (cdata) break;
4388 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004389 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004390 }
4391 }
4392 COPY_BUF(l,buf,nbchar,cur);
4393 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004394 buf[nbchar] = 0;
4395
Owen Taylor3473f882001-02-23 17:55:21 +00004396 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004397 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004398 */
4399 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004400 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004401 if (ctxt->sax->ignorableWhitespace != NULL)
4402 ctxt->sax->ignorableWhitespace(ctxt->userData,
4403 buf, nbchar);
4404 } else {
4405 if (ctxt->sax->characters != NULL)
4406 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004407 if ((ctxt->sax->characters !=
4408 ctxt->sax->ignorableWhitespace) &&
4409 (*ctxt->space == -1))
4410 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004411 }
4412 }
4413 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004414 /* something really bad happened in the SAX callback */
4415 if (ctxt->instate != XML_PARSER_CONTENT)
4416 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004417 }
4418 count++;
4419 if (count > 50) {
4420 GROW;
4421 count = 0;
4422 }
4423 NEXTL(l);
4424 cur = CUR_CHAR(l);
4425 }
4426 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004427 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004428 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004429 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004430 */
4431 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004432 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004433 if (ctxt->sax->ignorableWhitespace != NULL)
4434 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4435 } else {
4436 if (ctxt->sax->characters != NULL)
4437 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004438 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4439 (*ctxt->space == -1))
4440 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004441 }
4442 }
4443 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004444 if ((cur != 0) && (!IS_CHAR(cur))) {
4445 /* Generate the error and skip the offending character */
4446 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4447 "PCDATA invalid Char value %d\n",
4448 cur);
4449 NEXTL(l);
4450 }
Owen Taylor3473f882001-02-23 17:55:21 +00004451}
4452
4453/**
4454 * xmlParseExternalID:
4455 * @ctxt: an XML parser context
4456 * @publicID: a xmlChar** receiving PubidLiteral
4457 * @strict: indicate whether we should restrict parsing to only
4458 * production [75], see NOTE below
4459 *
4460 * Parse an External ID or a Public ID
4461 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004462 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004463 * 'PUBLIC' S PubidLiteral S SystemLiteral
4464 *
4465 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4466 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4467 *
4468 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4469 *
4470 * Returns the function returns SystemLiteral and in the second
4471 * case publicID receives PubidLiteral, is strict is off
4472 * it is possible to return NULL and have publicID set.
4473 */
4474
4475xmlChar *
4476xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4477 xmlChar *URI = NULL;
4478
4479 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004480
4481 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004482 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004483 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004484 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004485 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4486 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004487 }
4488 SKIP_BLANKS;
4489 URI = xmlParseSystemLiteral(ctxt);
4490 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004491 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004492 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004493 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004494 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004495 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004496 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004497 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004498 }
4499 SKIP_BLANKS;
4500 *publicID = xmlParsePubidLiteral(ctxt);
4501 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004502 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004503 }
4504 if (strict) {
4505 /*
4506 * We don't handle [83] so "S SystemLiteral" is required.
4507 */
William M. Brack76e95df2003-10-18 16:20:14 +00004508 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004509 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004510 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004511 }
4512 } else {
4513 /*
4514 * We handle [83] so we return immediately, if
4515 * "S SystemLiteral" is not detected. From a purely parsing
4516 * point of view that's a nice mess.
4517 */
4518 const xmlChar *ptr;
4519 GROW;
4520
4521 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004522 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004523
William M. Brack76e95df2003-10-18 16:20:14 +00004524 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004525 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4526 }
4527 SKIP_BLANKS;
4528 URI = xmlParseSystemLiteral(ctxt);
4529 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004530 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004531 }
4532 }
4533 return(URI);
4534}
4535
4536/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004537 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004538 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004539 * @buf: the already parsed part of the buffer
4540 * @len: number of bytes filled in the buffer
4541 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004542 *
4543 * Skip an XML (SGML) comment <!-- .... -->
4544 * The spec says that "For compatibility, the string "--" (double-hyphen)
4545 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004546 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004547 *
4548 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4549 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004550static void
4551xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004552 int q, ql;
4553 int r, rl;
4554 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004555 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004556 int inputid;
4557
4558 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004559
Owen Taylor3473f882001-02-23 17:55:21 +00004560 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004561 len = 0;
4562 size = XML_PARSER_BUFFER_SIZE;
4563 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4564 if (buf == NULL) {
4565 xmlErrMemory(ctxt, NULL);
4566 return;
4567 }
Owen Taylor3473f882001-02-23 17:55:21 +00004568 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004569 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004570 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004571 if (q == 0)
4572 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004573 if (!IS_CHAR(q)) {
4574 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4575 "xmlParseComment: invalid xmlChar value %d\n",
4576 q);
4577 xmlFree (buf);
4578 return;
4579 }
Owen Taylor3473f882001-02-23 17:55:21 +00004580 NEXTL(ql);
4581 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004582 if (r == 0)
4583 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004584 if (!IS_CHAR(r)) {
4585 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4586 "xmlParseComment: invalid xmlChar value %d\n",
4587 q);
4588 xmlFree (buf);
4589 return;
4590 }
Owen Taylor3473f882001-02-23 17:55:21 +00004591 NEXTL(rl);
4592 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004593 if (cur == 0)
4594 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004595 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004596 ((cur != '>') ||
4597 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004598 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004599 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004600 }
4601 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004602 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004603 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004604 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4605 if (new_buf == NULL) {
4606 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004607 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004608 return;
4609 }
William M. Bracka3215c72004-07-31 16:24:01 +00004610 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004611 }
4612 COPY_BUF(ql,buf,len,q);
4613 q = r;
4614 ql = rl;
4615 r = cur;
4616 rl = l;
4617
4618 count++;
4619 if (count > 50) {
4620 GROW;
4621 count = 0;
4622 }
4623 NEXTL(l);
4624 cur = CUR_CHAR(l);
4625 if (cur == 0) {
4626 SHRINK;
4627 GROW;
4628 cur = CUR_CHAR(l);
4629 }
4630 }
4631 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004632 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004633 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004634 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004635 } else if (!IS_CHAR(cur)) {
4636 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4637 "xmlParseComment: invalid xmlChar value %d\n",
4638 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004639 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004640 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004641 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4642 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004643 }
4644 NEXT;
4645 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4646 (!ctxt->disableSAX))
4647 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004648 }
Daniel Veillardda629342007-08-01 07:49:06 +00004649 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004650 return;
4651not_terminated:
4652 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4653 "Comment not terminated\n", NULL);
4654 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004655 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004656}
Daniel Veillardda629342007-08-01 07:49:06 +00004657
Daniel Veillard4c778d82005-01-23 17:37:44 +00004658/**
4659 * xmlParseComment:
4660 * @ctxt: an XML parser context
4661 *
4662 * Skip an XML (SGML) comment <!-- .... -->
4663 * The spec says that "For compatibility, the string "--" (double-hyphen)
4664 * must not occur within comments. "
4665 *
4666 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4667 */
4668void
4669xmlParseComment(xmlParserCtxtPtr ctxt) {
4670 xmlChar *buf = NULL;
4671 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004672 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004673 xmlParserInputState state;
4674 const xmlChar *in;
4675 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004676 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004677
4678 /*
4679 * Check that there is a comment right here.
4680 */
4681 if ((RAW != '<') || (NXT(1) != '!') ||
4682 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004683 state = ctxt->instate;
4684 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004685 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004686 SKIP(4);
4687 SHRINK;
4688 GROW;
4689
4690 /*
4691 * Accelerated common case where input don't need to be
4692 * modified before passing it to the handler.
4693 */
4694 in = ctxt->input->cur;
4695 do {
4696 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004697 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004698 ctxt->input->line++; ctxt->input->col = 1;
4699 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004700 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004701 }
4702get_more:
4703 ccol = ctxt->input->col;
4704 while (((*in > '-') && (*in <= 0x7F)) ||
4705 ((*in >= 0x20) && (*in < '-')) ||
4706 (*in == 0x09)) {
4707 in++;
4708 ccol++;
4709 }
4710 ctxt->input->col = ccol;
4711 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004712 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004713 ctxt->input->line++; ctxt->input->col = 1;
4714 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004715 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004716 goto get_more;
4717 }
4718 nbchar = in - ctxt->input->cur;
4719 /*
4720 * save current set of data
4721 */
4722 if (nbchar > 0) {
4723 if ((ctxt->sax != NULL) &&
4724 (ctxt->sax->comment != NULL)) {
4725 if (buf == NULL) {
4726 if ((*in == '-') && (in[1] == '-'))
4727 size = nbchar + 1;
4728 else
4729 size = XML_PARSER_BUFFER_SIZE + nbchar;
4730 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4731 if (buf == NULL) {
4732 xmlErrMemory(ctxt, NULL);
4733 ctxt->instate = state;
4734 return;
4735 }
4736 len = 0;
4737 } else if (len + nbchar + 1 >= size) {
4738 xmlChar *new_buf;
4739 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4740 new_buf = (xmlChar *) xmlRealloc(buf,
4741 size * sizeof(xmlChar));
4742 if (new_buf == NULL) {
4743 xmlFree (buf);
4744 xmlErrMemory(ctxt, NULL);
4745 ctxt->instate = state;
4746 return;
4747 }
4748 buf = new_buf;
4749 }
4750 memcpy(&buf[len], ctxt->input->cur, nbchar);
4751 len += nbchar;
4752 buf[len] = 0;
4753 }
4754 }
4755 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004756 if (*in == 0xA) {
4757 in++;
4758 ctxt->input->line++; ctxt->input->col = 1;
4759 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004760 if (*in == 0xD) {
4761 in++;
4762 if (*in == 0xA) {
4763 ctxt->input->cur = in;
4764 in++;
4765 ctxt->input->line++; ctxt->input->col = 1;
4766 continue; /* while */
4767 }
4768 in--;
4769 }
4770 SHRINK;
4771 GROW;
4772 in = ctxt->input->cur;
4773 if (*in == '-') {
4774 if (in[1] == '-') {
4775 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004776 if (ctxt->input->id != inputid) {
4777 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4778 "comment doesn't start and stop in the same entity\n");
4779 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004780 SKIP(3);
4781 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4782 (!ctxt->disableSAX)) {
4783 if (buf != NULL)
4784 ctxt->sax->comment(ctxt->userData, buf);
4785 else
4786 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4787 }
4788 if (buf != NULL)
4789 xmlFree(buf);
4790 ctxt->instate = state;
4791 return;
4792 }
Bryan Henderson8658d272012-05-08 16:39:05 +08004793 if (buf != NULL) {
4794 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4795 "Double hyphen within comment: "
4796 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00004797 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08004798 } else
4799 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4800 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004801 in++;
4802 ctxt->input->col++;
4803 }
4804 in++;
4805 ctxt->input->col++;
4806 goto get_more;
4807 }
4808 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4809 xmlParseCommentComplex(ctxt, buf, len, size);
4810 ctxt->instate = state;
4811 return;
4812}
4813
Owen Taylor3473f882001-02-23 17:55:21 +00004814
4815/**
4816 * xmlParsePITarget:
4817 * @ctxt: an XML parser context
4818 *
4819 * parse the name of a PI
4820 *
4821 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4822 *
4823 * Returns the PITarget name or NULL
4824 */
4825
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004826const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004827xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004828 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004829
4830 name = xmlParseName(ctxt);
4831 if ((name != NULL) &&
4832 ((name[0] == 'x') || (name[0] == 'X')) &&
4833 ((name[1] == 'm') || (name[1] == 'M')) &&
4834 ((name[2] == 'l') || (name[2] == 'L'))) {
4835 int i;
4836 if ((name[0] == 'x') && (name[1] == 'm') &&
4837 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004838 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004839 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004840 return(name);
4841 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004842 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004843 return(name);
4844 }
4845 for (i = 0;;i++) {
4846 if (xmlW3CPIs[i] == NULL) break;
4847 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4848 return(name);
4849 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004850 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4851 "xmlParsePITarget: invalid name prefix 'xml'\n",
4852 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004853 }
Daniel Veillard37334572008-07-31 08:20:02 +00004854 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4855 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4856 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4857 }
Owen Taylor3473f882001-02-23 17:55:21 +00004858 return(name);
4859}
4860
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004861#ifdef LIBXML_CATALOG_ENABLED
4862/**
4863 * xmlParseCatalogPI:
4864 * @ctxt: an XML parser context
4865 * @catalog: the PI value string
4866 *
4867 * parse an XML Catalog Processing Instruction.
4868 *
4869 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4870 *
4871 * Occurs only if allowed by the user and if happening in the Misc
4872 * part of the document before any doctype informations
4873 * This will add the given catalog to the parsing context in order
4874 * to be used if there is a resolution need further down in the document
4875 */
4876
4877static void
4878xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4879 xmlChar *URL = NULL;
4880 const xmlChar *tmp, *base;
4881 xmlChar marker;
4882
4883 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00004884 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004885 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4886 goto error;
4887 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00004888 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004889 if (*tmp != '=') {
4890 return;
4891 }
4892 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004893 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004894 marker = *tmp;
4895 if ((marker != '\'') && (marker != '"'))
4896 goto error;
4897 tmp++;
4898 base = tmp;
4899 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4900 if (*tmp == 0)
4901 goto error;
4902 URL = xmlStrndup(base, tmp - base);
4903 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004904 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004905 if (*tmp != 0)
4906 goto error;
4907
4908 if (URL != NULL) {
4909 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4910 xmlFree(URL);
4911 }
4912 return;
4913
4914error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00004915 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4916 "Catalog PI syntax error: %s\n",
4917 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004918 if (URL != NULL)
4919 xmlFree(URL);
4920}
4921#endif
4922
Owen Taylor3473f882001-02-23 17:55:21 +00004923/**
4924 * xmlParsePI:
4925 * @ctxt: an XML parser context
4926 *
4927 * parse an XML Processing Instruction.
4928 *
4929 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4930 *
4931 * The processing is transfered to SAX once parsed.
4932 */
4933
4934void
4935xmlParsePI(xmlParserCtxtPtr ctxt) {
4936 xmlChar *buf = NULL;
4937 int len = 0;
4938 int size = XML_PARSER_BUFFER_SIZE;
4939 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004940 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004941 xmlParserInputState state;
4942 int count = 0;
4943
4944 if ((RAW == '<') && (NXT(1) == '?')) {
4945 xmlParserInputPtr input = ctxt->input;
4946 state = ctxt->instate;
4947 ctxt->instate = XML_PARSER_PI;
4948 /*
4949 * this is a Processing Instruction.
4950 */
4951 SKIP(2);
4952 SHRINK;
4953
4954 /*
4955 * Parse the target name and check for special support like
4956 * namespace.
4957 */
4958 target = xmlParsePITarget(ctxt);
4959 if (target != NULL) {
4960 if ((RAW == '?') && (NXT(1) == '>')) {
4961 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004962 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4963 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004964 }
4965 SKIP(2);
4966
4967 /*
4968 * SAX: PI detected.
4969 */
4970 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4971 (ctxt->sax->processingInstruction != NULL))
4972 ctxt->sax->processingInstruction(ctxt->userData,
4973 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08004974 if (ctxt->instate != XML_PARSER_EOF)
4975 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004976 return;
4977 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004978 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004979 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004980 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004981 ctxt->instate = state;
4982 return;
4983 }
4984 cur = CUR;
4985 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004986 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4987 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004988 }
4989 SKIP_BLANKS;
4990 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004991 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004992 ((cur != '?') || (NXT(1) != '>'))) {
4993 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004994 xmlChar *tmp;
4995
Owen Taylor3473f882001-02-23 17:55:21 +00004996 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004997 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4998 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004999 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005000 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005001 ctxt->instate = state;
5002 return;
5003 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005004 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005005 }
5006 count++;
5007 if (count > 50) {
5008 GROW;
5009 count = 0;
5010 }
5011 COPY_BUF(l,buf,len,cur);
5012 NEXTL(l);
5013 cur = CUR_CHAR(l);
5014 if (cur == 0) {
5015 SHRINK;
5016 GROW;
5017 cur = CUR_CHAR(l);
5018 }
5019 }
5020 buf[len] = 0;
5021 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005022 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5023 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005024 } else {
5025 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005026 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5027 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005028 }
5029 SKIP(2);
5030
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005031#ifdef LIBXML_CATALOG_ENABLED
5032 if (((state == XML_PARSER_MISC) ||
5033 (state == XML_PARSER_START)) &&
5034 (xmlStrEqual(target, XML_CATALOG_PI))) {
5035 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5036 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5037 (allow == XML_CATA_ALLOW_ALL))
5038 xmlParseCatalogPI(ctxt, buf);
5039 }
5040#endif
5041
5042
Owen Taylor3473f882001-02-23 17:55:21 +00005043 /*
5044 * SAX: PI detected.
5045 */
5046 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5047 (ctxt->sax->processingInstruction != NULL))
5048 ctxt->sax->processingInstruction(ctxt->userData,
5049 target, buf);
5050 }
5051 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005052 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005053 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005054 }
Chris Evans77404b82011-12-14 16:18:25 +08005055 if (ctxt->instate != XML_PARSER_EOF)
5056 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005057 }
5058}
5059
5060/**
5061 * xmlParseNotationDecl:
5062 * @ctxt: an XML parser context
5063 *
5064 * parse a notation declaration
5065 *
5066 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5067 *
5068 * Hence there is actually 3 choices:
5069 * 'PUBLIC' S PubidLiteral
5070 * 'PUBLIC' S PubidLiteral S SystemLiteral
5071 * and 'SYSTEM' S SystemLiteral
5072 *
5073 * See the NOTE on xmlParseExternalID().
5074 */
5075
5076void
5077xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005078 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005079 xmlChar *Pubid;
5080 xmlChar *Systemid;
5081
Daniel Veillarda07050d2003-10-19 14:46:32 +00005082 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005083 xmlParserInputPtr input = ctxt->input;
5084 SHRINK;
5085 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005086 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005087 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5088 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005089 return;
5090 }
5091 SKIP_BLANKS;
5092
Daniel Veillard76d66f42001-05-16 21:05:17 +00005093 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005094 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005095 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005096 return;
5097 }
William M. Brack76e95df2003-10-18 16:20:14 +00005098 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005099 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005100 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005101 return;
5102 }
Daniel Veillard37334572008-07-31 08:20:02 +00005103 if (xmlStrchr(name, ':') != NULL) {
5104 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5105 "colon are forbidden from notation names '%s'\n",
5106 name, NULL, NULL);
5107 }
Owen Taylor3473f882001-02-23 17:55:21 +00005108 SKIP_BLANKS;
5109
5110 /*
5111 * Parse the IDs.
5112 */
5113 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5114 SKIP_BLANKS;
5115
5116 if (RAW == '>') {
5117 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005118 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5119 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005120 }
5121 NEXT;
5122 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5123 (ctxt->sax->notationDecl != NULL))
5124 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5125 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005126 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005127 }
Owen Taylor3473f882001-02-23 17:55:21 +00005128 if (Systemid != NULL) xmlFree(Systemid);
5129 if (Pubid != NULL) xmlFree(Pubid);
5130 }
5131}
5132
5133/**
5134 * xmlParseEntityDecl:
5135 * @ctxt: an XML parser context
5136 *
5137 * parse <!ENTITY declarations
5138 *
5139 * [70] EntityDecl ::= GEDecl | PEDecl
5140 *
5141 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5142 *
5143 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5144 *
5145 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5146 *
5147 * [74] PEDef ::= EntityValue | ExternalID
5148 *
5149 * [76] NDataDecl ::= S 'NDATA' S Name
5150 *
5151 * [ VC: Notation Declared ]
5152 * The Name must match the declared name of a notation.
5153 */
5154
5155void
5156xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005157 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005158 xmlChar *value = NULL;
5159 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005160 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005161 int isParameter = 0;
5162 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005163 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00005164
Daniel Veillard4c778d82005-01-23 17:37:44 +00005165 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005166 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005167 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005168 SHRINK;
5169 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005170 skipped = SKIP_BLANKS;
5171 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005172 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5173 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005174 }
Owen Taylor3473f882001-02-23 17:55:21 +00005175
5176 if (RAW == '%') {
5177 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005178 skipped = SKIP_BLANKS;
5179 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005180 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5181 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005182 }
Owen Taylor3473f882001-02-23 17:55:21 +00005183 isParameter = 1;
5184 }
5185
Daniel Veillard76d66f42001-05-16 21:05:17 +00005186 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005187 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005188 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5189 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005190 return;
5191 }
Daniel Veillard37334572008-07-31 08:20:02 +00005192 if (xmlStrchr(name, ':') != NULL) {
5193 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5194 "colon are forbidden from entities names '%s'\n",
5195 name, NULL, NULL);
5196 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005197 skipped = SKIP_BLANKS;
5198 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005199 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5200 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005201 }
Owen Taylor3473f882001-02-23 17:55:21 +00005202
Daniel Veillardf5582f12002-06-11 10:08:16 +00005203 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005204 /*
5205 * handle the various case of definitions...
5206 */
5207 if (isParameter) {
5208 if ((RAW == '"') || (RAW == '\'')) {
5209 value = xmlParseEntityValue(ctxt, &orig);
5210 if (value) {
5211 if ((ctxt->sax != NULL) &&
5212 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5213 ctxt->sax->entityDecl(ctxt->userData, name,
5214 XML_INTERNAL_PARAMETER_ENTITY,
5215 NULL, NULL, value);
5216 }
5217 } else {
5218 URI = xmlParseExternalID(ctxt, &literal, 1);
5219 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005220 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005221 }
5222 if (URI) {
5223 xmlURIPtr uri;
5224
5225 uri = xmlParseURI((const char *) URI);
5226 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005227 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5228 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005229 /*
5230 * This really ought to be a well formedness error
5231 * but the XML Core WG decided otherwise c.f. issue
5232 * E26 of the XML erratas.
5233 */
Owen Taylor3473f882001-02-23 17:55:21 +00005234 } else {
5235 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005236 /*
5237 * Okay this is foolish to block those but not
5238 * invalid URIs.
5239 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005240 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005241 } else {
5242 if ((ctxt->sax != NULL) &&
5243 (!ctxt->disableSAX) &&
5244 (ctxt->sax->entityDecl != NULL))
5245 ctxt->sax->entityDecl(ctxt->userData, name,
5246 XML_EXTERNAL_PARAMETER_ENTITY,
5247 literal, URI, NULL);
5248 }
5249 xmlFreeURI(uri);
5250 }
5251 }
5252 }
5253 } else {
5254 if ((RAW == '"') || (RAW == '\'')) {
5255 value = xmlParseEntityValue(ctxt, &orig);
5256 if ((ctxt->sax != NULL) &&
5257 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5258 ctxt->sax->entityDecl(ctxt->userData, name,
5259 XML_INTERNAL_GENERAL_ENTITY,
5260 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005261 /*
5262 * For expat compatibility in SAX mode.
5263 */
5264 if ((ctxt->myDoc == NULL) ||
5265 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5266 if (ctxt->myDoc == NULL) {
5267 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005268 if (ctxt->myDoc == NULL) {
5269 xmlErrMemory(ctxt, "New Doc failed");
5270 return;
5271 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005272 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005273 }
5274 if (ctxt->myDoc->intSubset == NULL)
5275 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5276 BAD_CAST "fake", NULL, NULL);
5277
Daniel Veillard1af9a412003-08-20 22:54:39 +00005278 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5279 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005280 }
Owen Taylor3473f882001-02-23 17:55:21 +00005281 } else {
5282 URI = xmlParseExternalID(ctxt, &literal, 1);
5283 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005284 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005285 }
5286 if (URI) {
5287 xmlURIPtr uri;
5288
5289 uri = xmlParseURI((const char *)URI);
5290 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005291 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5292 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005293 /*
5294 * This really ought to be a well formedness error
5295 * but the XML Core WG decided otherwise c.f. issue
5296 * E26 of the XML erratas.
5297 */
Owen Taylor3473f882001-02-23 17:55:21 +00005298 } else {
5299 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005300 /*
5301 * Okay this is foolish to block those but not
5302 * invalid URIs.
5303 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005304 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005305 }
5306 xmlFreeURI(uri);
5307 }
5308 }
William M. Brack76e95df2003-10-18 16:20:14 +00005309 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005310 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5311 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005312 }
5313 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005314 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005315 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005316 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005317 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5318 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005319 }
5320 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005321 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005322 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5323 (ctxt->sax->unparsedEntityDecl != NULL))
5324 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5325 literal, URI, ndata);
5326 } else {
5327 if ((ctxt->sax != NULL) &&
5328 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5329 ctxt->sax->entityDecl(ctxt->userData, name,
5330 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5331 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005332 /*
5333 * For expat compatibility in SAX mode.
5334 * assuming the entity repalcement was asked for
5335 */
5336 if ((ctxt->replaceEntities != 0) &&
5337 ((ctxt->myDoc == NULL) ||
5338 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5339 if (ctxt->myDoc == NULL) {
5340 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005341 if (ctxt->myDoc == NULL) {
5342 xmlErrMemory(ctxt, "New Doc failed");
5343 return;
5344 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005345 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005346 }
5347
5348 if (ctxt->myDoc->intSubset == NULL)
5349 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5350 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005351 xmlSAX2EntityDecl(ctxt, name,
5352 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5353 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005354 }
Owen Taylor3473f882001-02-23 17:55:21 +00005355 }
5356 }
5357 }
5358 SKIP_BLANKS;
5359 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005360 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005361 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005362 } else {
5363 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005364 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5365 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005366 }
5367 NEXT;
5368 }
5369 if (orig != NULL) {
5370 /*
5371 * Ugly mechanism to save the raw entity value.
5372 */
5373 xmlEntityPtr cur = NULL;
5374
5375 if (isParameter) {
5376 if ((ctxt->sax != NULL) &&
5377 (ctxt->sax->getParameterEntity != NULL))
5378 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5379 } else {
5380 if ((ctxt->sax != NULL) &&
5381 (ctxt->sax->getEntity != NULL))
5382 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005383 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005384 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005385 }
Owen Taylor3473f882001-02-23 17:55:21 +00005386 }
5387 if (cur != NULL) {
5388 if (cur->orig != NULL)
5389 xmlFree(orig);
5390 else
5391 cur->orig = orig;
5392 } else
5393 xmlFree(orig);
5394 }
Owen Taylor3473f882001-02-23 17:55:21 +00005395 if (value != NULL) xmlFree(value);
5396 if (URI != NULL) xmlFree(URI);
5397 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005398 }
5399}
5400
5401/**
5402 * xmlParseDefaultDecl:
5403 * @ctxt: an XML parser context
5404 * @value: Receive a possible fixed default value for the attribute
5405 *
5406 * Parse an attribute default declaration
5407 *
5408 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5409 *
5410 * [ VC: Required Attribute ]
5411 * if the default declaration is the keyword #REQUIRED, then the
5412 * attribute must be specified for all elements of the type in the
5413 * attribute-list declaration.
5414 *
5415 * [ VC: Attribute Default Legal ]
5416 * The declared default value must meet the lexical constraints of
5417 * the declared attribute type c.f. xmlValidateAttributeDecl()
5418 *
5419 * [ VC: Fixed Attribute Default ]
5420 * if an attribute has a default value declared with the #FIXED
5421 * keyword, instances of that attribute must match the default value.
5422 *
5423 * [ WFC: No < in Attribute Values ]
5424 * handled in xmlParseAttValue()
5425 *
5426 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5427 * or XML_ATTRIBUTE_FIXED.
5428 */
5429
5430int
5431xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5432 int val;
5433 xmlChar *ret;
5434
5435 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005436 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005437 SKIP(9);
5438 return(XML_ATTRIBUTE_REQUIRED);
5439 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005440 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005441 SKIP(8);
5442 return(XML_ATTRIBUTE_IMPLIED);
5443 }
5444 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005445 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005446 SKIP(6);
5447 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005448 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005449 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5450 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005451 }
5452 SKIP_BLANKS;
5453 }
5454 ret = xmlParseAttValue(ctxt);
5455 ctxt->instate = XML_PARSER_DTD;
5456 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005457 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005458 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005459 } else
5460 *value = ret;
5461 return(val);
5462}
5463
5464/**
5465 * xmlParseNotationType:
5466 * @ctxt: an XML parser context
5467 *
5468 * parse an Notation attribute type.
5469 *
5470 * Note: the leading 'NOTATION' S part has already being parsed...
5471 *
5472 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5473 *
5474 * [ VC: Notation Attributes ]
5475 * Values of this type must match one of the notation names included
5476 * in the declaration; all notation names in the declaration must be declared.
5477 *
5478 * Returns: the notation attribute tree built while parsing
5479 */
5480
5481xmlEnumerationPtr
5482xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005483 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005484 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005485
5486 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005487 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005488 return(NULL);
5489 }
5490 SHRINK;
5491 do {
5492 NEXT;
5493 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005494 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005495 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005496 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5497 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005498 xmlFreeEnumeration(ret);
5499 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005500 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005501 tmp = ret;
5502 while (tmp != NULL) {
5503 if (xmlStrEqual(name, tmp->name)) {
5504 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5505 "standalone: attribute notation value token %s duplicated\n",
5506 name, NULL);
5507 if (!xmlDictOwns(ctxt->dict, name))
5508 xmlFree((xmlChar *) name);
5509 break;
5510 }
5511 tmp = tmp->next;
5512 }
5513 if (tmp == NULL) {
5514 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005515 if (cur == NULL) {
5516 xmlFreeEnumeration(ret);
5517 return(NULL);
5518 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005519 if (last == NULL) ret = last = cur;
5520 else {
5521 last->next = cur;
5522 last = cur;
5523 }
Owen Taylor3473f882001-02-23 17:55:21 +00005524 }
5525 SKIP_BLANKS;
5526 } while (RAW == '|');
5527 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005528 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005529 xmlFreeEnumeration(ret);
5530 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005531 }
5532 NEXT;
5533 return(ret);
5534}
5535
5536/**
5537 * xmlParseEnumerationType:
5538 * @ctxt: an XML parser context
5539 *
5540 * parse an Enumeration attribute type.
5541 *
5542 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5543 *
5544 * [ VC: Enumeration ]
5545 * Values of this type must match one of the Nmtoken tokens in
5546 * the declaration
5547 *
5548 * Returns: the enumeration attribute tree built while parsing
5549 */
5550
5551xmlEnumerationPtr
5552xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5553 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005554 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005555
5556 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005557 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005558 return(NULL);
5559 }
5560 SHRINK;
5561 do {
5562 NEXT;
5563 SKIP_BLANKS;
5564 name = xmlParseNmtoken(ctxt);
5565 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005566 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005567 return(ret);
5568 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005569 tmp = ret;
5570 while (tmp != NULL) {
5571 if (xmlStrEqual(name, tmp->name)) {
5572 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5573 "standalone: attribute enumeration value token %s duplicated\n",
5574 name, NULL);
5575 if (!xmlDictOwns(ctxt->dict, name))
5576 xmlFree(name);
5577 break;
5578 }
5579 tmp = tmp->next;
5580 }
5581 if (tmp == NULL) {
5582 cur = xmlCreateEnumeration(name);
5583 if (!xmlDictOwns(ctxt->dict, name))
5584 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005585 if (cur == NULL) {
5586 xmlFreeEnumeration(ret);
5587 return(NULL);
5588 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005589 if (last == NULL) ret = last = cur;
5590 else {
5591 last->next = cur;
5592 last = cur;
5593 }
Owen Taylor3473f882001-02-23 17:55:21 +00005594 }
5595 SKIP_BLANKS;
5596 } while (RAW == '|');
5597 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005598 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005599 return(ret);
5600 }
5601 NEXT;
5602 return(ret);
5603}
5604
5605/**
5606 * xmlParseEnumeratedType:
5607 * @ctxt: an XML parser context
5608 * @tree: the enumeration tree built while parsing
5609 *
5610 * parse an Enumerated attribute type.
5611 *
5612 * [57] EnumeratedType ::= NotationType | Enumeration
5613 *
5614 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5615 *
5616 *
5617 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5618 */
5619
5620int
5621xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005622 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005623 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005624 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005625 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5626 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005627 return(0);
5628 }
5629 SKIP_BLANKS;
5630 *tree = xmlParseNotationType(ctxt);
5631 if (*tree == NULL) return(0);
5632 return(XML_ATTRIBUTE_NOTATION);
5633 }
5634 *tree = xmlParseEnumerationType(ctxt);
5635 if (*tree == NULL) return(0);
5636 return(XML_ATTRIBUTE_ENUMERATION);
5637}
5638
5639/**
5640 * xmlParseAttributeType:
5641 * @ctxt: an XML parser context
5642 * @tree: the enumeration tree built while parsing
5643 *
5644 * parse the Attribute list def for an element
5645 *
5646 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5647 *
5648 * [55] StringType ::= 'CDATA'
5649 *
5650 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5651 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5652 *
5653 * Validity constraints for attribute values syntax are checked in
5654 * xmlValidateAttributeValue()
5655 *
5656 * [ VC: ID ]
5657 * Values of type ID must match the Name production. A name must not
5658 * appear more than once in an XML document as a value of this type;
5659 * i.e., ID values must uniquely identify the elements which bear them.
5660 *
5661 * [ VC: One ID per Element Type ]
5662 * No element type may have more than one ID attribute specified.
5663 *
5664 * [ VC: ID Attribute Default ]
5665 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5666 *
5667 * [ VC: IDREF ]
5668 * Values of type IDREF must match the Name production, and values
5669 * of type IDREFS must match Names; each IDREF Name must match the value
5670 * of an ID attribute on some element in the XML document; i.e. IDREF
5671 * values must match the value of some ID attribute.
5672 *
5673 * [ VC: Entity Name ]
5674 * Values of type ENTITY must match the Name production, values
5675 * of type ENTITIES must match Names; each Entity Name must match the
5676 * name of an unparsed entity declared in the DTD.
5677 *
5678 * [ VC: Name Token ]
5679 * Values of type NMTOKEN must match the Nmtoken production; values
5680 * of type NMTOKENS must match Nmtokens.
5681 *
5682 * Returns the attribute type
5683 */
5684int
5685xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5686 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005687 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005688 SKIP(5);
5689 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005690 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005691 SKIP(6);
5692 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005693 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005694 SKIP(5);
5695 return(XML_ATTRIBUTE_IDREF);
5696 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5697 SKIP(2);
5698 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005699 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005700 SKIP(6);
5701 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005702 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005703 SKIP(8);
5704 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005705 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005706 SKIP(8);
5707 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005708 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005709 SKIP(7);
5710 return(XML_ATTRIBUTE_NMTOKEN);
5711 }
5712 return(xmlParseEnumeratedType(ctxt, tree));
5713}
5714
5715/**
5716 * xmlParseAttributeListDecl:
5717 * @ctxt: an XML parser context
5718 *
5719 * : parse the Attribute list def for an element
5720 *
5721 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5722 *
5723 * [53] AttDef ::= S Name S AttType S DefaultDecl
5724 *
5725 */
5726void
5727xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005728 const xmlChar *elemName;
5729 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005730 xmlEnumerationPtr tree;
5731
Daniel Veillarda07050d2003-10-19 14:46:32 +00005732 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005733 xmlParserInputPtr input = ctxt->input;
5734
5735 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005736 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005737 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005738 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005739 }
5740 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005741 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005742 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005743 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5744 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005745 return;
5746 }
5747 SKIP_BLANKS;
5748 GROW;
5749 while (RAW != '>') {
5750 const xmlChar *check = CUR_PTR;
5751 int type;
5752 int def;
5753 xmlChar *defaultValue = NULL;
5754
5755 GROW;
5756 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005757 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005758 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005759 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5760 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005761 break;
5762 }
5763 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005764 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005765 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005766 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005767 break;
5768 }
5769 SKIP_BLANKS;
5770
5771 type = xmlParseAttributeType(ctxt, &tree);
5772 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005773 break;
5774 }
5775
5776 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005777 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005778 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5779 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005780 if (tree != NULL)
5781 xmlFreeEnumeration(tree);
5782 break;
5783 }
5784 SKIP_BLANKS;
5785
5786 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5787 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005788 if (defaultValue != NULL)
5789 xmlFree(defaultValue);
5790 if (tree != NULL)
5791 xmlFreeEnumeration(tree);
5792 break;
5793 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005794 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5795 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005796
5797 GROW;
5798 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005799 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005800 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005801 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005802 if (defaultValue != NULL)
5803 xmlFree(defaultValue);
5804 if (tree != NULL)
5805 xmlFreeEnumeration(tree);
5806 break;
5807 }
5808 SKIP_BLANKS;
5809 }
5810 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005811 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5812 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005813 if (defaultValue != NULL)
5814 xmlFree(defaultValue);
5815 if (tree != NULL)
5816 xmlFreeEnumeration(tree);
5817 break;
5818 }
5819 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5820 (ctxt->sax->attributeDecl != NULL))
5821 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5822 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005823 else if (tree != NULL)
5824 xmlFreeEnumeration(tree);
5825
5826 if ((ctxt->sax2) && (defaultValue != NULL) &&
5827 (def != XML_ATTRIBUTE_IMPLIED) &&
5828 (def != XML_ATTRIBUTE_REQUIRED)) {
5829 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5830 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005831 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005832 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5833 }
Owen Taylor3473f882001-02-23 17:55:21 +00005834 if (defaultValue != NULL)
5835 xmlFree(defaultValue);
5836 GROW;
5837 }
5838 if (RAW == '>') {
5839 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005840 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5841 "Attribute list declaration doesn't start and stop in the same entity\n",
5842 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005843 }
5844 NEXT;
5845 }
Owen Taylor3473f882001-02-23 17:55:21 +00005846 }
5847}
5848
5849/**
5850 * xmlParseElementMixedContentDecl:
5851 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005852 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005853 *
5854 * parse the declaration for a Mixed Element content
5855 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5856 *
5857 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5858 * '(' S? '#PCDATA' S? ')'
5859 *
5860 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5861 *
5862 * [ VC: No Duplicate Types ]
5863 * The same name must not appear more than once in a single
5864 * mixed-content declaration.
5865 *
5866 * returns: the list of the xmlElementContentPtr describing the element choices
5867 */
5868xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005869xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005870 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005871 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005872
5873 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005874 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005875 SKIP(7);
5876 SKIP_BLANKS;
5877 SHRINK;
5878 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005879 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005880 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5881"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005882 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005883 }
Owen Taylor3473f882001-02-23 17:55:21 +00005884 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005885 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005886 if (ret == NULL)
5887 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005888 if (RAW == '*') {
5889 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5890 NEXT;
5891 }
5892 return(ret);
5893 }
5894 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005895 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005896 if (ret == NULL) return(NULL);
5897 }
5898 while (RAW == '|') {
5899 NEXT;
5900 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005901 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005902 if (ret == NULL) return(NULL);
5903 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005904 if (cur != NULL)
5905 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005906 cur = ret;
5907 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005908 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005909 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005910 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005911 if (n->c1 != NULL)
5912 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005913 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005914 if (n != NULL)
5915 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005916 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005917 }
5918 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005919 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005920 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005921 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005922 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005923 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005924 return(NULL);
5925 }
5926 SKIP_BLANKS;
5927 GROW;
5928 }
5929 if ((RAW == ')') && (NXT(1) == '*')) {
5930 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005931 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005932 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005933 if (cur->c2 != NULL)
5934 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005935 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02005936 if (ret != NULL)
5937 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005938 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005939 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5940"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005941 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005942 }
Owen Taylor3473f882001-02-23 17:55:21 +00005943 SKIP(2);
5944 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005945 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005946 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005947 return(NULL);
5948 }
5949
5950 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005951 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005952 }
5953 return(ret);
5954}
5955
5956/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005957 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005958 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005959 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005960 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005961 *
 * parse the declaration for the children (element-only) content of an element
5963 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5964 *
5965 *
5966 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5967 *
5968 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5969 *
5970 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5971 *
5972 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5973 *
5974 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5975 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005976 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005977 * opening or closing parentheses in a choice, seq, or Mixed
5978 * construct is contained in the replacement text for a parameter
5979 * entity, both must be contained in the same replacement text. For
5980 * interoperability, if a parameter-entity reference appears in a
5981 * choice, seq, or Mixed construct, its replacement text should not
5982 * be empty, and neither the first nor last non-blank character of
5983 * the replacement text should be a connector (| or ,).
5984 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005985 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005986 * hierarchy.
5987 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005988static xmlElementContentPtr
5989xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5990 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005991 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005992 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005993 xmlChar type = 0;
5994
Daniel Veillard489f9672009-08-10 16:49:30 +02005995 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5996 (depth > 2048)) {
5997 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5998"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5999 depth);
6000 return(NULL);
6001 }
Owen Taylor3473f882001-02-23 17:55:21 +00006002 SKIP_BLANKS;
6003 GROW;
6004 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006005 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006006
Owen Taylor3473f882001-02-23 17:55:21 +00006007 /* Recurse on first child */
6008 NEXT;
6009 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006010 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6011 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006012 SKIP_BLANKS;
6013 GROW;
6014 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006015 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006016 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006017 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006018 return(NULL);
6019 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006020 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006021 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006022 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006023 return(NULL);
6024 }
Owen Taylor3473f882001-02-23 17:55:21 +00006025 GROW;
6026 if (RAW == '?') {
6027 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6028 NEXT;
6029 } else if (RAW == '*') {
6030 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6031 NEXT;
6032 } else if (RAW == '+') {
6033 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6034 NEXT;
6035 } else {
6036 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6037 }
Owen Taylor3473f882001-02-23 17:55:21 +00006038 GROW;
6039 }
6040 SKIP_BLANKS;
6041 SHRINK;
6042 while (RAW != ')') {
6043 /*
6044 * Each loop we parse one separator and one element.
6045 */
6046 if (RAW == ',') {
6047 if (type == 0) type = CUR;
6048
6049 /*
6050 * Detect "Name | Name , Name" error
6051 */
6052 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006053 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006054 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006055 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006056 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006057 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006058 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006059 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006060 return(NULL);
6061 }
6062 NEXT;
6063
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006064 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006065 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006066 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006067 xmlFreeDocElementContent(ctxt->myDoc, last);
6068 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006069 return(NULL);
6070 }
6071 if (last == NULL) {
6072 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006073 if (ret != NULL)
6074 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006075 ret = cur = op;
6076 } else {
6077 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006078 if (op != NULL)
6079 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006080 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006081 if (last != NULL)
6082 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006083 cur =op;
6084 last = NULL;
6085 }
6086 } else if (RAW == '|') {
6087 if (type == 0) type = CUR;
6088
6089 /*
6090 * Detect "Name , Name | Name" error
6091 */
6092 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006093 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006094 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006095 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006096 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006097 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006098 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006099 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006100 return(NULL);
6101 }
6102 NEXT;
6103
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006104 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006105 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006106 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006107 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006108 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006109 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006110 return(NULL);
6111 }
6112 if (last == NULL) {
6113 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006114 if (ret != NULL)
6115 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006116 ret = cur = op;
6117 } else {
6118 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006119 if (op != NULL)
6120 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006121 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006122 if (last != NULL)
6123 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006124 cur =op;
6125 last = NULL;
6126 }
6127 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006128 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006129 if ((last != NULL) && (last != ret))
6130 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006131 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006132 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006133 return(NULL);
6134 }
6135 GROW;
6136 SKIP_BLANKS;
6137 GROW;
6138 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006139 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006140 /* Recurse on second child */
6141 NEXT;
6142 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006143 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6144 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006145 SKIP_BLANKS;
6146 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006147 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006148 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006149 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006150 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006151 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006152 return(NULL);
6153 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006154 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006155 if (last == NULL) {
6156 if (ret != NULL)
6157 xmlFreeDocElementContent(ctxt->myDoc, ret);
6158 return(NULL);
6159 }
Owen Taylor3473f882001-02-23 17:55:21 +00006160 if (RAW == '?') {
6161 last->ocur = XML_ELEMENT_CONTENT_OPT;
6162 NEXT;
6163 } else if (RAW == '*') {
6164 last->ocur = XML_ELEMENT_CONTENT_MULT;
6165 NEXT;
6166 } else if (RAW == '+') {
6167 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6168 NEXT;
6169 } else {
6170 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6171 }
6172 }
6173 SKIP_BLANKS;
6174 GROW;
6175 }
6176 if ((cur != NULL) && (last != NULL)) {
6177 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006178 if (last != NULL)
6179 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006180 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006181 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006182 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6183"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006184 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006185 }
Owen Taylor3473f882001-02-23 17:55:21 +00006186 NEXT;
6187 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006188 if (ret != NULL) {
6189 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6190 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6191 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6192 else
6193 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6194 }
Owen Taylor3473f882001-02-23 17:55:21 +00006195 NEXT;
6196 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006197 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006198 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006199 cur = ret;
6200 /*
6201 * Some normalization:
6202 * (a | b* | c?)* == (a | b | c)*
6203 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006204 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006205 if ((cur->c1 != NULL) &&
6206 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6207 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6208 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6209 if ((cur->c2 != NULL) &&
6210 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6211 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6212 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6213 cur = cur->c2;
6214 }
6215 }
Owen Taylor3473f882001-02-23 17:55:21 +00006216 NEXT;
6217 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006218 if (ret != NULL) {
6219 int found = 0;
6220
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006221 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6222 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6223 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006224 else
6225 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006226 /*
6227 * Some normalization:
6228 * (a | b*)+ == (a | b)*
6229 * (a | b?)+ == (a | b)*
6230 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006231 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006232 if ((cur->c1 != NULL) &&
6233 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6234 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6235 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6236 found = 1;
6237 }
6238 if ((cur->c2 != NULL) &&
6239 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6240 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6241 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6242 found = 1;
6243 }
6244 cur = cur->c2;
6245 }
6246 if (found)
6247 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6248 }
Owen Taylor3473f882001-02-23 17:55:21 +00006249 NEXT;
6250 }
6251 return(ret);
6252}
6253
6254/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006255 * xmlParseElementChildrenContentDecl:
6256 * @ctxt: an XML parser context
6257 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006258 *
6259 * parse the declaration for a Mixed Element content
6260 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6261 *
6262 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6263 *
6264 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6265 *
6266 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6267 *
6268 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6269 *
6270 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6271 * TODO Parameter-entity replacement text must be properly nested
6272 * with parenthesized groups. That is to say, if either of the
6273 * opening or closing parentheses in a choice, seq, or Mixed
6274 * construct is contained in the replacement text for a parameter
6275 * entity, both must be contained in the same replacement text. For
6276 * interoperability, if a parameter-entity reference appears in a
6277 * choice, seq, or Mixed construct, its replacement text should not
6278 * be empty, and neither the first nor last non-blank character of
6279 * the replacement text should be a connector (| or ,).
6280 *
6281 * Returns the tree of xmlElementContentPtr describing the element
6282 * hierarchy.
6283 */
6284xmlElementContentPtr
6285xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6286 /* stub left for API/ABI compat */
6287 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6288}
6289
6290/**
Owen Taylor3473f882001-02-23 17:55:21 +00006291 * xmlParseElementContentDecl:
6292 * @ctxt: an XML parser context
6293 * @name: the name of the element being defined.
6294 * @result: the Element Content pointer will be stored here if any
6295 *
6296 * parse the declaration for an Element content either Mixed or Children,
6297 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6298 *
6299 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6300 *
6301 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6302 */
6303
6304int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006305xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006306 xmlElementContentPtr *result) {
6307
6308 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006309 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006310 int res;
6311
6312 *result = NULL;
6313
6314 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006315 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006316 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006317 return(-1);
6318 }
6319 NEXT;
6320 GROW;
6321 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006322 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006323 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006324 res = XML_ELEMENT_TYPE_MIXED;
6325 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006326 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006327 res = XML_ELEMENT_TYPE_ELEMENT;
6328 }
Owen Taylor3473f882001-02-23 17:55:21 +00006329 SKIP_BLANKS;
6330 *result = tree;
6331 return(res);
6332}
6333
6334/**
6335 * xmlParseElementDecl:
6336 * @ctxt: an XML parser context
6337 *
6338 * parse an Element declaration.
6339 *
6340 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6341 *
6342 * [ VC: Unique Element Type Declaration ]
6343 * No element type may be declared more than once
6344 *
6345 * Returns the type of the element, or -1 in case of error
6346 */
6347int
6348xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006349 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006350 int ret = -1;
6351 xmlElementContentPtr content = NULL;
6352
Daniel Veillard4c778d82005-01-23 17:37:44 +00006353 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006354 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006355 xmlParserInputPtr input = ctxt->input;
6356
6357 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006358 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006359 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6360 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006361 }
6362 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006363 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006364 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006365 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6366 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006367 return(-1);
6368 }
6369 while ((RAW == 0) && (ctxt->inputNr > 1))
6370 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006371 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006372 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6373 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006374 }
6375 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006376 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006377 SKIP(5);
6378 /*
6379 * Element must always be empty.
6380 */
6381 ret = XML_ELEMENT_TYPE_EMPTY;
6382 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6383 (NXT(2) == 'Y')) {
6384 SKIP(3);
6385 /*
6386 * Element is a generic container.
6387 */
6388 ret = XML_ELEMENT_TYPE_ANY;
6389 } else if (RAW == '(') {
6390 ret = xmlParseElementContentDecl(ctxt, name, &content);
6391 } else {
6392 /*
6393 * [ WFC: PEs in Internal Subset ] error handling.
6394 */
6395 if ((RAW == '%') && (ctxt->external == 0) &&
6396 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006397 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006398 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006399 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006400 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006401 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6402 }
Owen Taylor3473f882001-02-23 17:55:21 +00006403 return(-1);
6404 }
6405
6406 SKIP_BLANKS;
6407 /*
6408 * Pop-up of finished entities.
6409 */
6410 while ((RAW == 0) && (ctxt->inputNr > 1))
6411 xmlPopInput(ctxt);
6412 SKIP_BLANKS;
6413
6414 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006415 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006416 if (content != NULL) {
6417 xmlFreeDocElementContent(ctxt->myDoc, content);
6418 }
Owen Taylor3473f882001-02-23 17:55:21 +00006419 } else {
6420 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006421 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6422 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006423 }
6424
6425 NEXT;
6426 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006427 (ctxt->sax->elementDecl != NULL)) {
6428 if (content != NULL)
6429 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006430 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6431 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006432 if ((content != NULL) && (content->parent == NULL)) {
6433 /*
6434 * this is a trick: if xmlAddElementDecl is called,
6435 * instead of copying the full tree it is plugged directly
6436 * if called from the parser. Avoid duplicating the
6437 * interfaces or change the API/ABI
6438 */
6439 xmlFreeDocElementContent(ctxt->myDoc, content);
6440 }
6441 } else if (content != NULL) {
6442 xmlFreeDocElementContent(ctxt->myDoc, content);
6443 }
Owen Taylor3473f882001-02-23 17:55:21 +00006444 }
Owen Taylor3473f882001-02-23 17:55:21 +00006445 }
6446 return(ret);
6447}
6448
6449/**
Owen Taylor3473f882001-02-23 17:55:21 +00006450 * xmlParseConditionalSections
6451 * @ctxt: an XML parser context
6452 *
6453 * [61] conditionalSect ::= includeSect | ignoreSect
6454 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6455 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6456 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6457 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6458 */
6459
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006460static void
Owen Taylor3473f882001-02-23 17:55:21 +00006461xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006462 int id = ctxt->input->id;
6463
Owen Taylor3473f882001-02-23 17:55:21 +00006464 SKIP(3);
6465 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006466 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006467 SKIP(7);
6468 SKIP_BLANKS;
6469 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006470 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006471 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006472 if (ctxt->input->id != id) {
6473 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6474 "All markup of the conditional section is not in the same entity\n",
6475 NULL, NULL);
6476 }
Owen Taylor3473f882001-02-23 17:55:21 +00006477 NEXT;
6478 }
6479 if (xmlParserDebugEntities) {
6480 if ((ctxt->input != NULL) && (ctxt->input->filename))
6481 xmlGenericError(xmlGenericErrorContext,
6482 "%s(%d): ", ctxt->input->filename,
6483 ctxt->input->line);
6484 xmlGenericError(xmlGenericErrorContext,
6485 "Entering INCLUDE Conditional Section\n");
6486 }
6487
6488 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6489 (NXT(2) != '>'))) {
6490 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006491 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006492
6493 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6494 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006495 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006496 NEXT;
6497 } else if (RAW == '%') {
6498 xmlParsePEReference(ctxt);
6499 } else
6500 xmlParseMarkupDecl(ctxt);
6501
6502 /*
6503 * Pop-up of finished entities.
6504 */
6505 while ((RAW == 0) && (ctxt->inputNr > 1))
6506 xmlPopInput(ctxt);
6507
Daniel Veillardfdc91562002-07-01 21:52:03 +00006508 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006509 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006510 break;
6511 }
6512 }
6513 if (xmlParserDebugEntities) {
6514 if ((ctxt->input != NULL) && (ctxt->input->filename))
6515 xmlGenericError(xmlGenericErrorContext,
6516 "%s(%d): ", ctxt->input->filename,
6517 ctxt->input->line);
6518 xmlGenericError(xmlGenericErrorContext,
6519 "Leaving INCLUDE Conditional Section\n");
6520 }
6521
Daniel Veillarda07050d2003-10-19 14:46:32 +00006522 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006523 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006524 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006525 int depth = 0;
6526
6527 SKIP(6);
6528 SKIP_BLANKS;
6529 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006530 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006531 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006532 if (ctxt->input->id != id) {
6533 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6534 "All markup of the conditional section is not in the same entity\n",
6535 NULL, NULL);
6536 }
Owen Taylor3473f882001-02-23 17:55:21 +00006537 NEXT;
6538 }
6539 if (xmlParserDebugEntities) {
6540 if ((ctxt->input != NULL) && (ctxt->input->filename))
6541 xmlGenericError(xmlGenericErrorContext,
6542 "%s(%d): ", ctxt->input->filename,
6543 ctxt->input->line);
6544 xmlGenericError(xmlGenericErrorContext,
6545 "Entering IGNORE Conditional Section\n");
6546 }
6547
6548 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006549 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006550 * But disable SAX event generating DTD building in the meantime
6551 */
6552 state = ctxt->disableSAX;
6553 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006554 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006555 ctxt->instate = XML_PARSER_IGNORE;
6556
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006557 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006558 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6559 depth++;
6560 SKIP(3);
6561 continue;
6562 }
6563 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6564 if (--depth >= 0) SKIP(3);
6565 continue;
6566 }
6567 NEXT;
6568 continue;
6569 }
6570
6571 ctxt->disableSAX = state;
6572 ctxt->instate = instate;
6573
6574 if (xmlParserDebugEntities) {
6575 if ((ctxt->input != NULL) && (ctxt->input->filename))
6576 xmlGenericError(xmlGenericErrorContext,
6577 "%s(%d): ", ctxt->input->filename,
6578 ctxt->input->line);
6579 xmlGenericError(xmlGenericErrorContext,
6580 "Leaving IGNORE Conditional Section\n");
6581 }
6582
6583 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006584 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006585 }
6586
6587 if (RAW == 0)
6588 SHRINK;
6589
6590 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006591 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006592 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006593 if (ctxt->input->id != id) {
6594 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6595 "All markup of the conditional section is not in the same entity\n",
6596 NULL, NULL);
6597 }
Owen Taylor3473f882001-02-23 17:55:21 +00006598 SKIP(3);
6599 }
6600}
6601
6602/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006603 * xmlParseMarkupDecl:
6604 * @ctxt: an XML parser context
6605 *
6606 * parse Markup declarations
6607 *
6608 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6609 * NotationDecl | PI | Comment
6610 *
6611 * [ VC: Proper Declaration/PE Nesting ]
6612 * Parameter-entity replacement text must be properly nested with
6613 * markup declarations. That is to say, if either the first character
6614 * or the last character of a markup declaration (markupdecl above) is
6615 * contained in the replacement text for a parameter-entity reference,
6616 * both must be contained in the same replacement text.
6617 *
6618 * [ WFC: PEs in Internal Subset ]
6619 * In the internal DTD subset, parameter-entity references can occur
6620 * only where markup declarations can occur, not within markup declarations.
6621 * (This does not apply to references that occur in external parameter
6622 * entities or to the external subset.)
6623 */
6624void
6625xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6626 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006627 if (CUR == '<') {
6628 if (NXT(1) == '!') {
6629 switch (NXT(2)) {
6630 case 'E':
6631 if (NXT(3) == 'L')
6632 xmlParseElementDecl(ctxt);
6633 else if (NXT(3) == 'N')
6634 xmlParseEntityDecl(ctxt);
6635 break;
6636 case 'A':
6637 xmlParseAttributeListDecl(ctxt);
6638 break;
6639 case 'N':
6640 xmlParseNotationDecl(ctxt);
6641 break;
6642 case '-':
6643 xmlParseComment(ctxt);
6644 break;
6645 default:
6646 /* there is an error but it will be detected later */
6647 break;
6648 }
6649 } else if (NXT(1) == '?') {
6650 xmlParsePI(ctxt);
6651 }
6652 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006653 /*
6654 * This is only for internal subset. On external entities,
6655 * the replacement is done before parsing stage
6656 */
6657 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6658 xmlParsePEReference(ctxt);
6659
6660 /*
6661 * Conditional sections are allowed from entities included
6662 * by PE References in the internal subset.
6663 */
6664 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6665 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6666 xmlParseConditionalSections(ctxt);
6667 }
6668 }
6669
6670 ctxt->instate = XML_PARSER_DTD;
6671}
6672
6673/**
6674 * xmlParseTextDecl:
6675 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006676 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006677 * parse an XML declaration header for external entities
6678 *
6679 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006680 */
6681
6682void
6683xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6684 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006685 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006686
6687 /*
6688 * We know that '<?xml' is here.
6689 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006690 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006691 SKIP(5);
6692 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006693 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006694 return;
6695 }
6696
William M. Brack76e95df2003-10-18 16:20:14 +00006697 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006698 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6699 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006700 }
6701 SKIP_BLANKS;
6702
6703 /*
6704 * We may have the VersionInfo here.
6705 */
6706 version = xmlParseVersionInfo(ctxt);
6707 if (version == NULL)
6708 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006709 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006710 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006711 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6712 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006713 }
6714 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006715 ctxt->input->version = version;
6716
6717 /*
6718 * We must have the encoding declaration
6719 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006720 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006721 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6722 /*
6723 * The XML REC instructs us to stop parsing right here
6724 */
6725 return;
6726 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006727 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6728 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6729 "Missing encoding in text declaration\n");
6730 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006731
6732 SKIP_BLANKS;
6733 if ((RAW == '?') && (NXT(1) == '>')) {
6734 SKIP(2);
6735 } else if (RAW == '>') {
6736 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006737 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006738 NEXT;
6739 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006740 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006741 MOVETO_ENDTAG(CUR_PTR);
6742 NEXT;
6743 }
6744}
6745
6746/**
Owen Taylor3473f882001-02-23 17:55:21 +00006747 * xmlParseExternalSubset:
6748 * @ctxt: an XML parser context
6749 * @ExternalID: the external identifier
6750 * @SystemID: the system identifier (or URL)
6751 *
6752 * parse Markup declarations from an external subset
6753 *
6754 * [30] extSubset ::= textDecl? extSubsetDecl
6755 *
6756 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6757 */
6758void
6759xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6760 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006761 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006762 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006763
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006764 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006765 (ctxt->input->end - ctxt->input->cur >= 4)) {
6766 xmlChar start[4];
6767 xmlCharEncoding enc;
6768
6769 start[0] = RAW;
6770 start[1] = NXT(1);
6771 start[2] = NXT(2);
6772 start[3] = NXT(3);
6773 enc = xmlDetectCharEncoding(start, 4);
6774 if (enc != XML_CHAR_ENCODING_NONE)
6775 xmlSwitchEncoding(ctxt, enc);
6776 }
6777
Daniel Veillarda07050d2003-10-19 14:46:32 +00006778 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006779 xmlParseTextDecl(ctxt);
6780 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6781 /*
6782 * The XML REC instructs us to stop parsing right here
6783 */
6784 ctxt->instate = XML_PARSER_EOF;
6785 return;
6786 }
6787 }
6788 if (ctxt->myDoc == NULL) {
6789 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006790 if (ctxt->myDoc == NULL) {
6791 xmlErrMemory(ctxt, "New Doc failed");
6792 return;
6793 }
6794 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006795 }
6796 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6797 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6798
6799 ctxt->instate = XML_PARSER_DTD;
6800 ctxt->external = 1;
6801 while (((RAW == '<') && (NXT(1) == '?')) ||
6802 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006803 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006804 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006805 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006806
6807 GROW;
6808 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6809 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006810 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006811 NEXT;
6812 } else if (RAW == '%') {
6813 xmlParsePEReference(ctxt);
6814 } else
6815 xmlParseMarkupDecl(ctxt);
6816
6817 /*
6818 * Pop-up of finished entities.
6819 */
6820 while ((RAW == 0) && (ctxt->inputNr > 1))
6821 xmlPopInput(ctxt);
6822
Daniel Veillardfdc91562002-07-01 21:52:03 +00006823 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006824 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006825 break;
6826 }
6827 }
6828
6829 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006830 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006831 }
6832
6833}
6834
6835/**
6836 * xmlParseReference:
6837 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006838 *
Owen Taylor3473f882001-02-23 17:55:21 +00006839 * parse and handle entity references in content, depending on the SAX
6840 * interface, this may end-up in a call to character() if this is a
6841 * CharRef, a predefined entity, if there is no reference() callback.
6842 * or if the parser was asked to switch to that mode.
6843 *
6844 * [67] Reference ::= EntityRef | CharRef
6845 */
6846void
6847xmlParseReference(xmlParserCtxtPtr ctxt) {
6848 xmlEntityPtr ent;
6849 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006850 int was_checked;
6851 xmlNodePtr list = NULL;
6852 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006853
Daniel Veillard0161e632008-08-28 15:36:32 +00006854
6855 if (RAW != '&')
6856 return;
6857
6858 /*
6859 * Simple case of a CharRef
6860 */
Owen Taylor3473f882001-02-23 17:55:21 +00006861 if (NXT(1) == '#') {
6862 int i = 0;
6863 xmlChar out[10];
6864 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006865 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006866
Daniel Veillarddc171602008-03-26 17:41:38 +00006867 if (value == 0)
6868 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006869 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6870 /*
6871 * So we are using non-UTF-8 buffers
6872 * Check that the char fit on 8bits, if not
6873 * generate a CharRef.
6874 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006875 if (value <= 0xFF) {
6876 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006877 out[1] = 0;
6878 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6879 (!ctxt->disableSAX))
6880 ctxt->sax->characters(ctxt->userData, out, 1);
6881 } else {
6882 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006883 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006884 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006885 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006886 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6887 (!ctxt->disableSAX))
6888 ctxt->sax->reference(ctxt->userData, out);
6889 }
6890 } else {
6891 /*
6892 * Just encode the value in UTF-8
6893 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006894 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006895 out[i] = 0;
6896 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6897 (!ctxt->disableSAX))
6898 ctxt->sax->characters(ctxt->userData, out, i);
6899 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006900 return;
6901 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006902
Daniel Veillard0161e632008-08-28 15:36:32 +00006903 /*
6904 * We are seeing an entity reference
6905 */
6906 ent = xmlParseEntityRef(ctxt);
6907 if (ent == NULL) return;
6908 if (!ctxt->wellFormed)
6909 return;
6910 was_checked = ent->checked;
6911
6912 /* special case of predefined entities */
6913 if ((ent->name == NULL) ||
6914 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6915 val = ent->content;
6916 if (val == NULL) return;
6917 /*
6918 * inline the entity.
6919 */
6920 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6921 (!ctxt->disableSAX))
6922 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6923 return;
6924 }
6925
6926 /*
6927 * The first reference to the entity trigger a parsing phase
6928 * where the ent->children is filled with the result from
6929 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08006930 * Note: external parsed entities will not be loaded, it is not
6931 * required for a non-validating parser, unless the parsing option
6932 * of validating, or substituting entities were given. Doing so is
6933 * far more secure as the parser will only process data coming from
6934 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00006935 */
Daniel Veillard4629ee02012-07-23 14:15:40 +08006936 if ((ent->checked == 0) &&
6937 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
6938 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00006939 unsigned long oldnbent = ctxt->nbentities;
6940
6941 /*
6942 * This is a bit hackish but this seems the best
6943 * way to make sure both SAX and DOM entity support
6944 * behaves okay.
6945 */
6946 void *user_data;
6947 if (ctxt->userData == ctxt)
6948 user_data = NULL;
6949 else
6950 user_data = ctxt->userData;
6951
6952 /*
6953 * Check that this entity is well formed
6954 * 4.3.2: An internal general parsed entity is well-formed
6955 * if its replacement text matches the production labeled
6956 * content.
6957 */
6958 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6959 ctxt->depth++;
6960 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6961 user_data, &list);
6962 ctxt->depth--;
6963
6964 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6965 ctxt->depth++;
6966 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6967 user_data, ctxt->depth, ent->URI,
6968 ent->ExternalID, &list);
6969 ctxt->depth--;
6970 } else {
6971 ret = XML_ERR_ENTITY_PE_INTERNAL;
6972 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6973 "invalid entity type found\n", NULL);
6974 }
6975
6976 /*
6977 * Store the number of entities needing parsing for this entity
6978 * content and do checkings
6979 */
6980 ent->checked = ctxt->nbentities - oldnbent;
6981 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006982 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006983 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006984 return;
6985 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006986 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6987 xmlFreeNodeList(list);
6988 return;
6989 }
Owen Taylor3473f882001-02-23 17:55:21 +00006990
Daniel Veillard0161e632008-08-28 15:36:32 +00006991 if ((ret == XML_ERR_OK) && (list != NULL)) {
6992 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6993 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6994 (ent->children == NULL)) {
6995 ent->children = list;
6996 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006997 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006998 * Prune it directly in the generated document
6999 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007000 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007001 if (((list->type == XML_TEXT_NODE) &&
7002 (list->next == NULL)) ||
7003 (ctxt->parseMode == XML_PARSE_READER)) {
7004 list->parent = (xmlNodePtr) ent;
7005 list = NULL;
7006 ent->owner = 1;
7007 } else {
7008 ent->owner = 0;
7009 while (list != NULL) {
7010 list->parent = (xmlNodePtr) ctxt->node;
7011 list->doc = ctxt->myDoc;
7012 if (list->next == NULL)
7013 ent->last = list;
7014 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007015 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007016 list = ent->children;
7017#ifdef LIBXML_LEGACY_ENABLED
7018 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7019 xmlAddEntityReference(ent, list, NULL);
7020#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007021 }
7022 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007023 ent->owner = 1;
7024 while (list != NULL) {
7025 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007026 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007027 if (list->next == NULL)
7028 ent->last = list;
7029 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007030 }
7031 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007032 } else {
7033 xmlFreeNodeList(list);
7034 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007035 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007036 } else if ((ret != XML_ERR_OK) &&
7037 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7038 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7039 "Entity '%s' failed to parse\n", ent->name);
7040 } else if (list != NULL) {
7041 xmlFreeNodeList(list);
7042 list = NULL;
7043 }
7044 if (ent->checked == 0)
7045 ent->checked = 1;
7046 } else if (ent->checked != 1) {
7047 ctxt->nbentities += ent->checked;
7048 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007049
Daniel Veillard0161e632008-08-28 15:36:32 +00007050 /*
7051 * Now that the entity content has been gathered
7052 * provide it to the application, this can take different forms based
7053 * on the parsing modes.
7054 */
7055 if (ent->children == NULL) {
7056 /*
7057 * Probably running in SAX mode and the callbacks don't
7058 * build the entity content. So unless we already went
7059 * through parsing for the first check, go through the entity
7060 * content to generate the callbacks associated with the entity
7061 */
7062 if (was_checked != 0) {
7063 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007064 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007065 * This is a bit hackish but this seems the best
7066 * way to make sure both SAX and DOM entity support
7067 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007068 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007069 if (ctxt->userData == ctxt)
7070 user_data = NULL;
7071 else
7072 user_data = ctxt->userData;
7073
7074 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7075 ctxt->depth++;
7076 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7077 ent->content, user_data, NULL);
7078 ctxt->depth--;
7079 } else if (ent->etype ==
7080 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7081 ctxt->depth++;
7082 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7083 ctxt->sax, user_data, ctxt->depth,
7084 ent->URI, ent->ExternalID, NULL);
7085 ctxt->depth--;
7086 } else {
7087 ret = XML_ERR_ENTITY_PE_INTERNAL;
7088 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7089 "invalid entity type found\n", NULL);
7090 }
7091 if (ret == XML_ERR_ENTITY_LOOP) {
7092 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7093 return;
7094 }
7095 }
7096 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7097 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7098 /*
7099 * Entity reference callback comes second, it's somewhat
7100 * superfluous but a compatibility to historical behaviour
7101 */
7102 ctxt->sax->reference(ctxt->userData, ent->name);
7103 }
7104 return;
7105 }
7106
7107 /*
7108 * If we didn't get any children for the entity being built
7109 */
7110 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7111 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7112 /*
7113 * Create a node.
7114 */
7115 ctxt->sax->reference(ctxt->userData, ent->name);
7116 return;
7117 }
7118
7119 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7120 /*
7121 * There is a problem on the handling of _private for entities
7122 * (bug 155816): Should we copy the content of the field from
7123 * the entity (possibly overwriting some value set by the user
7124 * when a copy is created), should we leave it alone, or should
7125 * we try to take care of different situations? The problem
7126 * is exacerbated by the usage of this field by the xmlReader.
7127 * To fix this bug, we look at _private on the created node
7128 * and, if it's NULL, we copy in whatever was in the entity.
7129 * If it's not NULL we leave it alone. This is somewhat of a
7130 * hack - maybe we should have further tests to determine
7131 * what to do.
7132 */
7133 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7134 /*
7135 * Seems we are generating the DOM content, do
7136 * a simple tree copy for all references except the first
7137 * In the first occurrence list contains the replacement.
7138 * progressive == 2 means we are operating on the Reader
7139 * and since nodes are discarded we must copy all the time.
7140 */
7141 if (((list == NULL) && (ent->owner == 0)) ||
7142 (ctxt->parseMode == XML_PARSE_READER)) {
7143 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7144
7145 /*
7146 * when operating on a reader, the entities definitions
7147 * are always owning the entities subtree.
7148 if (ctxt->parseMode == XML_PARSE_READER)
7149 ent->owner = 1;
7150 */
7151
7152 cur = ent->children;
7153 while (cur != NULL) {
7154 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7155 if (nw != NULL) {
7156 if (nw->_private == NULL)
7157 nw->_private = cur->_private;
7158 if (firstChild == NULL){
7159 firstChild = nw;
7160 }
7161 nw = xmlAddChild(ctxt->node, nw);
7162 }
7163 if (cur == ent->last) {
7164 /*
7165 * needed to detect some strange empty
7166 * node cases in the reader tests
7167 */
7168 if ((ctxt->parseMode == XML_PARSE_READER) &&
7169 (nw != NULL) &&
7170 (nw->type == XML_ELEMENT_NODE) &&
7171 (nw->children == NULL))
7172 nw->extra = 1;
7173
7174 break;
7175 }
7176 cur = cur->next;
7177 }
7178#ifdef LIBXML_LEGACY_ENABLED
7179 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7180 xmlAddEntityReference(ent, firstChild, nw);
7181#endif /* LIBXML_LEGACY_ENABLED */
7182 } else if (list == NULL) {
7183 xmlNodePtr nw = NULL, cur, next, last,
7184 firstChild = NULL;
7185 /*
7186 * Copy the entity child list and make it the new
7187 * entity child list. The goal is to make sure any
7188 * ID or REF referenced will be the one from the
7189 * document content and not the entity copy.
7190 */
7191 cur = ent->children;
7192 ent->children = NULL;
7193 last = ent->last;
7194 ent->last = NULL;
7195 while (cur != NULL) {
7196 next = cur->next;
7197 cur->next = NULL;
7198 cur->parent = NULL;
7199 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7200 if (nw != NULL) {
7201 if (nw->_private == NULL)
7202 nw->_private = cur->_private;
7203 if (firstChild == NULL){
7204 firstChild = cur;
7205 }
7206 xmlAddChild((xmlNodePtr) ent, nw);
7207 xmlAddChild(ctxt->node, cur);
7208 }
7209 if (cur == last)
7210 break;
7211 cur = next;
7212 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007213 if (ent->owner == 0)
7214 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007215#ifdef LIBXML_LEGACY_ENABLED
7216 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7217 xmlAddEntityReference(ent, firstChild, nw);
7218#endif /* LIBXML_LEGACY_ENABLED */
7219 } else {
7220 const xmlChar *nbktext;
7221
7222 /*
7223 * the name change is to avoid coalescing of the
7224 * node with a possible previous text one which
7225 * would make ent->children a dangling pointer
7226 */
7227 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7228 -1);
7229 if (ent->children->type == XML_TEXT_NODE)
7230 ent->children->name = nbktext;
7231 if ((ent->last != ent->children) &&
7232 (ent->last->type == XML_TEXT_NODE))
7233 ent->last->name = nbktext;
7234 xmlAddChildList(ctxt->node, ent->children);
7235 }
7236
7237 /*
7238 * This is to avoid a nasty side effect, see
7239 * characters() in SAX.c
7240 */
7241 ctxt->nodemem = 0;
7242 ctxt->nodelen = 0;
7243 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007244 }
7245 }
7246}
7247
7248/**
7249 * xmlParseEntityRef:
7250 * @ctxt: an XML parser context
7251 *
7252 * parse ENTITY references declarations
7253 *
7254 * [68] EntityRef ::= '&' Name ';'
7255 *
7256 * [ WFC: Entity Declared ]
7257 * In a document without any DTD, a document with only an internal DTD
7258 * subset which contains no parameter entity references, or a document
7259 * with "standalone='yes'", the Name given in the entity reference
7260 * must match that in an entity declaration, except that well-formed
7261 * documents need not declare any of the following entities: amp, lt,
7262 * gt, apos, quot. The declaration of a parameter entity must precede
7263 * any reference to it. Similarly, the declaration of a general entity
7264 * must precede any reference to it which appears in a default value in an
7265 * attribute-list declaration. Note that if entities are declared in the
7266 * external subset or in external parameter entities, a non-validating
7267 * processor is not obligated to read and process their declarations;
7268 * for such documents, the rule that an entity must be declared is a
7269 * well-formedness constraint only if standalone='yes'.
7270 *
7271 * [ WFC: Parsed Entity ]
7272 * An entity reference must not contain the name of an unparsed entity
7273 *
7274 * Returns the xmlEntityPtr if found, or NULL otherwise.
7275 */
7276xmlEntityPtr
7277xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007278 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007279 xmlEntityPtr ent = NULL;
7280
7281 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007282
Daniel Veillard0161e632008-08-28 15:36:32 +00007283 if (RAW != '&')
7284 return(NULL);
7285 NEXT;
7286 name = xmlParseName(ctxt);
7287 if (name == NULL) {
7288 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7289 "xmlParseEntityRef: no name\n");
7290 return(NULL);
7291 }
7292 if (RAW != ';') {
7293 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7294 return(NULL);
7295 }
7296 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007297
Daniel Veillard0161e632008-08-28 15:36:32 +00007298 /*
7299 * Predefined entites override any extra definition
7300 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007301 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7302 ent = xmlGetPredefinedEntity(name);
7303 if (ent != NULL)
7304 return(ent);
7305 }
Owen Taylor3473f882001-02-23 17:55:21 +00007306
Daniel Veillard0161e632008-08-28 15:36:32 +00007307 /*
7308 * Increate the number of entity references parsed
7309 */
7310 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007311
Daniel Veillard0161e632008-08-28 15:36:32 +00007312 /*
7313 * Ask first SAX for entity resolution, otherwise try the
7314 * entities which may have stored in the parser context.
7315 */
7316 if (ctxt->sax != NULL) {
7317 if (ctxt->sax->getEntity != NULL)
7318 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007319 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7320 (ctxt->options & XML_PARSE_OLDSAX))
7321 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007322 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7323 (ctxt->userData==ctxt)) {
7324 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007325 }
7326 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007327 /*
7328 * [ WFC: Entity Declared ]
7329 * In a document without any DTD, a document with only an
7330 * internal DTD subset which contains no parameter entity
7331 * references, or a document with "standalone='yes'", the
7332 * Name given in the entity reference must match that in an
7333 * entity declaration, except that well-formed documents
7334 * need not declare any of the following entities: amp, lt,
7335 * gt, apos, quot.
7336 * The declaration of a parameter entity must precede any
7337 * reference to it.
7338 * Similarly, the declaration of a general entity must
7339 * precede any reference to it which appears in a default
7340 * value in an attribute-list declaration. Note that if
7341 * entities are declared in the external subset or in
7342 * external parameter entities, a non-validating processor
7343 * is not obligated to read and process their declarations;
7344 * for such documents, the rule that an entity must be
7345 * declared is a well-formedness constraint only if
7346 * standalone='yes'.
7347 */
7348 if (ent == NULL) {
7349 if ((ctxt->standalone == 1) ||
7350 ((ctxt->hasExternalSubset == 0) &&
7351 (ctxt->hasPErefs == 0))) {
7352 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7353 "Entity '%s' not defined\n", name);
7354 } else {
7355 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7356 "Entity '%s' not defined\n", name);
7357 if ((ctxt->inSubset == 0) &&
7358 (ctxt->sax != NULL) &&
7359 (ctxt->sax->reference != NULL)) {
7360 ctxt->sax->reference(ctxt->userData, name);
7361 }
7362 }
7363 ctxt->valid = 0;
7364 }
7365
7366 /*
7367 * [ WFC: Parsed Entity ]
7368 * An entity reference must not contain the name of an
7369 * unparsed entity
7370 */
7371 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7372 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7373 "Entity reference to unparsed entity %s\n", name);
7374 }
7375
7376 /*
7377 * [ WFC: No External Entity References ]
7378 * Attribute values cannot contain direct or indirect
7379 * entity references to external entities.
7380 */
7381 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7382 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7383 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7384 "Attribute references external entity '%s'\n", name);
7385 }
7386 /*
7387 * [ WFC: No < in Attribute Values ]
7388 * The replacement text of any entity referred to directly or
7389 * indirectly in an attribute value (other than "&lt;") must
7390 * not contain a <.
7391 */
7392 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7393 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007394 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007395 (xmlStrchr(ent->content, '<'))) {
7396 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7397 "'<' in entity '%s' is not allowed in attributes values\n", name);
7398 }
7399
7400 /*
7401 * Internal check, no parameter entities here ...
7402 */
7403 else {
7404 switch (ent->etype) {
7405 case XML_INTERNAL_PARAMETER_ENTITY:
7406 case XML_EXTERNAL_PARAMETER_ENTITY:
7407 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7408 "Attempt to reference the parameter entity '%s'\n",
7409 name);
7410 break;
7411 default:
7412 break;
7413 }
7414 }
7415
7416 /*
7417 * [ WFC: No Recursion ]
7418 * A parsed entity must not contain a recursive reference
7419 * to itself, either directly or indirectly.
7420 * Done somewhere else
7421 */
Owen Taylor3473f882001-02-23 17:55:21 +00007422 return(ent);
7423}
7424
7425/**
7426 * xmlParseStringEntityRef:
7427 * @ctxt: an XML parser context
7428 * @str: a pointer to an index in the string
7429 *
7430 * parse ENTITY references declarations, but this version parses it from
7431 * a string value.
7432 *
7433 * [68] EntityRef ::= '&' Name ';'
7434 *
7435 * [ WFC: Entity Declared ]
7436 * In a document without any DTD, a document with only an internal DTD
7437 * subset which contains no parameter entity references, or a document
7438 * with "standalone='yes'", the Name given in the entity reference
7439 * must match that in an entity declaration, except that well-formed
7440 * documents need not declare any of the following entities: amp, lt,
7441 * gt, apos, quot. The declaration of a parameter entity must precede
7442 * any reference to it. Similarly, the declaration of a general entity
7443 * must precede any reference to it which appears in a default value in an
7444 * attribute-list declaration. Note that if entities are declared in the
7445 * external subset or in external parameter entities, a non-validating
7446 * processor is not obligated to read and process their declarations;
7447 * for such documents, the rule that an entity must be declared is a
7448 * well-formedness constraint only if standalone='yes'.
7449 *
7450 * [ WFC: Parsed Entity ]
7451 * An entity reference must not contain the name of an unparsed entity
7452 *
7453 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7454 * is updated to the current location in the string.
7455 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007456static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007457xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7458 xmlChar *name;
7459 const xmlChar *ptr;
7460 xmlChar cur;
7461 xmlEntityPtr ent = NULL;
7462
7463 if ((str == NULL) || (*str == NULL))
7464 return(NULL);
7465 ptr = *str;
7466 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007467 if (cur != '&')
7468 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007469
Daniel Veillard0161e632008-08-28 15:36:32 +00007470 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007471 name = xmlParseStringName(ctxt, &ptr);
7472 if (name == NULL) {
7473 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7474 "xmlParseStringEntityRef: no name\n");
7475 *str = ptr;
7476 return(NULL);
7477 }
7478 if (*ptr != ';') {
7479 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007480 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007481 *str = ptr;
7482 return(NULL);
7483 }
7484 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007485
Owen Taylor3473f882001-02-23 17:55:21 +00007486
Daniel Veillard0161e632008-08-28 15:36:32 +00007487 /*
7488 * Predefined entites override any extra definition
7489 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007490 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7491 ent = xmlGetPredefinedEntity(name);
7492 if (ent != NULL) {
7493 xmlFree(name);
7494 *str = ptr;
7495 return(ent);
7496 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007497 }
Owen Taylor3473f882001-02-23 17:55:21 +00007498
Daniel Veillard0161e632008-08-28 15:36:32 +00007499 /*
7500 * Increate the number of entity references parsed
7501 */
7502 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007503
Daniel Veillard0161e632008-08-28 15:36:32 +00007504 /*
7505 * Ask first SAX for entity resolution, otherwise try the
7506 * entities which may have stored in the parser context.
7507 */
7508 if (ctxt->sax != NULL) {
7509 if (ctxt->sax->getEntity != NULL)
7510 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007511 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7512 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007513 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7514 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007515 }
7516 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007517
7518 /*
7519 * [ WFC: Entity Declared ]
7520 * In a document without any DTD, a document with only an
7521 * internal DTD subset which contains no parameter entity
7522 * references, or a document with "standalone='yes'", the
7523 * Name given in the entity reference must match that in an
7524 * entity declaration, except that well-formed documents
7525 * need not declare any of the following entities: amp, lt,
7526 * gt, apos, quot.
7527 * The declaration of a parameter entity must precede any
7528 * reference to it.
7529 * Similarly, the declaration of a general entity must
7530 * precede any reference to it which appears in a default
7531 * value in an attribute-list declaration. Note that if
7532 * entities are declared in the external subset or in
7533 * external parameter entities, a non-validating processor
7534 * is not obligated to read and process their declarations;
7535 * for such documents, the rule that an entity must be
7536 * declared is a well-formedness constraint only if
7537 * standalone='yes'.
7538 */
7539 if (ent == NULL) {
7540 if ((ctxt->standalone == 1) ||
7541 ((ctxt->hasExternalSubset == 0) &&
7542 (ctxt->hasPErefs == 0))) {
7543 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7544 "Entity '%s' not defined\n", name);
7545 } else {
7546 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7547 "Entity '%s' not defined\n",
7548 name);
7549 }
7550 /* TODO ? check regressions ctxt->valid = 0; */
7551 }
7552
7553 /*
7554 * [ WFC: Parsed Entity ]
7555 * An entity reference must not contain the name of an
7556 * unparsed entity
7557 */
7558 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7559 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7560 "Entity reference to unparsed entity %s\n", name);
7561 }
7562
7563 /*
7564 * [ WFC: No External Entity References ]
7565 * Attribute values cannot contain direct or indirect
7566 * entity references to external entities.
7567 */
7568 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7569 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7570 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7571 "Attribute references external entity '%s'\n", name);
7572 }
7573 /*
7574 * [ WFC: No < in Attribute Values ]
7575 * The replacement text of any entity referred to directly or
7576 * indirectly in an attribute value (other than "&lt;") must
7577 * not contain a <.
7578 */
7579 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7580 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007581 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007582 (xmlStrchr(ent->content, '<'))) {
7583 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7584 "'<' in entity '%s' is not allowed in attributes values\n",
7585 name);
7586 }
7587
7588 /*
7589 * Internal check, no parameter entities here ...
7590 */
7591 else {
7592 switch (ent->etype) {
7593 case XML_INTERNAL_PARAMETER_ENTITY:
7594 case XML_EXTERNAL_PARAMETER_ENTITY:
7595 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7596 "Attempt to reference the parameter entity '%s'\n",
7597 name);
7598 break;
7599 default:
7600 break;
7601 }
7602 }
7603
7604 /*
7605 * [ WFC: No Recursion ]
7606 * A parsed entity must not contain a recursive reference
7607 * to itself, either directly or indirectly.
7608 * Done somewhere else
7609 */
7610
7611 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007612 *str = ptr;
7613 return(ent);
7614}
7615
7616/**
7617 * xmlParsePEReference:
7618 * @ctxt: an XML parser context
7619 *
7620 * parse PEReference declarations
7621 * The entity content is handled directly by pushing it's content as
7622 * a new input stream.
7623 *
7624 * [69] PEReference ::= '%' Name ';'
7625 *
7626 * [ WFC: No Recursion ]
7627 * A parsed entity must not contain a recursive
7628 * reference to itself, either directly or indirectly.
7629 *
7630 * [ WFC: Entity Declared ]
7631 * In a document without any DTD, a document with only an internal DTD
7632 * subset which contains no parameter entity references, or a document
7633 * with "standalone='yes'", ... ... The declaration of a parameter
7634 * entity must precede any reference to it...
7635 *
7636 * [ VC: Entity Declared ]
7637 * In a document with an external subset or external parameter entities
7638 * with "standalone='no'", ... ... The declaration of a parameter entity
7639 * must precede any reference to it...
7640 *
7641 * [ WFC: In DTD ]
7642 * Parameter-entity references may only appear in the DTD.
7643 * NOTE: misleading but this is handled.
7644 */
7645void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007646xmlParsePEReference(xmlParserCtxtPtr ctxt)
7647{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007648 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007649 xmlEntityPtr entity = NULL;
7650 xmlParserInputPtr input;
7651
Daniel Veillard0161e632008-08-28 15:36:32 +00007652 if (RAW != '%')
7653 return;
7654 NEXT;
7655 name = xmlParseName(ctxt);
7656 if (name == NULL) {
7657 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7658 "xmlParsePEReference: no name\n");
7659 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007660 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007661 if (RAW != ';') {
7662 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7663 return;
7664 }
7665
7666 NEXT;
7667
7668 /*
7669 * Increate the number of entity references parsed
7670 */
7671 ctxt->nbentities++;
7672
7673 /*
7674 * Request the entity from SAX
7675 */
7676 if ((ctxt->sax != NULL) &&
7677 (ctxt->sax->getParameterEntity != NULL))
7678 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7679 name);
7680 if (entity == NULL) {
7681 /*
7682 * [ WFC: Entity Declared ]
7683 * In a document without any DTD, a document with only an
7684 * internal DTD subset which contains no parameter entity
7685 * references, or a document with "standalone='yes'", ...
7686 * ... The declaration of a parameter entity must precede
7687 * any reference to it...
7688 */
7689 if ((ctxt->standalone == 1) ||
7690 ((ctxt->hasExternalSubset == 0) &&
7691 (ctxt->hasPErefs == 0))) {
7692 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7693 "PEReference: %%%s; not found\n",
7694 name);
7695 } else {
7696 /*
7697 * [ VC: Entity Declared ]
7698 * In a document with an external subset or external
7699 * parameter entities with "standalone='no'", ...
7700 * ... The declaration of a parameter entity must
7701 * precede any reference to it...
7702 */
7703 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7704 "PEReference: %%%s; not found\n",
7705 name, NULL);
7706 ctxt->valid = 0;
7707 }
7708 } else {
7709 /*
7710 * Internal checking in case the entity quest barfed
7711 */
7712 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7713 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7714 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7715 "Internal: %%%s; is not a parameter entity\n",
7716 name, NULL);
7717 } else if (ctxt->input->free != deallocblankswrapper) {
7718 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7719 if (xmlPushInput(ctxt, input) < 0)
7720 return;
7721 } else {
7722 /*
7723 * TODO !!!
7724 * handle the extra spaces added before and after
7725 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7726 */
7727 input = xmlNewEntityInputStream(ctxt, entity);
7728 if (xmlPushInput(ctxt, input) < 0)
7729 return;
7730 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7731 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7732 (IS_BLANK_CH(NXT(5)))) {
7733 xmlParseTextDecl(ctxt);
7734 if (ctxt->errNo ==
7735 XML_ERR_UNSUPPORTED_ENCODING) {
7736 /*
7737 * The XML REC instructs us to stop parsing
7738 * right here
7739 */
7740 ctxt->instate = XML_PARSER_EOF;
7741 return;
7742 }
7743 }
7744 }
7745 }
7746 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007747}
7748
7749/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007750 * xmlLoadEntityContent:
7751 * @ctxt: an XML parser context
7752 * @entity: an unloaded system entity
7753 *
7754 * Load the original content of the given system entity from the
7755 * ExternalID/SystemID given. This is to be used for Included in Literal
7756 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7757 *
7758 * Returns 0 in case of success and -1 in case of failure
7759 */
7760static int
7761xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7762 xmlParserInputPtr input;
7763 xmlBufferPtr buf;
7764 int l, c;
7765 int count = 0;
7766
7767 if ((ctxt == NULL) || (entity == NULL) ||
7768 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7769 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7770 (entity->content != NULL)) {
7771 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7772 "xmlLoadEntityContent parameter error");
7773 return(-1);
7774 }
7775
7776 if (xmlParserDebugEntities)
7777 xmlGenericError(xmlGenericErrorContext,
7778 "Reading %s entity content input\n", entity->name);
7779
7780 buf = xmlBufferCreate();
7781 if (buf == NULL) {
7782 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7783 "xmlLoadEntityContent parameter error");
7784 return(-1);
7785 }
7786
7787 input = xmlNewEntityInputStream(ctxt, entity);
7788 if (input == NULL) {
7789 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7790 "xmlLoadEntityContent input error");
7791 xmlBufferFree(buf);
7792 return(-1);
7793 }
7794
7795 /*
7796 * Push the entity as the current input, read char by char
7797 * saving to the buffer until the end of the entity or an error
7798 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007799 if (xmlPushInput(ctxt, input) < 0) {
7800 xmlBufferFree(buf);
7801 return(-1);
7802 }
7803
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007804 GROW;
7805 c = CUR_CHAR(l);
7806 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7807 (IS_CHAR(c))) {
7808 xmlBufferAdd(buf, ctxt->input->cur, l);
7809 if (count++ > 100) {
7810 count = 0;
7811 GROW;
7812 }
7813 NEXTL(l);
7814 c = CUR_CHAR(l);
7815 }
7816
7817 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7818 xmlPopInput(ctxt);
7819 } else if (!IS_CHAR(c)) {
7820 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7821 "xmlLoadEntityContent: invalid char value %d\n",
7822 c);
7823 xmlBufferFree(buf);
7824 return(-1);
7825 }
7826 entity->content = buf->content;
7827 buf->content = NULL;
7828 xmlBufferFree(buf);
7829
7830 return(0);
7831}
7832
7833/**
Owen Taylor3473f882001-02-23 17:55:21 +00007834 * xmlParseStringPEReference:
7835 * @ctxt: an XML parser context
7836 * @str: a pointer to an index in the string
7837 *
7838 * parse PEReference declarations
7839 *
7840 * [69] PEReference ::= '%' Name ';'
7841 *
7842 * [ WFC: No Recursion ]
7843 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007844 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007845 *
7846 * [ WFC: Entity Declared ]
7847 * In a document without any DTD, a document with only an internal DTD
7848 * subset which contains no parameter entity references, or a document
7849 * with "standalone='yes'", ... ... The declaration of a parameter
7850 * entity must precede any reference to it...
7851 *
7852 * [ VC: Entity Declared ]
7853 * In a document with an external subset or external parameter entities
7854 * with "standalone='no'", ... ... The declaration of a parameter entity
7855 * must precede any reference to it...
7856 *
7857 * [ WFC: In DTD ]
7858 * Parameter-entity references may only appear in the DTD.
7859 * NOTE: misleading but this is handled.
7860 *
7861 * Returns the string of the entity content.
7862 * str is updated to the current value of the index
7863 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007864static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007865xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7866 const xmlChar *ptr;
7867 xmlChar cur;
7868 xmlChar *name;
7869 xmlEntityPtr entity = NULL;
7870
7871 if ((str == NULL) || (*str == NULL)) return(NULL);
7872 ptr = *str;
7873 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007874 if (cur != '%')
7875 return(NULL);
7876 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007877 name = xmlParseStringName(ctxt, &ptr);
7878 if (name == NULL) {
7879 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7880 "xmlParseStringPEReference: no name\n");
7881 *str = ptr;
7882 return(NULL);
7883 }
7884 cur = *ptr;
7885 if (cur != ';') {
7886 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7887 xmlFree(name);
7888 *str = ptr;
7889 return(NULL);
7890 }
7891 ptr++;
7892
7893 /*
7894 * Increate the number of entity references parsed
7895 */
7896 ctxt->nbentities++;
7897
7898 /*
7899 * Request the entity from SAX
7900 */
7901 if ((ctxt->sax != NULL) &&
7902 (ctxt->sax->getParameterEntity != NULL))
7903 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7904 name);
7905 if (entity == NULL) {
7906 /*
7907 * [ WFC: Entity Declared ]
7908 * In a document without any DTD, a document with only an
7909 * internal DTD subset which contains no parameter entity
7910 * references, or a document with "standalone='yes'", ...
7911 * ... The declaration of a parameter entity must precede
7912 * any reference to it...
7913 */
7914 if ((ctxt->standalone == 1) ||
7915 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7916 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7917 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007918 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007919 /*
7920 * [ VC: Entity Declared ]
7921 * In a document with an external subset or external
7922 * parameter entities with "standalone='no'", ...
7923 * ... The declaration of a parameter entity must
7924 * precede any reference to it...
7925 */
7926 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7927 "PEReference: %%%s; not found\n",
7928 name, NULL);
7929 ctxt->valid = 0;
7930 }
7931 } else {
7932 /*
7933 * Internal checking in case the entity quest barfed
7934 */
7935 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7936 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7937 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7938 "%%%s; is not a parameter entity\n",
7939 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007940 }
7941 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007942 ctxt->hasPErefs = 1;
7943 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007944 *str = ptr;
7945 return(entity);
7946}
7947
7948/**
7949 * xmlParseDocTypeDecl:
7950 * @ctxt: an XML parser context
7951 *
7952 * parse a DOCTYPE declaration
7953 *
7954 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7955 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7956 *
7957 * [ VC: Root Element Type ]
7958 * The Name in the document type declaration must match the element
7959 * type of the root element.
7960 */
7961
7962void
7963xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007964 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007965 xmlChar *ExternalID = NULL;
7966 xmlChar *URI = NULL;
7967
7968 /*
7969 * We know that '<!DOCTYPE' has been detected.
7970 */
7971 SKIP(9);
7972
7973 SKIP_BLANKS;
7974
7975 /*
7976 * Parse the DOCTYPE name.
7977 */
7978 name = xmlParseName(ctxt);
7979 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007980 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7981 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007982 }
7983 ctxt->intSubName = name;
7984
7985 SKIP_BLANKS;
7986
7987 /*
7988 * Check for SystemID and ExternalID
7989 */
7990 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7991
7992 if ((URI != NULL) || (ExternalID != NULL)) {
7993 ctxt->hasExternalSubset = 1;
7994 }
7995 ctxt->extSubURI = URI;
7996 ctxt->extSubSystem = ExternalID;
7997
7998 SKIP_BLANKS;
7999
8000 /*
8001 * Create and update the internal subset.
8002 */
8003 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8004 (!ctxt->disableSAX))
8005 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8006
8007 /*
8008 * Is there any internal subset declarations ?
8009 * they are handled separately in xmlParseInternalSubset()
8010 */
8011 if (RAW == '[')
8012 return;
8013
8014 /*
8015 * We should be at the end of the DOCTYPE declaration.
8016 */
8017 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008018 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008019 }
8020 NEXT;
8021}
8022
8023/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008024 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008025 * @ctxt: an XML parser context
8026 *
8027 * parse the internal subset declaration
8028 *
8029 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8030 */
8031
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008032static void
Owen Taylor3473f882001-02-23 17:55:21 +00008033xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8034 /*
8035 * Is there any DTD definition ?
8036 */
8037 if (RAW == '[') {
8038 ctxt->instate = XML_PARSER_DTD;
8039 NEXT;
8040 /*
8041 * Parse the succession of Markup declarations and
8042 * PEReferences.
8043 * Subsequence (markupdecl | PEReference | S)*
8044 */
8045 while (RAW != ']') {
8046 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008047 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008048
8049 SKIP_BLANKS;
8050 xmlParseMarkupDecl(ctxt);
8051 xmlParsePEReference(ctxt);
8052
8053 /*
8054 * Pop-up of finished entities.
8055 */
8056 while ((RAW == 0) && (ctxt->inputNr > 1))
8057 xmlPopInput(ctxt);
8058
8059 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008060 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008061 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008062 break;
8063 }
8064 }
8065 if (RAW == ']') {
8066 NEXT;
8067 SKIP_BLANKS;
8068 }
8069 }
8070
8071 /*
8072 * We should be at the end of the DOCTYPE declaration.
8073 */
8074 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008075 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008076 }
8077 NEXT;
8078}
8079
Daniel Veillard81273902003-09-30 00:43:48 +00008080#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008081/**
8082 * xmlParseAttribute:
8083 * @ctxt: an XML parser context
8084 * @value: a xmlChar ** used to store the value of the attribute
8085 *
8086 * parse an attribute
8087 *
8088 * [41] Attribute ::= Name Eq AttValue
8089 *
8090 * [ WFC: No External Entity References ]
8091 * Attribute values cannot contain direct or indirect entity references
8092 * to external entities.
8093 *
8094 * [ WFC: No < in Attribute Values ]
8095 * The replacement text of any entity referred to directly or indirectly in
8096 * an attribute value (other than "&lt;") must not contain a <.
8097 *
8098 * [ VC: Attribute Value Type ]
8099 * The attribute must have been declared; the value must be of the type
8100 * declared for it.
8101 *
8102 * [25] Eq ::= S? '=' S?
8103 *
8104 * With namespace:
8105 *
8106 * [NS 11] Attribute ::= QName Eq AttValue
8107 *
8108 * Also the case QName == xmlns:??? is handled independently as a namespace
8109 * definition.
8110 *
8111 * Returns the attribute name, and the value in *value.
8112 */
8113
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008114const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008115xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008116 const xmlChar *name;
8117 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008118
8119 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008120 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008121 name = xmlParseName(ctxt);
8122 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008123 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008124 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008125 return(NULL);
8126 }
8127
8128 /*
8129 * read the value
8130 */
8131 SKIP_BLANKS;
8132 if (RAW == '=') {
8133 NEXT;
8134 SKIP_BLANKS;
8135 val = xmlParseAttValue(ctxt);
8136 ctxt->instate = XML_PARSER_CONTENT;
8137 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008138 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008139 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008140 return(NULL);
8141 }
8142
8143 /*
8144 * Check that xml:lang conforms to the specification
8145 * No more registered as an error, just generate a warning now
8146 * since this was deprecated in XML second edition
8147 */
8148 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8149 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008150 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8151 "Malformed value for xml:lang : %s\n",
8152 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008153 }
8154 }
8155
8156 /*
8157 * Check that xml:space conforms to the specification
8158 */
8159 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8160 if (xmlStrEqual(val, BAD_CAST "default"))
8161 *(ctxt->space) = 0;
8162 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8163 *(ctxt->space) = 1;
8164 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008165 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008166"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008167 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008168 }
8169 }
8170
8171 *value = val;
8172 return(name);
8173}
8174
8175/**
8176 * xmlParseStartTag:
8177 * @ctxt: an XML parser context
8178 *
8179 * parse a start of tag either for rule element or
8180 * EmptyElement. In both case we don't parse the tag closing chars.
8181 *
8182 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8183 *
8184 * [ WFC: Unique Att Spec ]
8185 * No attribute name may appear more than once in the same start-tag or
8186 * empty-element tag.
8187 *
8188 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8189 *
8190 * [ WFC: Unique Att Spec ]
8191 * No attribute name may appear more than once in the same start-tag or
8192 * empty-element tag.
8193 *
8194 * With namespace:
8195 *
8196 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8197 *
8198 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8199 *
8200 * Returns the element name parsed
8201 */
8202
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008203const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008204xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008205 const xmlChar *name;
8206 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008207 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008208 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008209 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008210 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008211 int i;
8212
8213 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008214 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008215
8216 name = xmlParseName(ctxt);
8217 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008218 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008219 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008220 return(NULL);
8221 }
8222
8223 /*
8224 * Now parse the attributes, it ends up with the ending
8225 *
8226 * (S Attribute)* S?
8227 */
8228 SKIP_BLANKS;
8229 GROW;
8230
Daniel Veillard21a0f912001-02-25 19:54:14 +00008231 while ((RAW != '>') &&
8232 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008233 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008234 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008235 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008236
8237 attname = xmlParseAttribute(ctxt, &attvalue);
8238 if ((attname != NULL) && (attvalue != NULL)) {
8239 /*
8240 * [ WFC: Unique Att Spec ]
8241 * No attribute name may appear more than once in the same
8242 * start-tag or empty-element tag.
8243 */
8244 for (i = 0; i < nbatts;i += 2) {
8245 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008246 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008247 xmlFree(attvalue);
8248 goto failed;
8249 }
8250 }
Owen Taylor3473f882001-02-23 17:55:21 +00008251 /*
8252 * Add the pair to atts
8253 */
8254 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008255 maxatts = 22; /* allow for 10 attrs by default */
8256 atts = (const xmlChar **)
8257 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008258 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008259 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008260 if (attvalue != NULL)
8261 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008262 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008263 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008264 ctxt->atts = atts;
8265 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008266 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008267 const xmlChar **n;
8268
Owen Taylor3473f882001-02-23 17:55:21 +00008269 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008270 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008271 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008272 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008273 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008274 if (attvalue != NULL)
8275 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008276 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008277 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008278 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008279 ctxt->atts = atts;
8280 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008281 }
8282 atts[nbatts++] = attname;
8283 atts[nbatts++] = attvalue;
8284 atts[nbatts] = NULL;
8285 atts[nbatts + 1] = NULL;
8286 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008287 if (attvalue != NULL)
8288 xmlFree(attvalue);
8289 }
8290
8291failed:
8292
Daniel Veillard3772de32002-12-17 10:31:45 +00008293 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008294 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8295 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008296 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008297 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8298 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008299 }
8300 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008301 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8302 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008303 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8304 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008305 break;
8306 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008307 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008308 GROW;
8309 }
8310
8311 /*
8312 * SAX: Start of Element !
8313 */
8314 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008315 (!ctxt->disableSAX)) {
8316 if (nbatts > 0)
8317 ctxt->sax->startElement(ctxt->userData, name, atts);
8318 else
8319 ctxt->sax->startElement(ctxt->userData, name, NULL);
8320 }
Owen Taylor3473f882001-02-23 17:55:21 +00008321
8322 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008323 /* Free only the content strings */
8324 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008325 if (atts[i] != NULL)
8326 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008327 }
8328 return(name);
8329}
8330
8331/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008332 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008333 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008334 * @line: line of the start tag
8335 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008336 *
8337 * parse an end of tag
8338 *
8339 * [42] ETag ::= '</' Name S? '>'
8340 *
8341 * With namespace
8342 *
8343 * [NS 9] ETag ::= '</' QName S? '>'
8344 */
8345
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008346static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008347xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008348 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008349
8350 GROW;
8351 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008352 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008353 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008354 return;
8355 }
8356 SKIP(2);
8357
Daniel Veillard46de64e2002-05-29 08:21:33 +00008358 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008359
8360 /*
8361 * We should definitely be at the ending "S? '>'" part
8362 */
8363 GROW;
8364 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008365 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008366 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008367 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008368 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008369
8370 /*
8371 * [ WFC: Element Type Match ]
8372 * The Name in an element's end-tag must match the element type in the
8373 * start-tag.
8374 *
8375 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008376 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008377 if (name == NULL) name = BAD_CAST "unparseable";
8378 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008379 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008380 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008381 }
8382
8383 /*
8384 * SAX: End of Tag
8385 */
8386 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8387 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008388 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008389
Daniel Veillarde57ec792003-09-10 10:50:59 +00008390 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008391 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008392 return;
8393}
8394
8395/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008396 * xmlParseEndTag:
8397 * @ctxt: an XML parser context
8398 *
8399 * parse an end of tag
8400 *
8401 * [42] ETag ::= '</' Name S? '>'
8402 *
8403 * With namespace
8404 *
8405 * [NS 9] ETag ::= '</' QName S? '>'
8406 */
8407
8408void
8409xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008410 xmlParseEndTag1(ctxt, 0);
8411}
Daniel Veillard81273902003-09-30 00:43:48 +00008412#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008413
8414/************************************************************************
8415 * *
8416 * SAX 2 specific operations *
8417 * *
8418 ************************************************************************/
8419
Daniel Veillard0fb18932003-09-07 09:14:37 +00008420/*
8421 * xmlGetNamespace:
8422 * @ctxt: an XML parser context
8423 * @prefix: the prefix to lookup
8424 *
8425 * Lookup the namespace name for the @prefix (which ca be NULL)
8426 * The prefix must come from the @ctxt->dict dictionnary
8427 *
8428 * Returns the namespace name or NULL if not bound
8429 */
8430static const xmlChar *
8431xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8432 int i;
8433
Daniel Veillarde57ec792003-09-10 10:50:59 +00008434 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008435 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008436 if (ctxt->nsTab[i] == prefix) {
8437 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8438 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008439 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008440 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008441 return(NULL);
8442}
8443
8444/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008445 * xmlParseQName:
8446 * @ctxt: an XML parser context
8447 * @prefix: pointer to store the prefix part
8448 *
8449 * parse an XML Namespace QName
8450 *
8451 * [6] QName ::= (Prefix ':')? LocalPart
8452 * [7] Prefix ::= NCName
8453 * [8] LocalPart ::= NCName
8454 *
8455 * Returns the Name parsed or NULL
8456 */
8457
8458static const xmlChar *
8459xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8460 const xmlChar *l, *p;
8461
8462 GROW;
8463
8464 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008465 if (l == NULL) {
8466 if (CUR == ':') {
8467 l = xmlParseName(ctxt);
8468 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008469 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8470 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008471 *prefix = NULL;
8472 return(l);
8473 }
8474 }
8475 return(NULL);
8476 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008477 if (CUR == ':') {
8478 NEXT;
8479 p = l;
8480 l = xmlParseNCName(ctxt);
8481 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008482 xmlChar *tmp;
8483
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008484 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8485 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008486 l = xmlParseNmtoken(ctxt);
8487 if (l == NULL)
8488 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8489 else {
8490 tmp = xmlBuildQName(l, p, NULL, 0);
8491 xmlFree((char *)l);
8492 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008493 p = xmlDictLookup(ctxt->dict, tmp, -1);
8494 if (tmp != NULL) xmlFree(tmp);
8495 *prefix = NULL;
8496 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008497 }
8498 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008499 xmlChar *tmp;
8500
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008501 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8502 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008503 NEXT;
8504 tmp = (xmlChar *) xmlParseName(ctxt);
8505 if (tmp != NULL) {
8506 tmp = xmlBuildQName(tmp, l, NULL, 0);
8507 l = xmlDictLookup(ctxt->dict, tmp, -1);
8508 if (tmp != NULL) xmlFree(tmp);
8509 *prefix = p;
8510 return(l);
8511 }
8512 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8513 l = xmlDictLookup(ctxt->dict, tmp, -1);
8514 if (tmp != NULL) xmlFree(tmp);
8515 *prefix = p;
8516 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008517 }
8518 *prefix = p;
8519 } else
8520 *prefix = NULL;
8521 return(l);
8522}
8523
8524/**
8525 * xmlParseQNameAndCompare:
8526 * @ctxt: an XML parser context
8527 * @name: the localname
8528 * @prefix: the prefix, if any.
8529 *
8530 * parse an XML name and compares for match
8531 * (specialized for endtag parsing)
8532 *
8533 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8534 * and the name for mismatch
8535 */
8536
8537static const xmlChar *
8538xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8539 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008540 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008541 const xmlChar *in;
8542 const xmlChar *ret;
8543 const xmlChar *prefix2;
8544
8545 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8546
8547 GROW;
8548 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008549
Daniel Veillard0fb18932003-09-07 09:14:37 +00008550 cmp = prefix;
8551 while (*in != 0 && *in == *cmp) {
8552 ++in;
8553 ++cmp;
8554 }
8555 if ((*cmp == 0) && (*in == ':')) {
8556 in++;
8557 cmp = name;
8558 while (*in != 0 && *in == *cmp) {
8559 ++in;
8560 ++cmp;
8561 }
William M. Brack76e95df2003-10-18 16:20:14 +00008562 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008563 /* success */
8564 ctxt->input->cur = in;
8565 return((const xmlChar*) 1);
8566 }
8567 }
8568 /*
8569 * all strings coms from the dictionary, equality can be done directly
8570 */
8571 ret = xmlParseQName (ctxt, &prefix2);
8572 if ((ret == name) && (prefix == prefix2))
8573 return((const xmlChar*) 1);
8574 return ret;
8575}
8576
8577/**
8578 * xmlParseAttValueInternal:
8579 * @ctxt: an XML parser context
8580 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008581 * @alloc: whether the attribute was reallocated as a new string
8582 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008583 *
8584 * parse a value for an attribute.
8585 * NOTE: if no normalization is needed, the routine will return pointers
8586 * directly from the data buffer.
8587 *
8588 * 3.3.3 Attribute-Value Normalization:
8589 * Before the value of an attribute is passed to the application or
8590 * checked for validity, the XML processor must normalize it as follows:
8591 * - a character reference is processed by appending the referenced
8592 * character to the attribute value
8593 * - an entity reference is processed by recursively processing the
8594 * replacement text of the entity
8595 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8596 * appending #x20 to the normalized value, except that only a single
8597 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8598 * parsed entity or the literal entity value of an internal parsed entity
8599 * - other characters are processed by appending them to the normalized value
8600 * If the declared value is not CDATA, then the XML processor must further
8601 * process the normalized attribute value by discarding any leading and
8602 * trailing space (#x20) characters, and by replacing sequences of space
8603 * (#x20) characters by a single space (#x20) character.
8604 * All attributes for which no declaration has been read should be treated
8605 * by a non-validating parser as if declared CDATA.
8606 *
8607 * Returns the AttValue parsed or NULL. The value has to be freed by the
8608 * caller if it was copied, this can be detected by val[*len] == 0.
8609 */
8610
8611static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008612xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8613 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008614{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008615 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008616 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008617 xmlChar *ret = NULL;
8618
8619 GROW;
8620 in = (xmlChar *) CUR_PTR;
8621 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008622 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008623 return (NULL);
8624 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008625 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008626
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008627 /*
8628 * try to handle in this routine the most common case where no
8629 * allocation of a new string is required and where content is
8630 * pure ASCII.
8631 */
8632 limit = *in++;
8633 end = ctxt->input->end;
8634 start = in;
8635 if (in >= end) {
8636 const xmlChar *oldbase = ctxt->input->base;
8637 GROW;
8638 if (oldbase != ctxt->input->base) {
8639 long delta = ctxt->input->base - oldbase;
8640 start = start + delta;
8641 in = in + delta;
8642 }
8643 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008644 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008645 if (normalize) {
8646 /*
8647 * Skip any leading spaces
8648 */
8649 while ((in < end) && (*in != limit) &&
8650 ((*in == 0x20) || (*in == 0x9) ||
8651 (*in == 0xA) || (*in == 0xD))) {
8652 in++;
8653 start = in;
8654 if (in >= end) {
8655 const xmlChar *oldbase = ctxt->input->base;
8656 GROW;
8657 if (oldbase != ctxt->input->base) {
8658 long delta = ctxt->input->base - oldbase;
8659 start = start + delta;
8660 in = in + delta;
8661 }
8662 end = ctxt->input->end;
8663 }
8664 }
8665 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8666 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8667 if ((*in++ == 0x20) && (*in == 0x20)) break;
8668 if (in >= end) {
8669 const xmlChar *oldbase = ctxt->input->base;
8670 GROW;
8671 if (oldbase != ctxt->input->base) {
8672 long delta = ctxt->input->base - oldbase;
8673 start = start + delta;
8674 in = in + delta;
8675 }
8676 end = ctxt->input->end;
8677 }
8678 }
8679 last = in;
8680 /*
8681 * skip the trailing blanks
8682 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008683 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008684 while ((in < end) && (*in != limit) &&
8685 ((*in == 0x20) || (*in == 0x9) ||
8686 (*in == 0xA) || (*in == 0xD))) {
8687 in++;
8688 if (in >= end) {
8689 const xmlChar *oldbase = ctxt->input->base;
8690 GROW;
8691 if (oldbase != ctxt->input->base) {
8692 long delta = ctxt->input->base - oldbase;
8693 start = start + delta;
8694 in = in + delta;
8695 last = last + delta;
8696 }
8697 end = ctxt->input->end;
8698 }
8699 }
8700 if (*in != limit) goto need_complex;
8701 } else {
8702 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8703 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8704 in++;
8705 if (in >= end) {
8706 const xmlChar *oldbase = ctxt->input->base;
8707 GROW;
8708 if (oldbase != ctxt->input->base) {
8709 long delta = ctxt->input->base - oldbase;
8710 start = start + delta;
8711 in = in + delta;
8712 }
8713 end = ctxt->input->end;
8714 }
8715 }
8716 last = in;
8717 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008718 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008719 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008720 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008721 *len = last - start;
8722 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008723 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008724 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008725 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008726 }
8727 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008728 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008729 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008730need_complex:
8731 if (alloc) *alloc = 1;
8732 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008733}
8734
8735/**
8736 * xmlParseAttribute2:
8737 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008738 * @pref: the element prefix
8739 * @elem: the element name
8740 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008741 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008742 * @len: an int * to save the length of the attribute
8743 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008744 *
8745 * parse an attribute in the new SAX2 framework.
8746 *
8747 * Returns the attribute name, and the value in *value, .
8748 */
8749
8750static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008751xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008752 const xmlChar * pref, const xmlChar * elem,
8753 const xmlChar ** prefix, xmlChar ** value,
8754 int *len, int *alloc)
8755{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008756 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008757 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008758 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008759
8760 *value = NULL;
8761 GROW;
8762 name = xmlParseQName(ctxt, prefix);
8763 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008764 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8765 "error parsing attribute name\n");
8766 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008767 }
8768
8769 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008770 * get the type if needed
8771 */
8772 if (ctxt->attsSpecial != NULL) {
8773 int type;
8774
8775 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008776 pref, elem, *prefix, name);
8777 if (type != 0)
8778 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008779 }
8780
8781 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008782 * read the value
8783 */
8784 SKIP_BLANKS;
8785 if (RAW == '=') {
8786 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008787 SKIP_BLANKS;
8788 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8789 if (normalize) {
8790 /*
8791 * Sometimes a second normalisation pass for spaces is needed
8792 * but that only happens if charrefs or entities refernces
8793 * have been used in the attribute value, i.e. the attribute
8794 * value have been extracted in an allocated string already.
8795 */
8796 if (*alloc) {
8797 const xmlChar *val2;
8798
8799 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008800 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008801 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008802 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008803 }
8804 }
8805 }
8806 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008807 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008808 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8809 "Specification mandate value for attribute %s\n",
8810 name);
8811 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008812 }
8813
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008814 if (*prefix == ctxt->str_xml) {
8815 /*
8816 * Check that xml:lang conforms to the specification
8817 * No more registered as an error, just generate a warning now
8818 * since this was deprecated in XML second edition
8819 */
8820 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8821 internal_val = xmlStrndup(val, *len);
8822 if (!xmlCheckLanguageID(internal_val)) {
8823 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8824 "Malformed value for xml:lang : %s\n",
8825 internal_val, NULL);
8826 }
8827 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008828
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008829 /*
8830 * Check that xml:space conforms to the specification
8831 */
8832 if (xmlStrEqual(name, BAD_CAST "space")) {
8833 internal_val = xmlStrndup(val, *len);
8834 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8835 *(ctxt->space) = 0;
8836 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8837 *(ctxt->space) = 1;
8838 else {
8839 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8840 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8841 internal_val, NULL);
8842 }
8843 }
8844 if (internal_val) {
8845 xmlFree(internal_val);
8846 }
8847 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008848
8849 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008850 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008851}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008852/**
8853 * xmlParseStartTag2:
8854 * @ctxt: an XML parser context
8855 *
8856 * parse a start of tag either for rule element or
8857 * EmptyElement. In both case we don't parse the tag closing chars.
8858 * This routine is called when running SAX2 parsing
8859 *
8860 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8861 *
8862 * [ WFC: Unique Att Spec ]
8863 * No attribute name may appear more than once in the same start-tag or
8864 * empty-element tag.
8865 *
8866 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8867 *
8868 * [ WFC: Unique Att Spec ]
8869 * No attribute name may appear more than once in the same start-tag or
8870 * empty-element tag.
8871 *
8872 * With namespace:
8873 *
8874 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8875 *
8876 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8877 *
8878 * Returns the element name parsed
8879 */
8880
8881static const xmlChar *
8882xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008883 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008884 const xmlChar *localname;
8885 const xmlChar *prefix;
8886 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008887 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008888 const xmlChar *nsname;
8889 xmlChar *attvalue;
8890 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008891 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008892 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008893 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008894 const xmlChar *base;
8895 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008896 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008897
8898 if (RAW != '<') return(NULL);
8899 NEXT1;
8900
8901 /*
8902 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8903 * point since the attribute values may be stored as pointers to
8904 * the buffer and calling SHRINK would destroy them !
8905 * The Shrinking is only possible once the full set of attribute
8906 * callbacks have been done.
8907 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008908reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008909 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008910 base = ctxt->input->base;
8911 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008912 oldline = ctxt->input->line;
8913 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008914 nbatts = 0;
8915 nratts = 0;
8916 nbdef = 0;
8917 nbNs = 0;
8918 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008919 /* Forget any namespaces added during an earlier parse of this element. */
8920 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008921
8922 localname = xmlParseQName(ctxt, &prefix);
8923 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008924 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8925 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008926 return(NULL);
8927 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008928 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008929
8930 /*
8931 * Now parse the attributes, it ends up with the ending
8932 *
8933 * (S Attribute)* S?
8934 */
8935 SKIP_BLANKS;
8936 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008937 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008938
8939 while ((RAW != '>') &&
8940 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008941 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008942 const xmlChar *q = CUR_PTR;
8943 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008944 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008945
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008946 attname = xmlParseAttribute2(ctxt, prefix, localname,
8947 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008948 if (ctxt->input->base != base) {
8949 if ((attvalue != NULL) && (alloc != 0))
8950 xmlFree(attvalue);
8951 attvalue = NULL;
8952 goto base_changed;
8953 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008954 if ((attname != NULL) && (attvalue != NULL)) {
8955 if (len < 0) len = xmlStrlen(attvalue);
8956 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008957 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8958 xmlURIPtr uri;
8959
8960 if (*URL != 0) {
8961 uri = xmlParseURI((const char *) URL);
8962 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008963 xmlNsErr(ctxt, XML_WAR_NS_URI,
8964 "xmlns: '%s' is not a valid URI\n",
8965 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008966 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008967 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008968 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8969 "xmlns: URI %s is not absolute\n",
8970 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008971 }
8972 xmlFreeURI(uri);
8973 }
Daniel Veillard37334572008-07-31 08:20:02 +00008974 if (URL == ctxt->str_xml_ns) {
8975 if (attname != ctxt->str_xml) {
8976 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8977 "xml namespace URI cannot be the default namespace\n",
8978 NULL, NULL, NULL);
8979 }
8980 goto skip_default_ns;
8981 }
8982 if ((len == 29) &&
8983 (xmlStrEqual(URL,
8984 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8985 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8986 "reuse of the xmlns namespace name is forbidden\n",
8987 NULL, NULL, NULL);
8988 goto skip_default_ns;
8989 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008990 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008991 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008992 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008993 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008994 for (j = 1;j <= nbNs;j++)
8995 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8996 break;
8997 if (j <= nbNs)
8998 xmlErrAttributeDup(ctxt, NULL, attname);
8999 else
9000 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009001skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009002 if (alloc != 0) xmlFree(attvalue);
9003 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009004 continue;
9005 }
9006 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009007 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9008 xmlURIPtr uri;
9009
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009010 if (attname == ctxt->str_xml) {
9011 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009012 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9013 "xml namespace prefix mapped to wrong URI\n",
9014 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009015 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009016 /*
9017 * Do not keep a namespace definition node
9018 */
Daniel Veillard37334572008-07-31 08:20:02 +00009019 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009020 }
Daniel Veillard37334572008-07-31 08:20:02 +00009021 if (URL == ctxt->str_xml_ns) {
9022 if (attname != ctxt->str_xml) {
9023 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9024 "xml namespace URI mapped to wrong prefix\n",
9025 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009026 }
Daniel Veillard37334572008-07-31 08:20:02 +00009027 goto skip_ns;
9028 }
9029 if (attname == ctxt->str_xmlns) {
9030 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9031 "redefinition of the xmlns prefix is forbidden\n",
9032 NULL, NULL, NULL);
9033 goto skip_ns;
9034 }
9035 if ((len == 29) &&
9036 (xmlStrEqual(URL,
9037 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9038 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9039 "reuse of the xmlns namespace name is forbidden\n",
9040 NULL, NULL, NULL);
9041 goto skip_ns;
9042 }
9043 if ((URL == NULL) || (URL[0] == 0)) {
9044 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9045 "xmlns:%s: Empty XML namespace is not allowed\n",
9046 attname, NULL, NULL);
9047 goto skip_ns;
9048 } else {
9049 uri = xmlParseURI((const char *) URL);
9050 if (uri == NULL) {
9051 xmlNsErr(ctxt, XML_WAR_NS_URI,
9052 "xmlns:%s: '%s' is not a valid URI\n",
9053 attname, URL, NULL);
9054 } else {
9055 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9056 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9057 "xmlns:%s: URI %s is not absolute\n",
9058 attname, URL, NULL);
9059 }
9060 xmlFreeURI(uri);
9061 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009062 }
9063
Daniel Veillard0fb18932003-09-07 09:14:37 +00009064 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009065 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009066 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009067 for (j = 1;j <= nbNs;j++)
9068 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9069 break;
9070 if (j <= nbNs)
9071 xmlErrAttributeDup(ctxt, aprefix, attname);
9072 else
9073 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009074skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009075 if (alloc != 0) xmlFree(attvalue);
9076 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009077 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009078 continue;
9079 }
9080
9081 /*
9082 * Add the pair to atts
9083 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009084 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9085 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009086 if (attvalue[len] == 0)
9087 xmlFree(attvalue);
9088 goto failed;
9089 }
9090 maxatts = ctxt->maxatts;
9091 atts = ctxt->atts;
9092 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009093 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009094 atts[nbatts++] = attname;
9095 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009096 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009097 atts[nbatts++] = attvalue;
9098 attvalue += len;
9099 atts[nbatts++] = attvalue;
9100 /*
9101 * tag if some deallocation is needed
9102 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009103 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009104 } else {
9105 if ((attvalue != NULL) && (attvalue[len] == 0))
9106 xmlFree(attvalue);
9107 }
9108
Daniel Veillard37334572008-07-31 08:20:02 +00009109failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009110
9111 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00009112 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009113 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9114 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009115 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009116 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9117 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009118 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009119 }
9120 SKIP_BLANKS;
9121 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9122 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009123 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009124 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009125 break;
9126 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009127 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009128 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009129 }
9130
Daniel Veillard0fb18932003-09-07 09:14:37 +00009131 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009132 * The attributes defaulting
9133 */
9134 if (ctxt->attsDefault != NULL) {
9135 xmlDefAttrsPtr defaults;
9136
9137 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9138 if (defaults != NULL) {
9139 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009140 attname = defaults->values[5 * i];
9141 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009142
9143 /*
9144 * special work for namespaces defaulted defs
9145 */
9146 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9147 /*
9148 * check that it's not a defined namespace
9149 */
9150 for (j = 1;j <= nbNs;j++)
9151 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9152 break;
9153 if (j <= nbNs) continue;
9154
9155 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009156 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009157 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009158 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009159 nbNs++;
9160 }
9161 } else if (aprefix == ctxt->str_xmlns) {
9162 /*
9163 * check that it's not a defined namespace
9164 */
9165 for (j = 1;j <= nbNs;j++)
9166 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9167 break;
9168 if (j <= nbNs) continue;
9169
9170 nsname = xmlGetNamespace(ctxt, attname);
9171 if (nsname != defaults->values[2]) {
9172 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009173 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009174 nbNs++;
9175 }
9176 } else {
9177 /*
9178 * check that it's not a defined attribute
9179 */
9180 for (j = 0;j < nbatts;j+=5) {
9181 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9182 break;
9183 }
9184 if (j < nbatts) continue;
9185
9186 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9187 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009188 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009189 }
9190 maxatts = ctxt->maxatts;
9191 atts = ctxt->atts;
9192 }
9193 atts[nbatts++] = attname;
9194 atts[nbatts++] = aprefix;
9195 if (aprefix == NULL)
9196 atts[nbatts++] = NULL;
9197 else
9198 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009199 atts[nbatts++] = defaults->values[5 * i + 2];
9200 atts[nbatts++] = defaults->values[5 * i + 3];
9201 if ((ctxt->standalone == 1) &&
9202 (defaults->values[5 * i + 4] != NULL)) {
9203 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9204 "standalone: attribute %s on %s defaulted from external subset\n",
9205 attname, localname);
9206 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009207 nbdef++;
9208 }
9209 }
9210 }
9211 }
9212
Daniel Veillarde70c8772003-11-25 07:21:18 +00009213 /*
9214 * The attributes checkings
9215 */
9216 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009217 /*
9218 * The default namespace does not apply to attribute names.
9219 */
9220 if (atts[i + 1] != NULL) {
9221 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9222 if (nsname == NULL) {
9223 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9224 "Namespace prefix %s for %s on %s is not defined\n",
9225 atts[i + 1], atts[i], localname);
9226 }
9227 atts[i + 2] = nsname;
9228 } else
9229 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009230 /*
9231 * [ WFC: Unique Att Spec ]
9232 * No attribute name may appear more than once in the same
9233 * start-tag or empty-element tag.
9234 * As extended by the Namespace in XML REC.
9235 */
9236 for (j = 0; j < i;j += 5) {
9237 if (atts[i] == atts[j]) {
9238 if (atts[i+1] == atts[j+1]) {
9239 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9240 break;
9241 }
9242 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9243 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9244 "Namespaced Attribute %s in '%s' redefined\n",
9245 atts[i], nsname, NULL);
9246 break;
9247 }
9248 }
9249 }
9250 }
9251
Daniel Veillarde57ec792003-09-10 10:50:59 +00009252 nsname = xmlGetNamespace(ctxt, prefix);
9253 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009254 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9255 "Namespace prefix %s on %s is not defined\n",
9256 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009257 }
9258 *pref = prefix;
9259 *URI = nsname;
9260
9261 /*
9262 * SAX: Start of Element !
9263 */
9264 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9265 (!ctxt->disableSAX)) {
9266 if (nbNs > 0)
9267 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9268 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9269 nbatts / 5, nbdef, atts);
9270 else
9271 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9272 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9273 }
9274
9275 /*
9276 * Free up attribute allocated strings if needed
9277 */
9278 if (attval != 0) {
9279 for (i = 3,j = 0; j < nratts;i += 5,j++)
9280 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9281 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009282 }
9283
9284 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009285
9286base_changed:
9287 /*
9288 * the attribute strings are valid iif the base didn't changed
9289 */
9290 if (attval != 0) {
9291 for (i = 3,j = 0; j < nratts;i += 5,j++)
9292 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9293 xmlFree((xmlChar *) atts[i]);
9294 }
9295 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009296 ctxt->input->line = oldline;
9297 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009298 if (ctxt->wellFormed == 1) {
9299 goto reparse;
9300 }
9301 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009302}
9303
9304/**
9305 * xmlParseEndTag2:
9306 * @ctxt: an XML parser context
9307 * @line: line of the start tag
9308 * @nsNr: number of namespaces on the start tag
9309 *
9310 * parse an end of tag
9311 *
9312 * [42] ETag ::= '</' Name S? '>'
9313 *
9314 * With namespace
9315 *
9316 * [NS 9] ETag ::= '</' QName S? '>'
9317 */
9318
9319static void
9320xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009321 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009322 const xmlChar *name;
9323
9324 GROW;
9325 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009326 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009327 return;
9328 }
9329 SKIP(2);
9330
William M. Brack13dfa872004-09-18 04:52:08 +00009331 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009332 if (ctxt->input->cur[tlen] == '>') {
9333 ctxt->input->cur += tlen + 1;
9334 goto done;
9335 }
9336 ctxt->input->cur += tlen;
9337 name = (xmlChar*)1;
9338 } else {
9339 if (prefix == NULL)
9340 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9341 else
9342 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9343 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009344
9345 /*
9346 * We should definitely be at the ending "S? '>'" part
9347 */
9348 GROW;
9349 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009350 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009351 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009352 } else
9353 NEXT1;
9354
9355 /*
9356 * [ WFC: Element Type Match ]
9357 * The Name in an element's end-tag must match the element type in the
9358 * start-tag.
9359 *
9360 */
9361 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009362 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009363 if ((line == 0) && (ctxt->node != NULL))
9364 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009365 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009366 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009367 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009368 }
9369
9370 /*
9371 * SAX: End of Tag
9372 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009373done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009374 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9375 (!ctxt->disableSAX))
9376 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9377
Daniel Veillard0fb18932003-09-07 09:14:37 +00009378 spacePop(ctxt);
9379 if (nsNr != 0)
9380 nsPop(ctxt, nsNr);
9381 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009382}
9383
9384/**
Owen Taylor3473f882001-02-23 17:55:21 +00009385 * xmlParseCDSect:
9386 * @ctxt: an XML parser context
9387 *
9388 * Parse escaped pure raw content.
9389 *
9390 * [18] CDSect ::= CDStart CData CDEnd
9391 *
9392 * [19] CDStart ::= '<![CDATA['
9393 *
9394 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9395 *
9396 * [21] CDEnd ::= ']]>'
9397 */
9398void
9399xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9400 xmlChar *buf = NULL;
9401 int len = 0;
9402 int size = XML_PARSER_BUFFER_SIZE;
9403 int r, rl;
9404 int s, sl;
9405 int cur, l;
9406 int count = 0;
9407
Daniel Veillard8f597c32003-10-06 08:19:27 +00009408 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009409 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009410 SKIP(9);
9411 } else
9412 return;
9413
9414 ctxt->instate = XML_PARSER_CDATA_SECTION;
9415 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009416 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009417 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009418 ctxt->instate = XML_PARSER_CONTENT;
9419 return;
9420 }
9421 NEXTL(rl);
9422 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009423 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009424 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009425 ctxt->instate = XML_PARSER_CONTENT;
9426 return;
9427 }
9428 NEXTL(sl);
9429 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009430 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009431 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009432 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009433 return;
9434 }
William M. Brack871611b2003-10-18 04:53:14 +00009435 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009436 ((r != ']') || (s != ']') || (cur != '>'))) {
9437 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009438 xmlChar *tmp;
9439
Owen Taylor3473f882001-02-23 17:55:21 +00009440 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009441 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9442 if (tmp == NULL) {
9443 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009444 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009445 return;
9446 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009447 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009448 }
9449 COPY_BUF(rl,buf,len,r);
9450 r = s;
9451 rl = sl;
9452 s = cur;
9453 sl = l;
9454 count++;
9455 if (count > 50) {
9456 GROW;
9457 count = 0;
9458 }
9459 NEXTL(l);
9460 cur = CUR_CHAR(l);
9461 }
9462 buf[len] = 0;
9463 ctxt->instate = XML_PARSER_CONTENT;
9464 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009465 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009466 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009467 xmlFree(buf);
9468 return;
9469 }
9470 NEXTL(l);
9471
9472 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009473 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009474 */
9475 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9476 if (ctxt->sax->cdataBlock != NULL)
9477 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009478 else if (ctxt->sax->characters != NULL)
9479 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009480 }
9481 xmlFree(buf);
9482}
9483
9484/**
9485 * xmlParseContent:
9486 * @ctxt: an XML parser context
9487 *
9488 * Parse a content:
9489 *
9490 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9491 */
9492
9493void
9494xmlParseContent(xmlParserCtxtPtr ctxt) {
9495 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009496 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009497 ((RAW != '<') || (NXT(1) != '/')) &&
9498 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009499 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009500 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009501 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009502
9503 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009504 * First case : a Processing Instruction.
9505 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009506 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009507 xmlParsePI(ctxt);
9508 }
9509
9510 /*
9511 * Second case : a CDSection
9512 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009513 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009514 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009515 xmlParseCDSect(ctxt);
9516 }
9517
9518 /*
9519 * Third case : a comment
9520 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009521 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009522 (NXT(2) == '-') && (NXT(3) == '-')) {
9523 xmlParseComment(ctxt);
9524 ctxt->instate = XML_PARSER_CONTENT;
9525 }
9526
9527 /*
9528 * Fourth case : a sub-element.
9529 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009530 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009531 xmlParseElement(ctxt);
9532 }
9533
9534 /*
9535 * Fifth case : a reference. If if has not been resolved,
9536 * parsing returns it's Name, create the node
9537 */
9538
Daniel Veillard21a0f912001-02-25 19:54:14 +00009539 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009540 xmlParseReference(ctxt);
9541 }
9542
9543 /*
9544 * Last case, text. Note that References are handled directly.
9545 */
9546 else {
9547 xmlParseCharData(ctxt, 0);
9548 }
9549
9550 GROW;
9551 /*
9552 * Pop-up of finished entities.
9553 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009554 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009555 xmlPopInput(ctxt);
9556 SHRINK;
9557
Daniel Veillardfdc91562002-07-01 21:52:03 +00009558 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009559 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9560 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009561 ctxt->instate = XML_PARSER_EOF;
9562 break;
9563 }
9564 }
9565}
9566
9567/**
9568 * xmlParseElement:
9569 * @ctxt: an XML parser context
9570 *
9571 * parse an XML element, this is highly recursive
9572 *
9573 * [39] element ::= EmptyElemTag | STag content ETag
9574 *
9575 * [ WFC: Element Type Match ]
9576 * The Name in an element's end-tag must match the element type in the
9577 * start-tag.
9578 *
Owen Taylor3473f882001-02-23 17:55:21 +00009579 */
9580
9581void
9582xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009583 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009584 const xmlChar *prefix = NULL;
9585 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009586 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009587 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009588 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009589 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009590
Daniel Veillard8915c152008-08-26 13:05:34 +00009591 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9592 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9593 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9594 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9595 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009596 ctxt->instate = XML_PARSER_EOF;
9597 return;
9598 }
9599
Owen Taylor3473f882001-02-23 17:55:21 +00009600 /* Capture start position */
9601 if (ctxt->record_info) {
9602 node_info.begin_pos = ctxt->input->consumed +
9603 (CUR_PTR - ctxt->input->base);
9604 node_info.begin_line = ctxt->input->line;
9605 }
9606
9607 if (ctxt->spaceNr == 0)
9608 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009609 else if (*ctxt->space == -2)
9610 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009611 else
9612 spacePush(ctxt, *ctxt->space);
9613
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009614 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009615#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009616 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009617#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009618 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009619#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009620 else
9621 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009622#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009623 if (ctxt->instate == XML_PARSER_EOF)
9624 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009625 if (name == NULL) {
9626 spacePop(ctxt);
9627 return;
9628 }
9629 namePush(ctxt, name);
9630 ret = ctxt->node;
9631
Daniel Veillard4432df22003-09-28 18:58:27 +00009632#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009633 /*
9634 * [ VC: Root Element Type ]
9635 * The Name in the document type declaration must match the element
9636 * type of the root element.
9637 */
9638 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9639 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9640 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009641#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009642
9643 /*
9644 * Check for an Empty Element.
9645 */
9646 if ((RAW == '/') && (NXT(1) == '>')) {
9647 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009648 if (ctxt->sax2) {
9649 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9650 (!ctxt->disableSAX))
9651 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009652#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009653 } else {
9654 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9655 (!ctxt->disableSAX))
9656 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009657#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009658 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009659 namePop(ctxt);
9660 spacePop(ctxt);
9661 if (nsNr != ctxt->nsNr)
9662 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009663 if ( ret != NULL && ctxt->record_info ) {
9664 node_info.end_pos = ctxt->input->consumed +
9665 (CUR_PTR - ctxt->input->base);
9666 node_info.end_line = ctxt->input->line;
9667 node_info.node = ret;
9668 xmlParserAddNodeInfo(ctxt, &node_info);
9669 }
9670 return;
9671 }
9672 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009673 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009674 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009675 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9676 "Couldn't find end of Start Tag %s line %d\n",
9677 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009678
9679 /*
9680 * end of parsing of this node.
9681 */
9682 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009683 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009684 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009685 if (nsNr != ctxt->nsNr)
9686 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009687
9688 /*
9689 * Capture end position and add node
9690 */
9691 if ( ret != NULL && ctxt->record_info ) {
9692 node_info.end_pos = ctxt->input->consumed +
9693 (CUR_PTR - ctxt->input->base);
9694 node_info.end_line = ctxt->input->line;
9695 node_info.node = ret;
9696 xmlParserAddNodeInfo(ctxt, &node_info);
9697 }
9698 return;
9699 }
9700
9701 /*
9702 * Parse the content of the element:
9703 */
9704 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009705 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009706 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009707 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009708 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009709
9710 /*
9711 * end of parsing of this node.
9712 */
9713 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009714 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009715 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009716 if (nsNr != ctxt->nsNr)
9717 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009718 return;
9719 }
9720
9721 /*
9722 * parse the end of tag: '</' should be here.
9723 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009724 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009725 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009726 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009727 }
9728#ifdef LIBXML_SAX1_ENABLED
9729 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009730 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009731#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009732
9733 /*
9734 * Capture end position and add node
9735 */
9736 if ( ret != NULL && ctxt->record_info ) {
9737 node_info.end_pos = ctxt->input->consumed +
9738 (CUR_PTR - ctxt->input->base);
9739 node_info.end_line = ctxt->input->line;
9740 node_info.node = ret;
9741 xmlParserAddNodeInfo(ctxt, &node_info);
9742 }
9743}
9744
9745/**
9746 * xmlParseVersionNum:
9747 * @ctxt: an XML parser context
9748 *
9749 * parse the XML version value.
9750 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009751 * [26] VersionNum ::= '1.' [0-9]+
9752 *
9753 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009754 *
9755 * Returns the string giving the XML version number, or NULL
9756 */
9757xmlChar *
9758xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9759 xmlChar *buf = NULL;
9760 int len = 0;
9761 int size = 10;
9762 xmlChar cur;
9763
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009764 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009765 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009766 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009767 return(NULL);
9768 }
9769 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009770 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009771 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009772 return(NULL);
9773 }
9774 buf[len++] = cur;
9775 NEXT;
9776 cur=CUR;
9777 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009778 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009779 return(NULL);
9780 }
9781 buf[len++] = cur;
9782 NEXT;
9783 cur=CUR;
9784 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009785 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009786 xmlChar *tmp;
9787
Owen Taylor3473f882001-02-23 17:55:21 +00009788 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009789 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9790 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009791 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009792 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009793 return(NULL);
9794 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009795 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009796 }
9797 buf[len++] = cur;
9798 NEXT;
9799 cur=CUR;
9800 }
9801 buf[len] = 0;
9802 return(buf);
9803}
9804
9805/**
9806 * xmlParseVersionInfo:
9807 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009808 *
Owen Taylor3473f882001-02-23 17:55:21 +00009809 * parse the XML version.
9810 *
9811 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009812 *
Owen Taylor3473f882001-02-23 17:55:21 +00009813 * [25] Eq ::= S? '=' S?
9814 *
9815 * Returns the version string, e.g. "1.0"
9816 */
9817
9818xmlChar *
9819xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9820 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009821
Daniel Veillarda07050d2003-10-19 14:46:32 +00009822 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009823 SKIP(7);
9824 SKIP_BLANKS;
9825 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009826 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009827 return(NULL);
9828 }
9829 NEXT;
9830 SKIP_BLANKS;
9831 if (RAW == '"') {
9832 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009833 version = xmlParseVersionNum(ctxt);
9834 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009835 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009836 } else
9837 NEXT;
9838 } else if (RAW == '\''){
9839 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009840 version = xmlParseVersionNum(ctxt);
9841 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009842 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009843 } else
9844 NEXT;
9845 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009846 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009847 }
9848 }
9849 return(version);
9850}
9851
9852/**
9853 * xmlParseEncName:
9854 * @ctxt: an XML parser context
9855 *
9856 * parse the XML encoding name
9857 *
9858 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9859 *
9860 * Returns the encoding name value or NULL
9861 */
9862xmlChar *
9863xmlParseEncName(xmlParserCtxtPtr ctxt) {
9864 xmlChar *buf = NULL;
9865 int len = 0;
9866 int size = 10;
9867 xmlChar cur;
9868
9869 cur = CUR;
9870 if (((cur >= 'a') && (cur <= 'z')) ||
9871 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009872 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009873 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009874 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009875 return(NULL);
9876 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009877
Owen Taylor3473f882001-02-23 17:55:21 +00009878 buf[len++] = cur;
9879 NEXT;
9880 cur = CUR;
9881 while (((cur >= 'a') && (cur <= 'z')) ||
9882 ((cur >= 'A') && (cur <= 'Z')) ||
9883 ((cur >= '0') && (cur <= '9')) ||
9884 (cur == '.') || (cur == '_') ||
9885 (cur == '-')) {
9886 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009887 xmlChar *tmp;
9888
Owen Taylor3473f882001-02-23 17:55:21 +00009889 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009890 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9891 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009892 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009893 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009894 return(NULL);
9895 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009896 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009897 }
9898 buf[len++] = cur;
9899 NEXT;
9900 cur = CUR;
9901 if (cur == 0) {
9902 SHRINK;
9903 GROW;
9904 cur = CUR;
9905 }
9906 }
9907 buf[len] = 0;
9908 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009909 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009910 }
9911 return(buf);
9912}
9913
9914/**
9915 * xmlParseEncodingDecl:
9916 * @ctxt: an XML parser context
9917 *
9918 * parse the XML encoding declaration
9919 *
9920 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9921 *
9922 * this setups the conversion filters.
9923 *
9924 * Returns the encoding value or NULL
9925 */
9926
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009927const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009928xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9929 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009930
9931 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009932 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009933 SKIP(8);
9934 SKIP_BLANKS;
9935 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009936 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009937 return(NULL);
9938 }
9939 NEXT;
9940 SKIP_BLANKS;
9941 if (RAW == '"') {
9942 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009943 encoding = xmlParseEncName(ctxt);
9944 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009945 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009946 } else
9947 NEXT;
9948 } else if (RAW == '\''){
9949 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009950 encoding = xmlParseEncName(ctxt);
9951 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009952 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009953 } else
9954 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009955 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009956 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009957 }
Daniel Veillardc62efc82011-05-16 16:03:50 +08009958
9959 /*
9960 * Non standard parsing, allowing the user to ignore encoding
9961 */
9962 if (ctxt->options & XML_PARSE_IGNORE_ENC)
9963 return(encoding);
9964
Daniel Veillard6b621b82003-08-11 15:03:34 +00009965 /*
9966 * UTF-16 encoding stwich has already taken place at this stage,
9967 * more over the little-endian/big-endian selection is already done
9968 */
9969 if ((encoding != NULL) &&
9970 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9971 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009972 /*
9973 * If no encoding was passed to the parser, that we are
9974 * using UTF-16 and no decoder is present i.e. the
9975 * document is apparently UTF-8 compatible, then raise an
9976 * encoding mismatch fatal error
9977 */
9978 if ((ctxt->encoding == NULL) &&
9979 (ctxt->input->buf != NULL) &&
9980 (ctxt->input->buf->encoder == NULL)) {
9981 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9982 "Document labelled UTF-16 but has UTF-8 content\n");
9983 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009984 if (ctxt->encoding != NULL)
9985 xmlFree((xmlChar *) ctxt->encoding);
9986 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009987 }
9988 /*
9989 * UTF-8 encoding is handled natively
9990 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009991 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009992 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9993 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009994 if (ctxt->encoding != NULL)
9995 xmlFree((xmlChar *) ctxt->encoding);
9996 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009997 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009998 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009999 xmlCharEncodingHandlerPtr handler;
10000
10001 if (ctxt->input->encoding != NULL)
10002 xmlFree((xmlChar *) ctxt->input->encoding);
10003 ctxt->input->encoding = encoding;
10004
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010005 handler = xmlFindCharEncodingHandler((const char *) encoding);
10006 if (handler != NULL) {
10007 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +000010008 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010009 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010010 "Unsupported encoding %s\n", encoding);
10011 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010012 }
10013 }
10014 }
10015 return(encoding);
10016}
10017
10018/**
10019 * xmlParseSDDecl:
10020 * @ctxt: an XML parser context
10021 *
10022 * parse the XML standalone declaration
10023 *
10024 * [32] SDDecl ::= S 'standalone' Eq
10025 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10026 *
10027 * [ VC: Standalone Document Declaration ]
10028 * TODO The standalone document declaration must have the value "no"
10029 * if any external markup declarations contain declarations of:
10030 * - attributes with default values, if elements to which these
10031 * attributes apply appear in the document without specifications
10032 * of values for these attributes, or
10033 * - entities (other than amp, lt, gt, apos, quot), if references
10034 * to those entities appear in the document, or
10035 * - attributes with values subject to normalization, where the
10036 * attribute appears in the document with a value which will change
10037 * as a result of normalization, or
10038 * - element types with element content, if white space occurs directly
10039 * within any instance of those types.
10040 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010041 * Returns:
10042 * 1 if standalone="yes"
10043 * 0 if standalone="no"
10044 * -2 if standalone attribute is missing or invalid
10045 * (A standalone value of -2 means that the XML declaration was found,
10046 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010047 */
10048
10049int
10050xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010051 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010052
10053 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010054 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010055 SKIP(10);
10056 SKIP_BLANKS;
10057 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010058 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010059 return(standalone);
10060 }
10061 NEXT;
10062 SKIP_BLANKS;
10063 if (RAW == '\''){
10064 NEXT;
10065 if ((RAW == 'n') && (NXT(1) == 'o')) {
10066 standalone = 0;
10067 SKIP(2);
10068 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10069 (NXT(2) == 's')) {
10070 standalone = 1;
10071 SKIP(3);
10072 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010073 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010074 }
10075 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010076 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010077 } else
10078 NEXT;
10079 } else if (RAW == '"'){
10080 NEXT;
10081 if ((RAW == 'n') && (NXT(1) == 'o')) {
10082 standalone = 0;
10083 SKIP(2);
10084 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10085 (NXT(2) == 's')) {
10086 standalone = 1;
10087 SKIP(3);
10088 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010089 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010090 }
10091 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010092 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010093 } else
10094 NEXT;
10095 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010096 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010097 }
10098 }
10099 return(standalone);
10100}
10101
10102/**
10103 * xmlParseXMLDecl:
10104 * @ctxt: an XML parser context
10105 *
10106 * parse an XML declaration header
10107 *
10108 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10109 */
10110
10111void
10112xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10113 xmlChar *version;
10114
10115 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010116 * This value for standalone indicates that the document has an
10117 * XML declaration but it does not have a standalone attribute.
10118 * It will be overwritten later if a standalone attribute is found.
10119 */
10120 ctxt->input->standalone = -2;
10121
10122 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010123 * We know that '<?xml' is here.
10124 */
10125 SKIP(5);
10126
William M. Brack76e95df2003-10-18 16:20:14 +000010127 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010128 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10129 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010130 }
10131 SKIP_BLANKS;
10132
10133 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010134 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010135 */
10136 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010137 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010138 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010139 } else {
10140 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10141 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010142 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010143 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010144 if (ctxt->options & XML_PARSE_OLD10) {
10145 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10146 "Unsupported version '%s'\n",
10147 version);
10148 } else {
10149 if ((version[0] == '1') && ((version[1] == '.'))) {
10150 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10151 "Unsupported version '%s'\n",
10152 version, NULL);
10153 } else {
10154 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10155 "Unsupported version '%s'\n",
10156 version);
10157 }
10158 }
Daniel Veillard19840942001-11-29 16:11:38 +000010159 }
10160 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010161 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010162 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010163 }
Owen Taylor3473f882001-02-23 17:55:21 +000010164
10165 /*
10166 * We may have the encoding declaration
10167 */
William M. Brack76e95df2003-10-18 16:20:14 +000010168 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010169 if ((RAW == '?') && (NXT(1) == '>')) {
10170 SKIP(2);
10171 return;
10172 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010173 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010174 }
10175 xmlParseEncodingDecl(ctxt);
10176 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10177 /*
10178 * The XML REC instructs us to stop parsing right here
10179 */
10180 return;
10181 }
10182
10183 /*
10184 * We may have the standalone status.
10185 */
William M. Brack76e95df2003-10-18 16:20:14 +000010186 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010187 if ((RAW == '?') && (NXT(1) == '>')) {
10188 SKIP(2);
10189 return;
10190 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010191 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010192 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010193
10194 /*
10195 * We can grow the input buffer freely at that point
10196 */
10197 GROW;
10198
Owen Taylor3473f882001-02-23 17:55:21 +000010199 SKIP_BLANKS;
10200 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10201
10202 SKIP_BLANKS;
10203 if ((RAW == '?') && (NXT(1) == '>')) {
10204 SKIP(2);
10205 } else if (RAW == '>') {
10206 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010207 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010208 NEXT;
10209 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010210 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010211 MOVETO_ENDTAG(CUR_PTR);
10212 NEXT;
10213 }
10214}
10215
10216/**
10217 * xmlParseMisc:
10218 * @ctxt: an XML parser context
10219 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010220 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010221 *
10222 * [27] Misc ::= Comment | PI | S
10223 */
10224
10225void
10226xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010227 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010228 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010229 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010230 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010231 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010232 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010233 NEXT;
10234 } else
10235 xmlParseComment(ctxt);
10236 }
10237}
10238
10239/**
10240 * xmlParseDocument:
10241 * @ctxt: an XML parser context
10242 *
10243 * parse an XML document (and build a tree if using the standard SAX
10244 * interface).
10245 *
10246 * [1] document ::= prolog element Misc*
10247 *
10248 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10249 *
10250 * Returns 0, -1 in case of error. the parser context is augmented
10251 * as a result of the parsing.
10252 */
10253
10254int
10255xmlParseDocument(xmlParserCtxtPtr ctxt) {
10256 xmlChar start[4];
10257 xmlCharEncoding enc;
10258
10259 xmlInitParser();
10260
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010261 if ((ctxt == NULL) || (ctxt->input == NULL))
10262 return(-1);
10263
Owen Taylor3473f882001-02-23 17:55:21 +000010264 GROW;
10265
10266 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010267 * SAX: detecting the level.
10268 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010269 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010270
10271 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010272 * SAX: beginning of the document processing.
10273 */
10274 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10275 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10276
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010277 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010278 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010279 /*
10280 * Get the 4 first bytes and decode the charset
10281 * if enc != XML_CHAR_ENCODING_NONE
10282 * plug some encoding conversion routines.
10283 */
10284 start[0] = RAW;
10285 start[1] = NXT(1);
10286 start[2] = NXT(2);
10287 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010288 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010289 if (enc != XML_CHAR_ENCODING_NONE) {
10290 xmlSwitchEncoding(ctxt, enc);
10291 }
Owen Taylor3473f882001-02-23 17:55:21 +000010292 }
10293
10294
10295 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010296 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010297 }
10298
10299 /*
10300 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010301 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010302 * than just the first line, unless the amount of data is really
10303 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010304 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010305 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10306 GROW;
10307 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010308 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010309
10310 /*
10311 * Note that we will switch encoding on the fly.
10312 */
10313 xmlParseXMLDecl(ctxt);
10314 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10315 /*
10316 * The XML REC instructs us to stop parsing right here
10317 */
10318 return(-1);
10319 }
10320 ctxt->standalone = ctxt->input->standalone;
10321 SKIP_BLANKS;
10322 } else {
10323 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10324 }
10325 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10326 ctxt->sax->startDocument(ctxt->userData);
10327
10328 /*
10329 * The Misc part of the Prolog
10330 */
10331 GROW;
10332 xmlParseMisc(ctxt);
10333
10334 /*
10335 * Then possibly doc type declaration(s) and more Misc
10336 * (doctypedecl Misc*)?
10337 */
10338 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010339 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010340
10341 ctxt->inSubset = 1;
10342 xmlParseDocTypeDecl(ctxt);
10343 if (RAW == '[') {
10344 ctxt->instate = XML_PARSER_DTD;
10345 xmlParseInternalSubset(ctxt);
10346 }
10347
10348 /*
10349 * Create and update the external subset.
10350 */
10351 ctxt->inSubset = 2;
10352 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10353 (!ctxt->disableSAX))
10354 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10355 ctxt->extSubSystem, ctxt->extSubURI);
10356 ctxt->inSubset = 0;
10357
Daniel Veillardac4118d2008-01-11 05:27:32 +000010358 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010359
10360 ctxt->instate = XML_PARSER_PROLOG;
10361 xmlParseMisc(ctxt);
10362 }
10363
10364 /*
10365 * Time to start parsing the tree itself
10366 */
10367 GROW;
10368 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010369 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10370 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010371 } else {
10372 ctxt->instate = XML_PARSER_CONTENT;
10373 xmlParseElement(ctxt);
10374 ctxt->instate = XML_PARSER_EPILOG;
10375
10376
10377 /*
10378 * The Misc part at the end
10379 */
10380 xmlParseMisc(ctxt);
10381
Daniel Veillard561b7f82002-03-20 21:55:57 +000010382 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010383 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010384 }
10385 ctxt->instate = XML_PARSER_EOF;
10386 }
10387
10388 /*
10389 * SAX: end of the document processing.
10390 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010391 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010392 ctxt->sax->endDocument(ctxt->userData);
10393
Daniel Veillard5997aca2002-03-18 18:36:20 +000010394 /*
10395 * Remove locally kept entity definitions if the tree was not built
10396 */
10397 if ((ctxt->myDoc != NULL) &&
10398 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10399 xmlFreeDoc(ctxt->myDoc);
10400 ctxt->myDoc = NULL;
10401 }
10402
Daniel Veillardae0765b2008-07-31 19:54:59 +000010403 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10404 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10405 if (ctxt->valid)
10406 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10407 if (ctxt->nsWellFormed)
10408 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10409 if (ctxt->options & XML_PARSE_OLD10)
10410 ctxt->myDoc->properties |= XML_DOC_OLD10;
10411 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010412 if (! ctxt->wellFormed) {
10413 ctxt->valid = 0;
10414 return(-1);
10415 }
Owen Taylor3473f882001-02-23 17:55:21 +000010416 return(0);
10417}
10418
10419/**
10420 * xmlParseExtParsedEnt:
10421 * @ctxt: an XML parser context
10422 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010423 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010424 * An external general parsed entity is well-formed if it matches the
10425 * production labeled extParsedEnt.
10426 *
10427 * [78] extParsedEnt ::= TextDecl? content
10428 *
10429 * Returns 0, -1 in case of error. the parser context is augmented
10430 * as a result of the parsing.
10431 */
10432
10433int
10434xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10435 xmlChar start[4];
10436 xmlCharEncoding enc;
10437
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010438 if ((ctxt == NULL) || (ctxt->input == NULL))
10439 return(-1);
10440
Owen Taylor3473f882001-02-23 17:55:21 +000010441 xmlDefaultSAXHandlerInit();
10442
Daniel Veillard309f81d2003-09-23 09:02:53 +000010443 xmlDetectSAX2(ctxt);
10444
Owen Taylor3473f882001-02-23 17:55:21 +000010445 GROW;
10446
10447 /*
10448 * SAX: beginning of the document processing.
10449 */
10450 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10451 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10452
10453 /*
10454 * Get the 4 first bytes and decode the charset
10455 * if enc != XML_CHAR_ENCODING_NONE
10456 * plug some encoding conversion routines.
10457 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010458 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10459 start[0] = RAW;
10460 start[1] = NXT(1);
10461 start[2] = NXT(2);
10462 start[3] = NXT(3);
10463 enc = xmlDetectCharEncoding(start, 4);
10464 if (enc != XML_CHAR_ENCODING_NONE) {
10465 xmlSwitchEncoding(ctxt, enc);
10466 }
Owen Taylor3473f882001-02-23 17:55:21 +000010467 }
10468
10469
10470 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010471 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010472 }
10473
10474 /*
10475 * Check for the XMLDecl in the Prolog.
10476 */
10477 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010478 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010479
10480 /*
10481 * Note that we will switch encoding on the fly.
10482 */
10483 xmlParseXMLDecl(ctxt);
10484 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10485 /*
10486 * The XML REC instructs us to stop parsing right here
10487 */
10488 return(-1);
10489 }
10490 SKIP_BLANKS;
10491 } else {
10492 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10493 }
10494 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10495 ctxt->sax->startDocument(ctxt->userData);
10496
10497 /*
10498 * Doing validity checking on chunk doesn't make sense
10499 */
10500 ctxt->instate = XML_PARSER_CONTENT;
10501 ctxt->validate = 0;
10502 ctxt->loadsubset = 0;
10503 ctxt->depth = 0;
10504
10505 xmlParseContent(ctxt);
10506
10507 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010508 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010509 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010510 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010511 }
10512
10513 /*
10514 * SAX: end of the document processing.
10515 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010516 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010517 ctxt->sax->endDocument(ctxt->userData);
10518
10519 if (! ctxt->wellFormed) return(-1);
10520 return(0);
10521}
10522
Daniel Veillard73b013f2003-09-30 12:36:01 +000010523#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010524/************************************************************************
10525 * *
10526 * Progressive parsing interfaces *
10527 * *
10528 ************************************************************************/
10529
10530/**
10531 * xmlParseLookupSequence:
10532 * @ctxt: an XML parser context
10533 * @first: the first char to lookup
10534 * @next: the next char to lookup or zero
10535 * @third: the next char to lookup or zero
10536 *
10537 * Try to find if a sequence (first, next, third) or just (first next) or
10538 * (first) is available in the input stream.
10539 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10540 * to avoid rescanning sequences of bytes, it DOES change the state of the
10541 * parser, do not use liberally.
10542 *
10543 * Returns the index to the current parsing point if the full sequence
10544 * is available, -1 otherwise.
10545 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010546static int
Owen Taylor3473f882001-02-23 17:55:21 +000010547xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10548 xmlChar next, xmlChar third) {
10549 int base, len;
10550 xmlParserInputPtr in;
10551 const xmlChar *buf;
10552
10553 in = ctxt->input;
10554 if (in == NULL) return(-1);
10555 base = in->cur - in->base;
10556 if (base < 0) return(-1);
10557 if (ctxt->checkIndex > base)
10558 base = ctxt->checkIndex;
10559 if (in->buf == NULL) {
10560 buf = in->base;
10561 len = in->length;
10562 } else {
10563 buf = in->buf->buffer->content;
10564 len = in->buf->buffer->use;
10565 }
10566 /* take into account the sequence length */
10567 if (third) len -= 2;
10568 else if (next) len --;
10569 for (;base < len;base++) {
10570 if (buf[base] == first) {
10571 if (third != 0) {
10572 if ((buf[base + 1] != next) ||
10573 (buf[base + 2] != third)) continue;
10574 } else if (next != 0) {
10575 if (buf[base + 1] != next) continue;
10576 }
10577 ctxt->checkIndex = 0;
10578#ifdef DEBUG_PUSH
10579 if (next == 0)
10580 xmlGenericError(xmlGenericErrorContext,
10581 "PP: lookup '%c' found at %d\n",
10582 first, base);
10583 else if (third == 0)
10584 xmlGenericError(xmlGenericErrorContext,
10585 "PP: lookup '%c%c' found at %d\n",
10586 first, next, base);
10587 else
10588 xmlGenericError(xmlGenericErrorContext,
10589 "PP: lookup '%c%c%c' found at %d\n",
10590 first, next, third, base);
10591#endif
10592 return(base - (in->cur - in->base));
10593 }
10594 }
10595 ctxt->checkIndex = base;
10596#ifdef DEBUG_PUSH
10597 if (next == 0)
10598 xmlGenericError(xmlGenericErrorContext,
10599 "PP: lookup '%c' failed\n", first);
10600 else if (third == 0)
10601 xmlGenericError(xmlGenericErrorContext,
10602 "PP: lookup '%c%c' failed\n", first, next);
10603 else
10604 xmlGenericError(xmlGenericErrorContext,
10605 "PP: lookup '%c%c%c' failed\n", first, next, third);
10606#endif
10607 return(-1);
10608}
10609
10610/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010611 * xmlParseGetLasts:
10612 * @ctxt: an XML parser context
10613 * @lastlt: pointer to store the last '<' from the input
10614 * @lastgt: pointer to store the last '>' from the input
10615 *
10616 * Lookup the last < and > in the current chunk
10617 */
10618static void
10619xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10620 const xmlChar **lastgt) {
10621 const xmlChar *tmp;
10622
10623 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10624 xmlGenericError(xmlGenericErrorContext,
10625 "Internal error: xmlParseGetLasts\n");
10626 return;
10627 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010628 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010629 tmp = ctxt->input->end;
10630 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010631 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010632 if (tmp < ctxt->input->base) {
10633 *lastlt = NULL;
10634 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010635 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010636 *lastlt = tmp;
10637 tmp++;
10638 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10639 if (*tmp == '\'') {
10640 tmp++;
10641 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10642 if (tmp < ctxt->input->end) tmp++;
10643 } else if (*tmp == '"') {
10644 tmp++;
10645 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10646 if (tmp < ctxt->input->end) tmp++;
10647 } else
10648 tmp++;
10649 }
10650 if (tmp < ctxt->input->end)
10651 *lastgt = tmp;
10652 else {
10653 tmp = *lastlt;
10654 tmp--;
10655 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10656 if (tmp >= ctxt->input->base)
10657 *lastgt = tmp;
10658 else
10659 *lastgt = NULL;
10660 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010661 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010662 } else {
10663 *lastlt = NULL;
10664 *lastgt = NULL;
10665 }
10666}
10667/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010668 * xmlCheckCdataPush:
10669 * @cur: pointer to the bock of characters
10670 * @len: length of the block in bytes
10671 *
10672 * Check that the block of characters is okay as SCdata content [20]
10673 *
10674 * Returns the number of bytes to pass if okay, a negative index where an
10675 * UTF-8 error occured otherwise
10676 */
10677static int
10678xmlCheckCdataPush(const xmlChar *utf, int len) {
10679 int ix;
10680 unsigned char c;
10681 int codepoint;
10682
10683 if ((utf == NULL) || (len <= 0))
10684 return(0);
10685
10686 for (ix = 0; ix < len;) { /* string is 0-terminated */
10687 c = utf[ix];
10688 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10689 if (c >= 0x20)
10690 ix++;
10691 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10692 ix++;
10693 else
10694 return(-ix);
10695 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10696 if (ix + 2 > len) return(ix);
10697 if ((utf[ix+1] & 0xc0 ) != 0x80)
10698 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010699 codepoint = (utf[ix] & 0x1f) << 6;
10700 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010701 if (!xmlIsCharQ(codepoint))
10702 return(-ix);
10703 ix += 2;
10704 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10705 if (ix + 3 > len) return(ix);
10706 if (((utf[ix+1] & 0xc0) != 0x80) ||
10707 ((utf[ix+2] & 0xc0) != 0x80))
10708 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010709 codepoint = (utf[ix] & 0xf) << 12;
10710 codepoint |= (utf[ix+1] & 0x3f) << 6;
10711 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010712 if (!xmlIsCharQ(codepoint))
10713 return(-ix);
10714 ix += 3;
10715 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10716 if (ix + 4 > len) return(ix);
10717 if (((utf[ix+1] & 0xc0) != 0x80) ||
10718 ((utf[ix+2] & 0xc0) != 0x80) ||
10719 ((utf[ix+3] & 0xc0) != 0x80))
10720 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010721 codepoint = (utf[ix] & 0x7) << 18;
10722 codepoint |= (utf[ix+1] & 0x3f) << 12;
10723 codepoint |= (utf[ix+2] & 0x3f) << 6;
10724 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010725 if (!xmlIsCharQ(codepoint))
10726 return(-ix);
10727 ix += 4;
10728 } else /* unknown encoding */
10729 return(-ix);
10730 }
10731 return(ix);
10732}
10733
10734/**
Owen Taylor3473f882001-02-23 17:55:21 +000010735 * xmlParseTryOrFinish:
10736 * @ctxt: an XML parser context
10737 * @terminate: last chunk indicator
10738 *
10739 * Try to progress on parsing
10740 *
10741 * Returns zero if no parsing was possible
10742 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010743static int
Owen Taylor3473f882001-02-23 17:55:21 +000010744xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10745 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010746 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010747 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010748 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010749
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010750 if (ctxt->input == NULL)
10751 return(0);
10752
Owen Taylor3473f882001-02-23 17:55:21 +000010753#ifdef DEBUG_PUSH
10754 switch (ctxt->instate) {
10755 case XML_PARSER_EOF:
10756 xmlGenericError(xmlGenericErrorContext,
10757 "PP: try EOF\n"); break;
10758 case XML_PARSER_START:
10759 xmlGenericError(xmlGenericErrorContext,
10760 "PP: try START\n"); break;
10761 case XML_PARSER_MISC:
10762 xmlGenericError(xmlGenericErrorContext,
10763 "PP: try MISC\n");break;
10764 case XML_PARSER_COMMENT:
10765 xmlGenericError(xmlGenericErrorContext,
10766 "PP: try COMMENT\n");break;
10767 case XML_PARSER_PROLOG:
10768 xmlGenericError(xmlGenericErrorContext,
10769 "PP: try PROLOG\n");break;
10770 case XML_PARSER_START_TAG:
10771 xmlGenericError(xmlGenericErrorContext,
10772 "PP: try START_TAG\n");break;
10773 case XML_PARSER_CONTENT:
10774 xmlGenericError(xmlGenericErrorContext,
10775 "PP: try CONTENT\n");break;
10776 case XML_PARSER_CDATA_SECTION:
10777 xmlGenericError(xmlGenericErrorContext,
10778 "PP: try CDATA_SECTION\n");break;
10779 case XML_PARSER_END_TAG:
10780 xmlGenericError(xmlGenericErrorContext,
10781 "PP: try END_TAG\n");break;
10782 case XML_PARSER_ENTITY_DECL:
10783 xmlGenericError(xmlGenericErrorContext,
10784 "PP: try ENTITY_DECL\n");break;
10785 case XML_PARSER_ENTITY_VALUE:
10786 xmlGenericError(xmlGenericErrorContext,
10787 "PP: try ENTITY_VALUE\n");break;
10788 case XML_PARSER_ATTRIBUTE_VALUE:
10789 xmlGenericError(xmlGenericErrorContext,
10790 "PP: try ATTRIBUTE_VALUE\n");break;
10791 case XML_PARSER_DTD:
10792 xmlGenericError(xmlGenericErrorContext,
10793 "PP: try DTD\n");break;
10794 case XML_PARSER_EPILOG:
10795 xmlGenericError(xmlGenericErrorContext,
10796 "PP: try EPILOG\n");break;
10797 case XML_PARSER_PI:
10798 xmlGenericError(xmlGenericErrorContext,
10799 "PP: try PI\n");break;
10800 case XML_PARSER_IGNORE:
10801 xmlGenericError(xmlGenericErrorContext,
10802 "PP: try IGNORE\n");break;
10803 }
10804#endif
10805
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010806 if ((ctxt->input != NULL) &&
10807 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010808 xmlSHRINK(ctxt);
10809 ctxt->checkIndex = 0;
10810 }
10811 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010812
Daniel Veillarda880b122003-04-21 21:36:41 +000010813 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010814 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010815 return(0);
10816
10817
Owen Taylor3473f882001-02-23 17:55:21 +000010818 /*
10819 * Pop-up of finished entities.
10820 */
10821 while ((RAW == 0) && (ctxt->inputNr > 1))
10822 xmlPopInput(ctxt);
10823
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010824 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010825 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010826 avail = ctxt->input->length -
10827 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010828 else {
10829 /*
10830 * If we are operating on converted input, try to flush
10831 * remainng chars to avoid them stalling in the non-converted
10832 * buffer.
10833 */
10834 if ((ctxt->input->buf->raw != NULL) &&
10835 (ctxt->input->buf->raw->use > 0)) {
10836 int base = ctxt->input->base -
10837 ctxt->input->buf->buffer->content;
10838 int current = ctxt->input->cur - ctxt->input->base;
10839
10840 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10841 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10842 ctxt->input->cur = ctxt->input->base + current;
10843 ctxt->input->end =
10844 &ctxt->input->buf->buffer->content[
10845 ctxt->input->buf->buffer->use];
10846 }
10847 avail = ctxt->input->buf->buffer->use -
10848 (ctxt->input->cur - ctxt->input->base);
10849 }
Owen Taylor3473f882001-02-23 17:55:21 +000010850 if (avail < 1)
10851 goto done;
10852 switch (ctxt->instate) {
10853 case XML_PARSER_EOF:
10854 /*
10855 * Document parsing is done !
10856 */
10857 goto done;
10858 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010859 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10860 xmlChar start[4];
10861 xmlCharEncoding enc;
10862
10863 /*
10864 * Very first chars read from the document flow.
10865 */
10866 if (avail < 4)
10867 goto done;
10868
10869 /*
10870 * Get the 4 first bytes and decode the charset
10871 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010872 * plug some encoding conversion routines,
10873 * else xmlSwitchEncoding will set to (default)
10874 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010875 */
10876 start[0] = RAW;
10877 start[1] = NXT(1);
10878 start[2] = NXT(2);
10879 start[3] = NXT(3);
10880 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010881 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010882 break;
10883 }
Owen Taylor3473f882001-02-23 17:55:21 +000010884
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010885 if (avail < 2)
10886 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010887 cur = ctxt->input->cur[0];
10888 next = ctxt->input->cur[1];
10889 if (cur == 0) {
10890 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10891 ctxt->sax->setDocumentLocator(ctxt->userData,
10892 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010893 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010894 ctxt->instate = XML_PARSER_EOF;
10895#ifdef DEBUG_PUSH
10896 xmlGenericError(xmlGenericErrorContext,
10897 "PP: entering EOF\n");
10898#endif
10899 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10900 ctxt->sax->endDocument(ctxt->userData);
10901 goto done;
10902 }
10903 if ((cur == '<') && (next == '?')) {
10904 /* PI or XML decl */
10905 if (avail < 5) return(ret);
10906 if ((!terminate) &&
10907 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10908 return(ret);
10909 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10910 ctxt->sax->setDocumentLocator(ctxt->userData,
10911 &xmlDefaultSAXLocator);
10912 if ((ctxt->input->cur[2] == 'x') &&
10913 (ctxt->input->cur[3] == 'm') &&
10914 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010915 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010916 ret += 5;
10917#ifdef DEBUG_PUSH
10918 xmlGenericError(xmlGenericErrorContext,
10919 "PP: Parsing XML Decl\n");
10920#endif
10921 xmlParseXMLDecl(ctxt);
10922 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10923 /*
10924 * The XML REC instructs us to stop parsing right
10925 * here
10926 */
10927 ctxt->instate = XML_PARSER_EOF;
10928 return(0);
10929 }
10930 ctxt->standalone = ctxt->input->standalone;
10931 if ((ctxt->encoding == NULL) &&
10932 (ctxt->input->encoding != NULL))
10933 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10934 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10935 (!ctxt->disableSAX))
10936 ctxt->sax->startDocument(ctxt->userData);
10937 ctxt->instate = XML_PARSER_MISC;
10938#ifdef DEBUG_PUSH
10939 xmlGenericError(xmlGenericErrorContext,
10940 "PP: entering MISC\n");
10941#endif
10942 } else {
10943 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10944 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10945 (!ctxt->disableSAX))
10946 ctxt->sax->startDocument(ctxt->userData);
10947 ctxt->instate = XML_PARSER_MISC;
10948#ifdef DEBUG_PUSH
10949 xmlGenericError(xmlGenericErrorContext,
10950 "PP: entering MISC\n");
10951#endif
10952 }
10953 } else {
10954 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10955 ctxt->sax->setDocumentLocator(ctxt->userData,
10956 &xmlDefaultSAXLocator);
10957 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010958 if (ctxt->version == NULL) {
10959 xmlErrMemory(ctxt, NULL);
10960 break;
10961 }
Owen Taylor3473f882001-02-23 17:55:21 +000010962 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10963 (!ctxt->disableSAX))
10964 ctxt->sax->startDocument(ctxt->userData);
10965 ctxt->instate = XML_PARSER_MISC;
10966#ifdef DEBUG_PUSH
10967 xmlGenericError(xmlGenericErrorContext,
10968 "PP: entering MISC\n");
10969#endif
10970 }
10971 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010972 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010973 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010974 const xmlChar *prefix = NULL;
10975 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010976 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010977
10978 if ((avail < 2) && (ctxt->inputNr == 1))
10979 goto done;
10980 cur = ctxt->input->cur[0];
10981 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010982 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010983 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010984 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10985 ctxt->sax->endDocument(ctxt->userData);
10986 goto done;
10987 }
10988 if (!terminate) {
10989 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010990 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010991 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010992 goto done;
10993 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10994 goto done;
10995 }
10996 }
10997 if (ctxt->spaceNr == 0)
10998 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010999 else if (*ctxt->space == -2)
11000 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011001 else
11002 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011003#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011004 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011005#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011006 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011007#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011008 else
11009 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011010#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011011 if (ctxt->instate == XML_PARSER_EOF)
11012 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011013 if (name == NULL) {
11014 spacePop(ctxt);
11015 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011016 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11017 ctxt->sax->endDocument(ctxt->userData);
11018 goto done;
11019 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011020#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011021 /*
11022 * [ VC: Root Element Type ]
11023 * The Name in the document type declaration must match
11024 * the element type of the root element.
11025 */
11026 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11027 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11028 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011029#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011030
11031 /*
11032 * Check for an Empty Element.
11033 */
11034 if ((RAW == '/') && (NXT(1) == '>')) {
11035 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011036
11037 if (ctxt->sax2) {
11038 if ((ctxt->sax != NULL) &&
11039 (ctxt->sax->endElementNs != NULL) &&
11040 (!ctxt->disableSAX))
11041 ctxt->sax->endElementNs(ctxt->userData, name,
11042 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011043 if (ctxt->nsNr - nsNr > 0)
11044 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011045#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011046 } else {
11047 if ((ctxt->sax != NULL) &&
11048 (ctxt->sax->endElement != NULL) &&
11049 (!ctxt->disableSAX))
11050 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011051#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011052 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011053 spacePop(ctxt);
11054 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011055 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011056 } else {
11057 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011058 }
11059 break;
11060 }
11061 if (RAW == '>') {
11062 NEXT;
11063 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011064 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011065 "Couldn't find end of Start Tag %s\n",
11066 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011067 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011068 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011069 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011070 if (ctxt->sax2)
11071 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011072#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011073 else
11074 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011075#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011076
Daniel Veillarda880b122003-04-21 21:36:41 +000011077 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011078 break;
11079 }
11080 case XML_PARSER_CONTENT: {
11081 const xmlChar *test;
11082 unsigned int cons;
11083 if ((avail < 2) && (ctxt->inputNr == 1))
11084 goto done;
11085 cur = ctxt->input->cur[0];
11086 next = ctxt->input->cur[1];
11087
11088 test = CUR_PTR;
11089 cons = ctxt->input->consumed;
11090 if ((cur == '<') && (next == '/')) {
11091 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011092 break;
11093 } else if ((cur == '<') && (next == '?')) {
11094 if ((!terminate) &&
11095 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11096 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011097 xmlParsePI(ctxt);
11098 } else if ((cur == '<') && (next != '!')) {
11099 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011100 break;
11101 } else if ((cur == '<') && (next == '!') &&
11102 (ctxt->input->cur[2] == '-') &&
11103 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011104 int term;
11105
11106 if (avail < 4)
11107 goto done;
11108 ctxt->input->cur += 4;
11109 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11110 ctxt->input->cur -= 4;
11111 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000011112 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011113 xmlParseComment(ctxt);
11114 ctxt->instate = XML_PARSER_CONTENT;
11115 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11116 (ctxt->input->cur[2] == '[') &&
11117 (ctxt->input->cur[3] == 'C') &&
11118 (ctxt->input->cur[4] == 'D') &&
11119 (ctxt->input->cur[5] == 'A') &&
11120 (ctxt->input->cur[6] == 'T') &&
11121 (ctxt->input->cur[7] == 'A') &&
11122 (ctxt->input->cur[8] == '[')) {
11123 SKIP(9);
11124 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011125 break;
11126 } else if ((cur == '<') && (next == '!') &&
11127 (avail < 9)) {
11128 goto done;
11129 } else if (cur == '&') {
11130 if ((!terminate) &&
11131 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11132 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011133 xmlParseReference(ctxt);
11134 } else {
11135 /* TODO Avoid the extra copy, handle directly !!! */
11136 /*
11137 * Goal of the following test is:
11138 * - minimize calls to the SAX 'character' callback
11139 * when they are mergeable
11140 * - handle an problem for isBlank when we only parse
11141 * a sequence of blank chars and the next one is
11142 * not available to check against '<' presence.
11143 * - tries to homogenize the differences in SAX
11144 * callbacks between the push and pull versions
11145 * of the parser.
11146 */
11147 if ((ctxt->inputNr == 1) &&
11148 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11149 if (!terminate) {
11150 if (ctxt->progressive) {
11151 if ((lastlt == NULL) ||
11152 (ctxt->input->cur > lastlt))
11153 goto done;
11154 } else if (xmlParseLookupSequence(ctxt,
11155 '<', 0, 0) < 0) {
11156 goto done;
11157 }
11158 }
11159 }
11160 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011161 xmlParseCharData(ctxt, 0);
11162 }
11163 /*
11164 * Pop-up of finished entities.
11165 */
11166 while ((RAW == 0) && (ctxt->inputNr > 1))
11167 xmlPopInput(ctxt);
11168 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011169 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11170 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011171 ctxt->instate = XML_PARSER_EOF;
11172 break;
11173 }
11174 break;
11175 }
11176 case XML_PARSER_END_TAG:
11177 if (avail < 2)
11178 goto done;
11179 if (!terminate) {
11180 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011181 /* > can be found unescaped in attribute values */
11182 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011183 goto done;
11184 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11185 goto done;
11186 }
11187 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011188 if (ctxt->sax2) {
11189 xmlParseEndTag2(ctxt,
11190 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11191 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011192 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011193 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011194 }
11195#ifdef LIBXML_SAX1_ENABLED
11196 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011197 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011198#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011199 if (ctxt->instate == XML_PARSER_EOF) {
11200 /* Nothing */
11201 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011202 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011203 } else {
11204 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011205 }
11206 break;
11207 case XML_PARSER_CDATA_SECTION: {
11208 /*
11209 * The Push mode need to have the SAX callback for
11210 * cdataBlock merge back contiguous callbacks.
11211 */
11212 int base;
11213
11214 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11215 if (base < 0) {
11216 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011217 int tmp;
11218
11219 tmp = xmlCheckCdataPush(ctxt->input->cur,
11220 XML_PARSER_BIG_BUFFER_SIZE);
11221 if (tmp < 0) {
11222 tmp = -tmp;
11223 ctxt->input->cur += tmp;
11224 goto encoding_error;
11225 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011226 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11227 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011228 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011229 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011230 else if (ctxt->sax->characters != NULL)
11231 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011232 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011233 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011234 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011235 ctxt->checkIndex = 0;
11236 }
11237 goto done;
11238 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011239 int tmp;
11240
11241 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11242 if ((tmp < 0) || (tmp != base)) {
11243 tmp = -tmp;
11244 ctxt->input->cur += tmp;
11245 goto encoding_error;
11246 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011247 if ((ctxt->sax != NULL) && (base == 0) &&
11248 (ctxt->sax->cdataBlock != NULL) &&
11249 (!ctxt->disableSAX)) {
11250 /*
11251 * Special case to provide identical behaviour
11252 * between pull and push parsers on enpty CDATA
11253 * sections
11254 */
11255 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11256 (!strncmp((const char *)&ctxt->input->cur[-9],
11257 "<![CDATA[", 9)))
11258 ctxt->sax->cdataBlock(ctxt->userData,
11259 BAD_CAST "", 0);
11260 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011261 (!ctxt->disableSAX)) {
11262 if (ctxt->sax->cdataBlock != NULL)
11263 ctxt->sax->cdataBlock(ctxt->userData,
11264 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011265 else if (ctxt->sax->characters != NULL)
11266 ctxt->sax->characters(ctxt->userData,
11267 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011268 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011269 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011270 ctxt->checkIndex = 0;
11271 ctxt->instate = XML_PARSER_CONTENT;
11272#ifdef DEBUG_PUSH
11273 xmlGenericError(xmlGenericErrorContext,
11274 "PP: entering CONTENT\n");
11275#endif
11276 }
11277 break;
11278 }
Owen Taylor3473f882001-02-23 17:55:21 +000011279 case XML_PARSER_MISC:
11280 SKIP_BLANKS;
11281 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011282 avail = ctxt->input->length -
11283 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011284 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011285 avail = ctxt->input->buf->buffer->use -
11286 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011287 if (avail < 2)
11288 goto done;
11289 cur = ctxt->input->cur[0];
11290 next = ctxt->input->cur[1];
11291 if ((cur == '<') && (next == '?')) {
11292 if ((!terminate) &&
11293 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11294 goto done;
11295#ifdef DEBUG_PUSH
11296 xmlGenericError(xmlGenericErrorContext,
11297 "PP: Parsing PI\n");
11298#endif
11299 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011300 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011301 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011302 (ctxt->input->cur[2] == '-') &&
11303 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011304 if ((!terminate) &&
11305 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11306 goto done;
11307#ifdef DEBUG_PUSH
11308 xmlGenericError(xmlGenericErrorContext,
11309 "PP: Parsing Comment\n");
11310#endif
11311 xmlParseComment(ctxt);
11312 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011313 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011314 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011315 (ctxt->input->cur[2] == 'D') &&
11316 (ctxt->input->cur[3] == 'O') &&
11317 (ctxt->input->cur[4] == 'C') &&
11318 (ctxt->input->cur[5] == 'T') &&
11319 (ctxt->input->cur[6] == 'Y') &&
11320 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011321 (ctxt->input->cur[8] == 'E')) {
11322 if ((!terminate) &&
11323 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11324 goto done;
11325#ifdef DEBUG_PUSH
11326 xmlGenericError(xmlGenericErrorContext,
11327 "PP: Parsing internal subset\n");
11328#endif
11329 ctxt->inSubset = 1;
11330 xmlParseDocTypeDecl(ctxt);
11331 if (RAW == '[') {
11332 ctxt->instate = XML_PARSER_DTD;
11333#ifdef DEBUG_PUSH
11334 xmlGenericError(xmlGenericErrorContext,
11335 "PP: entering DTD\n");
11336#endif
11337 } else {
11338 /*
11339 * Create and update the external subset.
11340 */
11341 ctxt->inSubset = 2;
11342 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11343 (ctxt->sax->externalSubset != NULL))
11344 ctxt->sax->externalSubset(ctxt->userData,
11345 ctxt->intSubName, ctxt->extSubSystem,
11346 ctxt->extSubURI);
11347 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011348 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011349 ctxt->instate = XML_PARSER_PROLOG;
11350#ifdef DEBUG_PUSH
11351 xmlGenericError(xmlGenericErrorContext,
11352 "PP: entering PROLOG\n");
11353#endif
11354 }
11355 } else if ((cur == '<') && (next == '!') &&
11356 (avail < 9)) {
11357 goto done;
11358 } else {
11359 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011360 ctxt->progressive = 1;
11361 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011362#ifdef DEBUG_PUSH
11363 xmlGenericError(xmlGenericErrorContext,
11364 "PP: entering START_TAG\n");
11365#endif
11366 }
11367 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011368 case XML_PARSER_PROLOG:
11369 SKIP_BLANKS;
11370 if (ctxt->input->buf == NULL)
11371 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11372 else
11373 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11374 if (avail < 2)
11375 goto done;
11376 cur = ctxt->input->cur[0];
11377 next = ctxt->input->cur[1];
11378 if ((cur == '<') && (next == '?')) {
11379 if ((!terminate) &&
11380 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11381 goto done;
11382#ifdef DEBUG_PUSH
11383 xmlGenericError(xmlGenericErrorContext,
11384 "PP: Parsing PI\n");
11385#endif
11386 xmlParsePI(ctxt);
11387 } else if ((cur == '<') && (next == '!') &&
11388 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11389 if ((!terminate) &&
11390 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11391 goto done;
11392#ifdef DEBUG_PUSH
11393 xmlGenericError(xmlGenericErrorContext,
11394 "PP: Parsing Comment\n");
11395#endif
11396 xmlParseComment(ctxt);
11397 ctxt->instate = XML_PARSER_PROLOG;
11398 } else if ((cur == '<') && (next == '!') &&
11399 (avail < 4)) {
11400 goto done;
11401 } else {
11402 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011403 if (ctxt->progressive == 0)
11404 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011405 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011406#ifdef DEBUG_PUSH
11407 xmlGenericError(xmlGenericErrorContext,
11408 "PP: entering START_TAG\n");
11409#endif
11410 }
11411 break;
11412 case XML_PARSER_EPILOG:
11413 SKIP_BLANKS;
11414 if (ctxt->input->buf == NULL)
11415 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11416 else
11417 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11418 if (avail < 2)
11419 goto done;
11420 cur = ctxt->input->cur[0];
11421 next = ctxt->input->cur[1];
11422 if ((cur == '<') && (next == '?')) {
11423 if ((!terminate) &&
11424 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11425 goto done;
11426#ifdef DEBUG_PUSH
11427 xmlGenericError(xmlGenericErrorContext,
11428 "PP: Parsing PI\n");
11429#endif
11430 xmlParsePI(ctxt);
11431 ctxt->instate = XML_PARSER_EPILOG;
11432 } else if ((cur == '<') && (next == '!') &&
11433 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11434 if ((!terminate) &&
11435 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11436 goto done;
11437#ifdef DEBUG_PUSH
11438 xmlGenericError(xmlGenericErrorContext,
11439 "PP: Parsing Comment\n");
11440#endif
11441 xmlParseComment(ctxt);
11442 ctxt->instate = XML_PARSER_EPILOG;
11443 } else if ((cur == '<') && (next == '!') &&
11444 (avail < 4)) {
11445 goto done;
11446 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011447 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011448 ctxt->instate = XML_PARSER_EOF;
11449#ifdef DEBUG_PUSH
11450 xmlGenericError(xmlGenericErrorContext,
11451 "PP: entering EOF\n");
11452#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011453 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011454 ctxt->sax->endDocument(ctxt->userData);
11455 goto done;
11456 }
11457 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011458 case XML_PARSER_DTD: {
11459 /*
11460 * Sorry but progressive parsing of the internal subset
11461 * is not expected to be supported. We first check that
11462 * the full content of the internal subset is available and
11463 * the parsing is launched only at that point.
11464 * Internal subset ends up with "']' S? '>'" in an unescaped
11465 * section and not in a ']]>' sequence which are conditional
11466 * sections (whoever argued to keep that crap in XML deserve
11467 * a place in hell !).
11468 */
11469 int base, i;
11470 xmlChar *buf;
11471 xmlChar quote = 0;
11472
11473 base = ctxt->input->cur - ctxt->input->base;
11474 if (base < 0) return(0);
11475 if (ctxt->checkIndex > base)
11476 base = ctxt->checkIndex;
11477 buf = ctxt->input->buf->buffer->content;
11478 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11479 base++) {
11480 if (quote != 0) {
11481 if (buf[base] == quote)
11482 quote = 0;
11483 continue;
11484 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011485 if ((quote == 0) && (buf[base] == '<')) {
11486 int found = 0;
11487 /* special handling of comments */
11488 if (((unsigned int) base + 4 <
11489 ctxt->input->buf->buffer->use) &&
11490 (buf[base + 1] == '!') &&
11491 (buf[base + 2] == '-') &&
11492 (buf[base + 3] == '-')) {
11493 for (;(unsigned int) base + 3 <
11494 ctxt->input->buf->buffer->use; base++) {
11495 if ((buf[base] == '-') &&
11496 (buf[base + 1] == '-') &&
11497 (buf[base + 2] == '>')) {
11498 found = 1;
11499 base += 2;
11500 break;
11501 }
11502 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011503 if (!found) {
11504#if 0
11505 fprintf(stderr, "unfinished comment\n");
11506#endif
11507 break; /* for */
11508 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011509 continue;
11510 }
11511 }
Owen Taylor3473f882001-02-23 17:55:21 +000011512 if (buf[base] == '"') {
11513 quote = '"';
11514 continue;
11515 }
11516 if (buf[base] == '\'') {
11517 quote = '\'';
11518 continue;
11519 }
11520 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011521#if 0
11522 fprintf(stderr, "%c%c%c%c: ", buf[base],
11523 buf[base + 1], buf[base + 2], buf[base + 3]);
11524#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011525 if ((unsigned int) base +1 >=
11526 ctxt->input->buf->buffer->use)
11527 break;
11528 if (buf[base + 1] == ']') {
11529 /* conditional crap, skip both ']' ! */
11530 base++;
11531 continue;
11532 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011533 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011534 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11535 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011536 if (buf[base + i] == '>') {
11537#if 0
11538 fprintf(stderr, "found\n");
11539#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011540 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011541 }
11542 if (!IS_BLANK_CH(buf[base + i])) {
11543#if 0
11544 fprintf(stderr, "not found\n");
11545#endif
11546 goto not_end_of_int_subset;
11547 }
Owen Taylor3473f882001-02-23 17:55:21 +000011548 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011549#if 0
11550 fprintf(stderr, "end of stream\n");
11551#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011552 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011553
Owen Taylor3473f882001-02-23 17:55:21 +000011554 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011555not_end_of_int_subset:
11556 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011557 }
11558 /*
11559 * We didn't found the end of the Internal subset
11560 */
Owen Taylor3473f882001-02-23 17:55:21 +000011561#ifdef DEBUG_PUSH
11562 if (next == 0)
11563 xmlGenericError(xmlGenericErrorContext,
11564 "PP: lookup of int subset end filed\n");
11565#endif
11566 goto done;
11567
11568found_end_int_subset:
11569 xmlParseInternalSubset(ctxt);
11570 ctxt->inSubset = 2;
11571 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11572 (ctxt->sax->externalSubset != NULL))
11573 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11574 ctxt->extSubSystem, ctxt->extSubURI);
11575 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011576 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011577 ctxt->instate = XML_PARSER_PROLOG;
11578 ctxt->checkIndex = 0;
11579#ifdef DEBUG_PUSH
11580 xmlGenericError(xmlGenericErrorContext,
11581 "PP: entering PROLOG\n");
11582#endif
11583 break;
11584 }
11585 case XML_PARSER_COMMENT:
11586 xmlGenericError(xmlGenericErrorContext,
11587 "PP: internal error, state == COMMENT\n");
11588 ctxt->instate = XML_PARSER_CONTENT;
11589#ifdef DEBUG_PUSH
11590 xmlGenericError(xmlGenericErrorContext,
11591 "PP: entering CONTENT\n");
11592#endif
11593 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011594 case XML_PARSER_IGNORE:
11595 xmlGenericError(xmlGenericErrorContext,
11596 "PP: internal error, state == IGNORE");
11597 ctxt->instate = XML_PARSER_DTD;
11598#ifdef DEBUG_PUSH
11599 xmlGenericError(xmlGenericErrorContext,
11600 "PP: entering DTD\n");
11601#endif
11602 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011603 case XML_PARSER_PI:
11604 xmlGenericError(xmlGenericErrorContext,
11605 "PP: internal error, state == PI\n");
11606 ctxt->instate = XML_PARSER_CONTENT;
11607#ifdef DEBUG_PUSH
11608 xmlGenericError(xmlGenericErrorContext,
11609 "PP: entering CONTENT\n");
11610#endif
11611 break;
11612 case XML_PARSER_ENTITY_DECL:
11613 xmlGenericError(xmlGenericErrorContext,
11614 "PP: internal error, state == ENTITY_DECL\n");
11615 ctxt->instate = XML_PARSER_DTD;
11616#ifdef DEBUG_PUSH
11617 xmlGenericError(xmlGenericErrorContext,
11618 "PP: entering DTD\n");
11619#endif
11620 break;
11621 case XML_PARSER_ENTITY_VALUE:
11622 xmlGenericError(xmlGenericErrorContext,
11623 "PP: internal error, state == ENTITY_VALUE\n");
11624 ctxt->instate = XML_PARSER_CONTENT;
11625#ifdef DEBUG_PUSH
11626 xmlGenericError(xmlGenericErrorContext,
11627 "PP: entering DTD\n");
11628#endif
11629 break;
11630 case XML_PARSER_ATTRIBUTE_VALUE:
11631 xmlGenericError(xmlGenericErrorContext,
11632 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11633 ctxt->instate = XML_PARSER_START_TAG;
11634#ifdef DEBUG_PUSH
11635 xmlGenericError(xmlGenericErrorContext,
11636 "PP: entering START_TAG\n");
11637#endif
11638 break;
11639 case XML_PARSER_SYSTEM_LITERAL:
11640 xmlGenericError(xmlGenericErrorContext,
11641 "PP: internal error, state == SYSTEM_LITERAL\n");
11642 ctxt->instate = XML_PARSER_START_TAG;
11643#ifdef DEBUG_PUSH
11644 xmlGenericError(xmlGenericErrorContext,
11645 "PP: entering START_TAG\n");
11646#endif
11647 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011648 case XML_PARSER_PUBLIC_LITERAL:
11649 xmlGenericError(xmlGenericErrorContext,
11650 "PP: internal error, state == PUBLIC_LITERAL\n");
11651 ctxt->instate = XML_PARSER_START_TAG;
11652#ifdef DEBUG_PUSH
11653 xmlGenericError(xmlGenericErrorContext,
11654 "PP: entering START_TAG\n");
11655#endif
11656 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011657 }
11658 }
11659done:
11660#ifdef DEBUG_PUSH
11661 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11662#endif
11663 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011664encoding_error:
11665 {
11666 char buffer[150];
11667
11668 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11669 ctxt->input->cur[0], ctxt->input->cur[1],
11670 ctxt->input->cur[2], ctxt->input->cur[3]);
11671 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11672 "Input is not proper UTF-8, indicate encoding !\n%s",
11673 BAD_CAST buffer, NULL);
11674 }
11675 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011676}
11677
11678/**
Owen Taylor3473f882001-02-23 17:55:21 +000011679 * xmlParseChunk:
11680 * @ctxt: an XML parser context
11681 * @chunk: an char array
11682 * @size: the size in byte of the chunk
11683 * @terminate: last chunk indicator
11684 *
11685 * Parse a Chunk of memory
11686 *
11687 * Returns zero if no error, the xmlParserErrors otherwise.
11688 */
11689int
11690xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11691 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011692 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011693 int remain = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000011694
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011695 if (ctxt == NULL)
11696 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011697 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011698 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011699 if (ctxt->instate == XML_PARSER_START)
11700 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011701 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11702 (chunk[size - 1] == '\r')) {
11703 end_in_lf = 1;
11704 size--;
11705 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011706
11707xmldecl_done:
11708
Owen Taylor3473f882001-02-23 17:55:21 +000011709 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11710 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11711 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11712 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011713 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011714
11715 /*
11716 * Specific handling if we autodetected an encoding, we should not
11717 * push more than the first line ... which depend on the encoding
11718 * And only push the rest once the final encoding was detected
11719 */
11720 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11721 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010011722 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011723
11724 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11725 BAD_CAST "UTF-16")) ||
11726 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11727 BAD_CAST "UTF16")))
11728 len = 90;
11729 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11730 BAD_CAST "UCS-4")) ||
11731 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11732 BAD_CAST "UCS4")))
11733 len = 180;
11734
11735 if (ctxt->input->buf->rawconsumed < len)
11736 len -= ctxt->input->buf->rawconsumed;
11737
Raul Hudeaba9716a2010-03-15 10:13:29 +010011738 /*
11739 * Change size for reading the initial declaration only
11740 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11741 * will blindly copy extra bytes from memory.
11742 */
Daniel Veillard60587d62010-11-04 15:16:27 +010011743 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010011744 remain = size - len;
11745 size = len;
11746 } else {
11747 remain = 0;
11748 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011749 }
William M. Bracka3215c72004-07-31 16:24:01 +000011750 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11751 if (res < 0) {
11752 ctxt->errNo = XML_PARSER_EOF;
11753 ctxt->disableSAX = 1;
11754 return (XML_PARSER_EOF);
11755 }
Owen Taylor3473f882001-02-23 17:55:21 +000011756 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11757 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011758 ctxt->input->end =
11759 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011760#ifdef DEBUG_PUSH
11761 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11762#endif
11763
Owen Taylor3473f882001-02-23 17:55:21 +000011764 } else if (ctxt->instate != XML_PARSER_EOF) {
11765 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11766 xmlParserInputBufferPtr in = ctxt->input->buf;
11767 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11768 (in->raw != NULL)) {
11769 int nbchars;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011770
Owen Taylor3473f882001-02-23 17:55:21 +000011771 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11772 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011773 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011774 xmlGenericError(xmlGenericErrorContext,
11775 "xmlParseChunk: encoder error\n");
11776 return(XML_ERR_INVALID_ENCODING);
11777 }
11778 }
11779 }
11780 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011781 if (remain != 0)
11782 xmlParseTryOrFinish(ctxt, 0);
11783 else
11784 xmlParseTryOrFinish(ctxt, terminate);
11785 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11786 return(ctxt->errNo);
11787
11788 if (remain != 0) {
11789 chunk += size;
11790 size = remain;
11791 remain = 0;
11792 goto xmldecl_done;
11793 }
Daniel Veillarda617e242006-01-09 14:38:44 +000011794 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11795 (ctxt->input->buf != NULL)) {
11796 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11797 }
Owen Taylor3473f882001-02-23 17:55:21 +000011798 if (terminate) {
11799 /*
11800 * Check for termination
11801 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011802 int avail = 0;
11803
11804 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011805 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011806 avail = ctxt->input->length -
11807 (ctxt->input->cur - ctxt->input->base);
11808 else
11809 avail = ctxt->input->buf->buffer->use -
11810 (ctxt->input->cur - ctxt->input->base);
11811 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011812
Owen Taylor3473f882001-02-23 17:55:21 +000011813 if ((ctxt->instate != XML_PARSER_EOF) &&
11814 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011815 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011816 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011817 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011818 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011819 }
Owen Taylor3473f882001-02-23 17:55:21 +000011820 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011821 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011822 ctxt->sax->endDocument(ctxt->userData);
11823 }
11824 ctxt->instate = XML_PARSER_EOF;
11825 }
11826 return((xmlParserErrors) ctxt->errNo);
11827}
11828
11829/************************************************************************
11830 * *
11831 * I/O front end functions to the parser *
11832 * *
11833 ************************************************************************/
11834
11835/**
Owen Taylor3473f882001-02-23 17:55:21 +000011836 * xmlCreatePushParserCtxt:
11837 * @sax: a SAX handler
11838 * @user_data: The user data returned on SAX callbacks
11839 * @chunk: a pointer to an array of chars
11840 * @size: number of chars in the array
11841 * @filename: an optional file name or URI
11842 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011843 * Create a parser context for using the XML parser in push mode.
11844 * If @chunk and @size are non-NULL, the data is used to detect
11845 * the encoding. The remaining characters will be parsed so they
11846 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011847 * To allow content encoding detection, @size should be >= 4
11848 * The value of @filename is used for fetching external entities
11849 * and error/warning reports.
11850 *
11851 * Returns the new parser context or NULL
11852 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011853
Owen Taylor3473f882001-02-23 17:55:21 +000011854xmlParserCtxtPtr
11855xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11856 const char *chunk, int size, const char *filename) {
11857 xmlParserCtxtPtr ctxt;
11858 xmlParserInputPtr inputStream;
11859 xmlParserInputBufferPtr buf;
11860 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11861
11862 /*
11863 * plug some encoding conversion routines
11864 */
11865 if ((chunk != NULL) && (size >= 4))
11866 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11867
11868 buf = xmlAllocParserInputBuffer(enc);
11869 if (buf == NULL) return(NULL);
11870
11871 ctxt = xmlNewParserCtxt();
11872 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011873 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011874 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011875 return(NULL);
11876 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011877 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011878 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11879 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011880 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011881 xmlFreeParserInputBuffer(buf);
11882 xmlFreeParserCtxt(ctxt);
11883 return(NULL);
11884 }
Owen Taylor3473f882001-02-23 17:55:21 +000011885 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011886#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011887 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011888#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011889 xmlFree(ctxt->sax);
11890 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11891 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011892 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011893 xmlFreeParserInputBuffer(buf);
11894 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011895 return(NULL);
11896 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011897 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11898 if (sax->initialized == XML_SAX2_MAGIC)
11899 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11900 else
11901 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011902 if (user_data != NULL)
11903 ctxt->userData = user_data;
11904 }
11905 if (filename == NULL) {
11906 ctxt->directory = NULL;
11907 } else {
11908 ctxt->directory = xmlParserGetDirectory(filename);
11909 }
11910
11911 inputStream = xmlNewInputStream(ctxt);
11912 if (inputStream == NULL) {
11913 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011914 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011915 return(NULL);
11916 }
11917
11918 if (filename == NULL)
11919 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011920 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011921 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011922 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011923 if (inputStream->filename == NULL) {
11924 xmlFreeParserCtxt(ctxt);
11925 xmlFreeParserInputBuffer(buf);
11926 return(NULL);
11927 }
11928 }
Owen Taylor3473f882001-02-23 17:55:21 +000011929 inputStream->buf = buf;
11930 inputStream->base = inputStream->buf->buffer->content;
11931 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011932 inputStream->end =
11933 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011934
11935 inputPush(ctxt, inputStream);
11936
William M. Brack3a1cd212005-02-11 14:35:54 +000011937 /*
11938 * If the caller didn't provide an initial 'chunk' for determining
11939 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11940 * that it can be automatically determined later
11941 */
11942 if ((size == 0) || (chunk == NULL)) {
11943 ctxt->charset = XML_CHAR_ENCODING_NONE;
11944 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011945 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11946 int cur = ctxt->input->cur - ctxt->input->base;
11947
Owen Taylor3473f882001-02-23 17:55:21 +000011948 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011949
11950 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11951 ctxt->input->cur = ctxt->input->base + cur;
11952 ctxt->input->end =
11953 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011954#ifdef DEBUG_PUSH
11955 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11956#endif
11957 }
11958
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011959 if (enc != XML_CHAR_ENCODING_NONE) {
11960 xmlSwitchEncoding(ctxt, enc);
11961 }
11962
Owen Taylor3473f882001-02-23 17:55:21 +000011963 return(ctxt);
11964}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011965#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011966
11967/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011968 * xmlStopParser:
11969 * @ctxt: an XML parser context
11970 *
11971 * Blocks further parser processing
11972 */
11973void
11974xmlStopParser(xmlParserCtxtPtr ctxt) {
11975 if (ctxt == NULL)
11976 return;
11977 ctxt->instate = XML_PARSER_EOF;
11978 ctxt->disableSAX = 1;
11979 if (ctxt->input != NULL) {
11980 ctxt->input->cur = BAD_CAST"";
11981 ctxt->input->base = ctxt->input->cur;
11982 }
11983}
11984
11985/**
Owen Taylor3473f882001-02-23 17:55:21 +000011986 * xmlCreateIOParserCtxt:
11987 * @sax: a SAX handler
11988 * @user_data: The user data returned on SAX callbacks
11989 * @ioread: an I/O read function
11990 * @ioclose: an I/O close function
11991 * @ioctx: an I/O handler
11992 * @enc: the charset encoding if known
11993 *
11994 * Create a parser context for using the XML parser with an existing
11995 * I/O stream
11996 *
11997 * Returns the new parser context or NULL
11998 */
11999xmlParserCtxtPtr
12000xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12001 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12002 void *ioctx, xmlCharEncoding enc) {
12003 xmlParserCtxtPtr ctxt;
12004 xmlParserInputPtr inputStream;
12005 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012006
Daniel Veillard42595322004-11-08 10:52:06 +000012007 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012008
12009 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012010 if (buf == NULL) {
12011 if (ioclose != NULL)
12012 ioclose(ioctx);
12013 return (NULL);
12014 }
Owen Taylor3473f882001-02-23 17:55:21 +000012015
12016 ctxt = xmlNewParserCtxt();
12017 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012018 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012019 return(NULL);
12020 }
12021 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012022#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012023 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012024#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012025 xmlFree(ctxt->sax);
12026 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12027 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012028 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012029 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012030 return(NULL);
12031 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012032 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12033 if (sax->initialized == XML_SAX2_MAGIC)
12034 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12035 else
12036 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012037 if (user_data != NULL)
12038 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012039 }
Owen Taylor3473f882001-02-23 17:55:21 +000012040
12041 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12042 if (inputStream == NULL) {
12043 xmlFreeParserCtxt(ctxt);
12044 return(NULL);
12045 }
12046 inputPush(ctxt, inputStream);
12047
12048 return(ctxt);
12049}
12050
Daniel Veillard4432df22003-09-28 18:58:27 +000012051#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012052/************************************************************************
12053 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012054 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012055 * *
12056 ************************************************************************/
12057
12058/**
12059 * xmlIOParseDTD:
12060 * @sax: the SAX handler block or NULL
12061 * @input: an Input Buffer
12062 * @enc: the charset encoding if known
12063 *
12064 * Load and parse a DTD
12065 *
12066 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012067 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012068 */
12069
12070xmlDtdPtr
12071xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12072 xmlCharEncoding enc) {
12073 xmlDtdPtr ret = NULL;
12074 xmlParserCtxtPtr ctxt;
12075 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012076 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012077
12078 if (input == NULL)
12079 return(NULL);
12080
12081 ctxt = xmlNewParserCtxt();
12082 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012083 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012084 return(NULL);
12085 }
12086
12087 /*
12088 * Set-up the SAX context
12089 */
12090 if (sax != NULL) {
12091 if (ctxt->sax != NULL)
12092 xmlFree(ctxt->sax);
12093 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012094 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012095 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012096 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012097
12098 /*
12099 * generate a parser input from the I/O handler
12100 */
12101
Daniel Veillard43caefb2003-12-07 19:32:22 +000012102 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012103 if (pinput == NULL) {
12104 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012105 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012106 xmlFreeParserCtxt(ctxt);
12107 return(NULL);
12108 }
12109
12110 /*
12111 * plug some encoding conversion routines here.
12112 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012113 if (xmlPushInput(ctxt, pinput) < 0) {
12114 if (sax != NULL) ctxt->sax = NULL;
12115 xmlFreeParserCtxt(ctxt);
12116 return(NULL);
12117 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012118 if (enc != XML_CHAR_ENCODING_NONE) {
12119 xmlSwitchEncoding(ctxt, enc);
12120 }
Owen Taylor3473f882001-02-23 17:55:21 +000012121
12122 pinput->filename = NULL;
12123 pinput->line = 1;
12124 pinput->col = 1;
12125 pinput->base = ctxt->input->cur;
12126 pinput->cur = ctxt->input->cur;
12127 pinput->free = NULL;
12128
12129 /*
12130 * let's parse that entity knowing it's an external subset.
12131 */
12132 ctxt->inSubset = 2;
12133 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012134 if (ctxt->myDoc == NULL) {
12135 xmlErrMemory(ctxt, "New Doc failed");
12136 return(NULL);
12137 }
12138 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012139 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12140 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012141
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012142 if ((enc == XML_CHAR_ENCODING_NONE) &&
12143 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000012144 /*
12145 * Get the 4 first bytes and decode the charset
12146 * if enc != XML_CHAR_ENCODING_NONE
12147 * plug some encoding conversion routines.
12148 */
12149 start[0] = RAW;
12150 start[1] = NXT(1);
12151 start[2] = NXT(2);
12152 start[3] = NXT(3);
12153 enc = xmlDetectCharEncoding(start, 4);
12154 if (enc != XML_CHAR_ENCODING_NONE) {
12155 xmlSwitchEncoding(ctxt, enc);
12156 }
12157 }
12158
Owen Taylor3473f882001-02-23 17:55:21 +000012159 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12160
12161 if (ctxt->myDoc != NULL) {
12162 if (ctxt->wellFormed) {
12163 ret = ctxt->myDoc->extSubset;
12164 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012165 if (ret != NULL) {
12166 xmlNodePtr tmp;
12167
12168 ret->doc = NULL;
12169 tmp = ret->children;
12170 while (tmp != NULL) {
12171 tmp->doc = NULL;
12172 tmp = tmp->next;
12173 }
12174 }
Owen Taylor3473f882001-02-23 17:55:21 +000012175 } else {
12176 ret = NULL;
12177 }
12178 xmlFreeDoc(ctxt->myDoc);
12179 ctxt->myDoc = NULL;
12180 }
12181 if (sax != NULL) ctxt->sax = NULL;
12182 xmlFreeParserCtxt(ctxt);
12183
12184 return(ret);
12185}
12186
12187/**
12188 * xmlSAXParseDTD:
12189 * @sax: the SAX handler block
12190 * @ExternalID: a NAME* containing the External ID of the DTD
12191 * @SystemID: a NAME* containing the URL to the DTD
12192 *
12193 * Load and parse an external subset.
12194 *
12195 * Returns the resulting xmlDtdPtr or NULL in case of error.
12196 */
12197
12198xmlDtdPtr
12199xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12200 const xmlChar *SystemID) {
12201 xmlDtdPtr ret = NULL;
12202 xmlParserCtxtPtr ctxt;
12203 xmlParserInputPtr input = NULL;
12204 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012205 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012206
12207 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12208
12209 ctxt = xmlNewParserCtxt();
12210 if (ctxt == NULL) {
12211 return(NULL);
12212 }
12213
12214 /*
12215 * Set-up the SAX context
12216 */
12217 if (sax != NULL) {
12218 if (ctxt->sax != NULL)
12219 xmlFree(ctxt->sax);
12220 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012221 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012222 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012223
12224 /*
12225 * Canonicalise the system ID
12226 */
12227 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012228 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012229 xmlFreeParserCtxt(ctxt);
12230 return(NULL);
12231 }
Owen Taylor3473f882001-02-23 17:55:21 +000012232
12233 /*
12234 * Ask the Entity resolver to load the damn thing
12235 */
12236
12237 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012238 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12239 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012240 if (input == NULL) {
12241 if (sax != NULL) ctxt->sax = NULL;
12242 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012243 if (systemIdCanonic != NULL)
12244 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012245 return(NULL);
12246 }
12247
12248 /*
12249 * plug some encoding conversion routines here.
12250 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012251 if (xmlPushInput(ctxt, input) < 0) {
12252 if (sax != NULL) ctxt->sax = NULL;
12253 xmlFreeParserCtxt(ctxt);
12254 if (systemIdCanonic != NULL)
12255 xmlFree(systemIdCanonic);
12256 return(NULL);
12257 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012258 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12259 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12260 xmlSwitchEncoding(ctxt, enc);
12261 }
Owen Taylor3473f882001-02-23 17:55:21 +000012262
12263 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012264 input->filename = (char *) systemIdCanonic;
12265 else
12266 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012267 input->line = 1;
12268 input->col = 1;
12269 input->base = ctxt->input->cur;
12270 input->cur = ctxt->input->cur;
12271 input->free = NULL;
12272
12273 /*
12274 * let's parse that entity knowing it's an external subset.
12275 */
12276 ctxt->inSubset = 2;
12277 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012278 if (ctxt->myDoc == NULL) {
12279 xmlErrMemory(ctxt, "New Doc failed");
12280 if (sax != NULL) ctxt->sax = NULL;
12281 xmlFreeParserCtxt(ctxt);
12282 return(NULL);
12283 }
12284 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012285 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12286 ExternalID, SystemID);
12287 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12288
12289 if (ctxt->myDoc != NULL) {
12290 if (ctxt->wellFormed) {
12291 ret = ctxt->myDoc->extSubset;
12292 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012293 if (ret != NULL) {
12294 xmlNodePtr tmp;
12295
12296 ret->doc = NULL;
12297 tmp = ret->children;
12298 while (tmp != NULL) {
12299 tmp->doc = NULL;
12300 tmp = tmp->next;
12301 }
12302 }
Owen Taylor3473f882001-02-23 17:55:21 +000012303 } else {
12304 ret = NULL;
12305 }
12306 xmlFreeDoc(ctxt->myDoc);
12307 ctxt->myDoc = NULL;
12308 }
12309 if (sax != NULL) ctxt->sax = NULL;
12310 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012311
Owen Taylor3473f882001-02-23 17:55:21 +000012312 return(ret);
12313}
12314
Daniel Veillard4432df22003-09-28 18:58:27 +000012315
Owen Taylor3473f882001-02-23 17:55:21 +000012316/**
12317 * xmlParseDTD:
12318 * @ExternalID: a NAME* containing the External ID of the DTD
12319 * @SystemID: a NAME* containing the URL to the DTD
12320 *
12321 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012322 *
Owen Taylor3473f882001-02-23 17:55:21 +000012323 * Returns the resulting xmlDtdPtr or NULL in case of error.
12324 */
12325
12326xmlDtdPtr
12327xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12328 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12329}
Daniel Veillard4432df22003-09-28 18:58:27 +000012330#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012331
12332/************************************************************************
12333 * *
12334 * Front ends when parsing an Entity *
12335 * *
12336 ************************************************************************/
12337
12338/**
Owen Taylor3473f882001-02-23 17:55:21 +000012339 * xmlParseCtxtExternalEntity:
12340 * @ctx: the existing parsing context
12341 * @URL: the URL for the entity to load
12342 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012343 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012344 *
12345 * Parse an external general entity within an existing parsing context
12346 * An external general parsed entity is well-formed if it matches the
12347 * production labeled extParsedEnt.
12348 *
12349 * [78] extParsedEnt ::= TextDecl? content
12350 *
12351 * Returns 0 if the entity is well formed, -1 in case of args problem and
12352 * the parser error code otherwise
12353 */
12354
12355int
12356xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012357 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012358 xmlParserCtxtPtr ctxt;
12359 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012360 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012361 xmlSAXHandlerPtr oldsax = NULL;
12362 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012363 xmlChar start[4];
12364 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012365
Daniel Veillardce682bc2004-11-05 17:22:25 +000012366 if (ctx == NULL) return(-1);
12367
Daniel Veillard0161e632008-08-28 15:36:32 +000012368 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12369 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012370 return(XML_ERR_ENTITY_LOOP);
12371 }
12372
Daniel Veillardcda96922001-08-21 10:56:31 +000012373 if (lst != NULL)
12374 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012375 if ((URL == NULL) && (ID == NULL))
12376 return(-1);
12377 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12378 return(-1);
12379
Rob Richards798743a2009-06-19 13:54:25 -040012380 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012381 if (ctxt == NULL) {
12382 return(-1);
12383 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012384
Owen Taylor3473f882001-02-23 17:55:21 +000012385 oldsax = ctxt->sax;
12386 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012387 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012388 newDoc = xmlNewDoc(BAD_CAST "1.0");
12389 if (newDoc == NULL) {
12390 xmlFreeParserCtxt(ctxt);
12391 return(-1);
12392 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012393 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012394 if (ctx->myDoc->dict) {
12395 newDoc->dict = ctx->myDoc->dict;
12396 xmlDictReference(newDoc->dict);
12397 }
Owen Taylor3473f882001-02-23 17:55:21 +000012398 if (ctx->myDoc != NULL) {
12399 newDoc->intSubset = ctx->myDoc->intSubset;
12400 newDoc->extSubset = ctx->myDoc->extSubset;
12401 }
12402 if (ctx->myDoc->URL != NULL) {
12403 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12404 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012405 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12406 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012407 ctxt->sax = oldsax;
12408 xmlFreeParserCtxt(ctxt);
12409 newDoc->intSubset = NULL;
12410 newDoc->extSubset = NULL;
12411 xmlFreeDoc(newDoc);
12412 return(-1);
12413 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012414 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012415 nodePush(ctxt, newDoc->children);
12416 if (ctx->myDoc == NULL) {
12417 ctxt->myDoc = newDoc;
12418 } else {
12419 ctxt->myDoc = ctx->myDoc;
12420 newDoc->children->doc = ctx->myDoc;
12421 }
12422
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012423 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012424 * Get the 4 first bytes and decode the charset
12425 * if enc != XML_CHAR_ENCODING_NONE
12426 * plug some encoding conversion routines.
12427 */
12428 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012429 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12430 start[0] = RAW;
12431 start[1] = NXT(1);
12432 start[2] = NXT(2);
12433 start[3] = NXT(3);
12434 enc = xmlDetectCharEncoding(start, 4);
12435 if (enc != XML_CHAR_ENCODING_NONE) {
12436 xmlSwitchEncoding(ctxt, enc);
12437 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012438 }
12439
Owen Taylor3473f882001-02-23 17:55:21 +000012440 /*
12441 * Parse a possible text declaration first
12442 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012443 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012444 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012445 /*
12446 * An XML-1.0 document can't reference an entity not XML-1.0
12447 */
12448 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12449 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12450 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12451 "Version mismatch between document and entity\n");
12452 }
Owen Taylor3473f882001-02-23 17:55:21 +000012453 }
12454
12455 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012456 * If the user provided its own SAX callbacks then reuse the
12457 * useData callback field, otherwise the expected setup in a
12458 * DOM builder is to have userData == ctxt
12459 */
12460 if (ctx->userData == ctx)
12461 ctxt->userData = ctxt;
12462 else
12463 ctxt->userData = ctx->userData;
12464
12465 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012466 * Doing validity checking on chunk doesn't make sense
12467 */
12468 ctxt->instate = XML_PARSER_CONTENT;
12469 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012470 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012471 ctxt->loadsubset = ctx->loadsubset;
12472 ctxt->depth = ctx->depth + 1;
12473 ctxt->replaceEntities = ctx->replaceEntities;
12474 if (ctxt->validate) {
12475 ctxt->vctxt.error = ctx->vctxt.error;
12476 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012477 } else {
12478 ctxt->vctxt.error = NULL;
12479 ctxt->vctxt.warning = NULL;
12480 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012481 ctxt->vctxt.nodeTab = NULL;
12482 ctxt->vctxt.nodeNr = 0;
12483 ctxt->vctxt.nodeMax = 0;
12484 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012485 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12486 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012487 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12488 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12489 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012490 ctxt->dictNames = ctx->dictNames;
12491 ctxt->attsDefault = ctx->attsDefault;
12492 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012493 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012494
12495 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012496
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012497 ctx->validate = ctxt->validate;
12498 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012499 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012500 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012501 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012502 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012503 }
12504 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012505 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012506 }
12507
12508 if (!ctxt->wellFormed) {
12509 if (ctxt->errNo == 0)
12510 ret = 1;
12511 else
12512 ret = ctxt->errNo;
12513 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012514 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012515 xmlNodePtr cur;
12516
12517 /*
12518 * Return the newly created nodeset after unlinking it from
12519 * they pseudo parent.
12520 */
12521 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012522 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012523 while (cur != NULL) {
12524 cur->parent = NULL;
12525 cur = cur->next;
12526 }
12527 newDoc->children->children = NULL;
12528 }
12529 ret = 0;
12530 }
12531 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012532 ctxt->dict = NULL;
12533 ctxt->attsDefault = NULL;
12534 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012535 xmlFreeParserCtxt(ctxt);
12536 newDoc->intSubset = NULL;
12537 newDoc->extSubset = NULL;
12538 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012539
Owen Taylor3473f882001-02-23 17:55:21 +000012540 return(ret);
12541}
12542
12543/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012544 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012545 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012546 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012547 * @sax: the SAX handler bloc (possibly NULL)
12548 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12549 * @depth: Used for loop detection, use 0
12550 * @URL: the URL for the entity to load
12551 * @ID: the System ID for the entity to load
12552 * @list: the return value for the set of parsed nodes
12553 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012554 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012555 *
12556 * Returns 0 if the entity is well formed, -1 in case of args problem and
12557 * the parser error code otherwise
12558 */
12559
Daniel Veillard7d515752003-09-26 19:12:37 +000012560static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012561xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12562 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012563 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012564 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012565 xmlParserCtxtPtr ctxt;
12566 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012567 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012568 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012569 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012570 xmlChar start[4];
12571 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012572
Daniel Veillard0161e632008-08-28 15:36:32 +000012573 if (((depth > 40) &&
12574 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12575 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012576 return(XML_ERR_ENTITY_LOOP);
12577 }
12578
Owen Taylor3473f882001-02-23 17:55:21 +000012579 if (list != NULL)
12580 *list = NULL;
12581 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012582 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012583 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012584 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012585
12586
Rob Richards9c0aa472009-03-26 18:10:19 +000012587 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012588 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012589 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012590 if (oldctxt != NULL) {
12591 ctxt->_private = oldctxt->_private;
12592 ctxt->loadsubset = oldctxt->loadsubset;
12593 ctxt->validate = oldctxt->validate;
12594 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012595 ctxt->record_info = oldctxt->record_info;
12596 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12597 ctxt->node_seq.length = oldctxt->node_seq.length;
12598 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012599 } else {
12600 /*
12601 * Doing validity checking on chunk without context
12602 * doesn't make sense
12603 */
12604 ctxt->_private = NULL;
12605 ctxt->validate = 0;
12606 ctxt->external = 2;
12607 ctxt->loadsubset = 0;
12608 }
Owen Taylor3473f882001-02-23 17:55:21 +000012609 if (sax != NULL) {
12610 oldsax = ctxt->sax;
12611 ctxt->sax = sax;
12612 if (user_data != NULL)
12613 ctxt->userData = user_data;
12614 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012615 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012616 newDoc = xmlNewDoc(BAD_CAST "1.0");
12617 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012618 ctxt->node_seq.maximum = 0;
12619 ctxt->node_seq.length = 0;
12620 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012621 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012622 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012623 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012624 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012625 newDoc->intSubset = doc->intSubset;
12626 newDoc->extSubset = doc->extSubset;
12627 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012628 xmlDictReference(newDoc->dict);
12629
Owen Taylor3473f882001-02-23 17:55:21 +000012630 if (doc->URL != NULL) {
12631 newDoc->URL = xmlStrdup(doc->URL);
12632 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012633 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12634 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012635 if (sax != NULL)
12636 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012637 ctxt->node_seq.maximum = 0;
12638 ctxt->node_seq.length = 0;
12639 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012640 xmlFreeParserCtxt(ctxt);
12641 newDoc->intSubset = NULL;
12642 newDoc->extSubset = NULL;
12643 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012644 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012645 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012646 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012647 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012648 ctxt->myDoc = doc;
12649 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012650
Daniel Veillard0161e632008-08-28 15:36:32 +000012651 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012652 * Get the 4 first bytes and decode the charset
12653 * if enc != XML_CHAR_ENCODING_NONE
12654 * plug some encoding conversion routines.
12655 */
12656 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012657 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12658 start[0] = RAW;
12659 start[1] = NXT(1);
12660 start[2] = NXT(2);
12661 start[3] = NXT(3);
12662 enc = xmlDetectCharEncoding(start, 4);
12663 if (enc != XML_CHAR_ENCODING_NONE) {
12664 xmlSwitchEncoding(ctxt, enc);
12665 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012666 }
12667
Owen Taylor3473f882001-02-23 17:55:21 +000012668 /*
12669 * Parse a possible text declaration first
12670 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012671 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012672 xmlParseTextDecl(ctxt);
12673 }
12674
Owen Taylor3473f882001-02-23 17:55:21 +000012675 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012676 ctxt->depth = depth;
12677
12678 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012679
Daniel Veillard561b7f82002-03-20 21:55:57 +000012680 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012681 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012682 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012683 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012684 }
12685 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012686 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012687 }
12688
12689 if (!ctxt->wellFormed) {
12690 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012691 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012692 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012693 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012694 } else {
12695 if (list != NULL) {
12696 xmlNodePtr cur;
12697
12698 /*
12699 * Return the newly created nodeset after unlinking it from
12700 * they pseudo parent.
12701 */
12702 cur = newDoc->children->children;
12703 *list = cur;
12704 while (cur != NULL) {
12705 cur->parent = NULL;
12706 cur = cur->next;
12707 }
12708 newDoc->children->children = NULL;
12709 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012710 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012711 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012712
12713 /*
12714 * Record in the parent context the number of entities replacement
12715 * done when parsing that reference.
12716 */
Daniel Veillard76d36452009-09-07 11:19:33 +020012717 if (oldctxt != NULL)
12718 oldctxt->nbentities += ctxt->nbentities;
12719
Daniel Veillard0161e632008-08-28 15:36:32 +000012720 /*
12721 * Also record the size of the entity parsed
12722 */
12723 if (ctxt->input != NULL) {
12724 oldctxt->sizeentities += ctxt->input->consumed;
12725 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12726 }
12727 /*
12728 * And record the last error if any
12729 */
12730 if (ctxt->lastError.code != XML_ERR_OK)
12731 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12732
Owen Taylor3473f882001-02-23 17:55:21 +000012733 if (sax != NULL)
12734 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012735 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12736 oldctxt->node_seq.length = ctxt->node_seq.length;
12737 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012738 ctxt->node_seq.maximum = 0;
12739 ctxt->node_seq.length = 0;
12740 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012741 xmlFreeParserCtxt(ctxt);
12742 newDoc->intSubset = NULL;
12743 newDoc->extSubset = NULL;
12744 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012745
Owen Taylor3473f882001-02-23 17:55:21 +000012746 return(ret);
12747}
12748
Daniel Veillard81273902003-09-30 00:43:48 +000012749#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012750/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012751 * xmlParseExternalEntity:
12752 * @doc: the document the chunk pertains to
12753 * @sax: the SAX handler bloc (possibly NULL)
12754 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12755 * @depth: Used for loop detection, use 0
12756 * @URL: the URL for the entity to load
12757 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012758 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012759 *
12760 * Parse an external general entity
12761 * An external general parsed entity is well-formed if it matches the
12762 * production labeled extParsedEnt.
12763 *
12764 * [78] extParsedEnt ::= TextDecl? content
12765 *
12766 * Returns 0 if the entity is well formed, -1 in case of args problem and
12767 * the parser error code otherwise
12768 */
12769
12770int
12771xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012772 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012773 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012774 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012775}
12776
12777/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012778 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012779 * @doc: the document the chunk pertains to
12780 * @sax: the SAX handler bloc (possibly NULL)
12781 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12782 * @depth: Used for loop detection, use 0
12783 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012784 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012785 *
12786 * Parse a well-balanced chunk of an XML document
12787 * called by the parser
12788 * The allowed sequence for the Well Balanced Chunk is the one defined by
12789 * the content production in the XML grammar:
12790 *
12791 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12792 *
12793 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12794 * the parser error code otherwise
12795 */
12796
12797int
12798xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012799 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012800 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12801 depth, string, lst, 0 );
12802}
Daniel Veillard81273902003-09-30 00:43:48 +000012803#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012804
/**
 * xmlParseBalancedChunkMemoryInternal:
 * @oldctxt:  the existing parsing context
 * @string:  the input string in UTF8 or ISO-Latin (zero terminated)
 * @user_data:  the user data field for the parser context
 * @lst:  the return value for the set of parsed nodes
 *
 *
 * Parse a well-balanced chunk of an XML document
 * called by the parser
 * The allowed sequence for the Well Balanced Chunk is the one defined by
 * the content production in the XML grammar:
 *
 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
 *
 * A temporary parser context is created over @string; it borrows the
 * dictionary, SAX block, options, namespace stack and attribute tables
 * of @oldctxt and builds the nodes under a temporary "pseudoroot"
 * element which is removed again before returning.
 *
 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
 *    error code otherwise. XML_ERR_ENTITY_LOOP is returned when the
 *    depth limit is exceeded, XML_ERR_INTERNAL_ERROR when @string is NULL
 *    or a document/root node cannot be allocated, and
 *    XML_WAR_UNDECLARED_ENTITY when the parser context cannot be created.
 *
 * The node list is only stored in @lst when the result is XML_ERR_OK;
 * in every other case *@lst is left NULL (this function has no
 * recover mode).
 */
static xmlParserErrors
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
        const xmlChar *string, void *user_data, xmlNodePtr *lst) {
    xmlParserCtxtPtr ctxt;
    xmlDocPtr newDoc = NULL;
    xmlNodePtr newRoot;
    xmlSAXHandlerPtr oldsax = NULL;
    xmlNodePtr content = NULL;
    xmlNodePtr last = NULL;
    int size;
    xmlParserErrors ret = XML_ERR_OK;
#ifdef SAX2
    int i;
#endif

    /*
     * Entity nesting guard: 40 levels unless XML_PARSE_HUGE is set,
     * 1024 levels in any case.
     */
    if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
        (oldctxt->depth > 1024)) {
        return(XML_ERR_ENTITY_LOOP);
    }


    if (lst != NULL)
        *lst = NULL;
    if (string == NULL)
        return(XML_ERR_INTERNAL_ERROR);

    size = xmlStrlen(string);

    ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
    if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
    if (user_data != NULL)
        ctxt->userData = user_data;
    else
        ctxt->userData = ctxt;
    /*
     * Replace the fresh context's dictionary with the parent's one.
     * No reference is taken here, which is why every exit path resets
     * ctxt->dict to NULL before freeing the context.
     */
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
    ctxt->dict = oldctxt->dict;
    ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
    ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
    /* 36 is presumably the length of XML_XML_NAMESPACE -- TODO confirm */
    ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);

#ifdef SAX2
    /* propagate namespaces down the entity (nsTab holds pairs) */
    for (i = 0;i < oldctxt->nsNr;i += 2) {
        nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
    }
#endif

    /* borrow the parent's SAX block; oldsax is put back before freeing */
    oldsax = ctxt->sax;
    ctxt->sax = oldctxt->sax;
    xmlDetectSAX2(ctxt);
    ctxt->replaceEntities = oldctxt->replaceEntities;
    ctxt->options = oldctxt->options;

    ctxt->_private = oldctxt->_private;
    if (oldctxt->myDoc == NULL) {
        /* no document yet: build into a scratch one, freed at the end */
        newDoc = xmlNewDoc(BAD_CAST "1.0");
        if (newDoc == NULL) {
            ctxt->sax = oldsax;
            ctxt->dict = NULL;
            xmlFreeParserCtxt(ctxt);
            return(XML_ERR_INTERNAL_ERROR);
        }
        newDoc->properties = XML_DOC_INTERNAL;
        newDoc->dict = ctxt->dict;
        xmlDictReference(newDoc->dict);
        ctxt->myDoc = newDoc;
    } else {
        /*
         * Build inside the parent's document; remember its current
         * children so they can be restored once parsing is done.
         */
        ctxt->myDoc = oldctxt->myDoc;
        content = ctxt->myDoc->children;
        last = ctxt->myDoc->last;
    }
    newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
    if (newRoot == NULL) {
        ctxt->sax = oldsax;
        ctxt->dict = NULL;
        xmlFreeParserCtxt(ctxt);
        if (newDoc != NULL) {
            xmlFreeDoc(newDoc);
        }
        return(XML_ERR_INTERNAL_ERROR);
    }
    /* temporarily make the pseudoroot the only child of the document */
    ctxt->myDoc->children = NULL;
    ctxt->myDoc->last = NULL;
    xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
    nodePush(ctxt, ctxt->myDoc->children);
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->depth = oldctxt->depth + 1;

    ctxt->validate = 0;
    ctxt->loadsubset = oldctxt->loadsubset;
    if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
        /*
         * ID/IDREF registration will be done in xmlValidateElement below
         */
        ctxt->loadsubset |= XML_SKIP_IDS;
    }
    /* borrowed tables: reset to NULL before the context is freed */
    ctxt->dictNames = oldctxt->dictNames;
    ctxt->attsDefault = oldctxt->attsDefault;
    ctxt->attsSpecial = oldctxt->attsSpecial;

    xmlParseContent(ctxt);
    /* anything left in the input means the chunk was not balanced */
    if ((RAW == '<') && (NXT(1) == '/')) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    } else if (RAW != 0) {
        xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
    }
    /* the node stack must be back at the pseudoroot */
    if (ctxt->node != ctxt->myDoc->children) {
        xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
    }

    if (!ctxt->wellFormed) {
        if (ctxt->errNo == 0)
            ret = XML_ERR_INTERNAL_ERROR;
        else
            ret = (xmlParserErrors)ctxt->errNo;
    } else {
      ret = XML_ERR_OK;
    }

    if ((lst != NULL) && (ret == XML_ERR_OK)) {
        xmlNodePtr cur;

        /*
         * Return the newly created nodeset after unlinking it from
         * the pseudo parent.
         */
        cur = ctxt->myDoc->children->children;
        *lst = cur;
        while (cur != NULL) {
#ifdef LIBXML_VALID_ENABLED
            /* validate top-level elements against the parent's DTD */
            if ((oldctxt->validate) && (oldctxt->wellFormed) &&
                (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
                (cur->type == XML_ELEMENT_NODE)) {
                oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
                        oldctxt->myDoc, cur);
            }
#endif /* LIBXML_VALID_ENABLED */
            cur->parent = NULL;
            cur = cur->next;
        }
        ctxt->myDoc->children->children = NULL;
    }
    if (ctxt->myDoc != NULL) {
        /*
         * Drop the pseudoroot (with whatever is still attached to it)
         * and restore the children saved before parsing.
         */
        xmlFreeNode(ctxt->myDoc->children);
        ctxt->myDoc->children = content;
        ctxt->myDoc->last = last;
    }

    /*
     * Record in the parent context the number of entities replacement
     * done when parsing that reference.
     *
     * NOTE(review): oldctxt has already been dereferenced unconditionally
     * above, so this NULL test can never be false here.
     */
    if (oldctxt != NULL)
        oldctxt->nbentities += ctxt->nbentities;

    /*
     * Also record the last error if any
     */
    if (ctxt->lastError.code != XML_ERR_OK)
        xmlCopyError(&ctxt->lastError, &oldctxt->lastError);

    /* detach everything borrowed from oldctxt before freeing */
    ctxt->sax = oldsax;
    ctxt->dict = NULL;
    ctxt->attsDefault = NULL;
    ctxt->attsSpecial = NULL;
    xmlFreeParserCtxt(ctxt);
    if (newDoc != NULL) {
        xmlFreeDoc(newDoc);
    }

    return(ret);
}
12998
Daniel Veillard29b17482004-08-16 00:39:03 +000012999/**
13000 * xmlParseInNodeContext:
13001 * @node: the context node
13002 * @data: the input string
13003 * @datalen: the input string length in bytes
13004 * @options: a combination of xmlParserOption
13005 * @lst: the return value for the set of parsed nodes
13006 *
13007 * Parse a well-balanced chunk of an XML document
13008 * within the context (DTD, namespaces, etc ...) of the given node.
13009 *
13010 * The allowed sequence for the data is a Well Balanced Chunk defined by
13011 * the content production in the XML grammar:
13012 *
13013 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13014 *
13015 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13016 * error code otherwise
13017 */
13018xmlParserErrors
13019xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13020 int options, xmlNodePtr *lst) {
13021#ifdef SAX2
13022 xmlParserCtxtPtr ctxt;
13023 xmlDocPtr doc = NULL;
13024 xmlNodePtr fake, cur;
13025 int nsnr = 0;
13026
13027 xmlParserErrors ret = XML_ERR_OK;
13028
13029 /*
13030 * check all input parameters, grab the document
13031 */
13032 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13033 return(XML_ERR_INTERNAL_ERROR);
13034 switch (node->type) {
13035 case XML_ELEMENT_NODE:
13036 case XML_ATTRIBUTE_NODE:
13037 case XML_TEXT_NODE:
13038 case XML_CDATA_SECTION_NODE:
13039 case XML_ENTITY_REF_NODE:
13040 case XML_PI_NODE:
13041 case XML_COMMENT_NODE:
13042 case XML_DOCUMENT_NODE:
13043 case XML_HTML_DOCUMENT_NODE:
13044 break;
13045 default:
13046 return(XML_ERR_INTERNAL_ERROR);
13047
13048 }
13049 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13050 (node->type != XML_DOCUMENT_NODE) &&
13051 (node->type != XML_HTML_DOCUMENT_NODE))
13052 node = node->parent;
13053 if (node == NULL)
13054 return(XML_ERR_INTERNAL_ERROR);
13055 if (node->type == XML_ELEMENT_NODE)
13056 doc = node->doc;
13057 else
13058 doc = (xmlDocPtr) node;
13059 if (doc == NULL)
13060 return(XML_ERR_INTERNAL_ERROR);
13061
13062 /*
13063 * allocate a context and set-up everything not related to the
13064 * node position in the tree
13065 */
13066 if (doc->type == XML_DOCUMENT_NODE)
13067 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13068#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013069 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013070 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013071 /*
13072 * When parsing in context, it makes no sense to add implied
13073 * elements like html/body/etc...
13074 */
13075 options |= HTML_PARSE_NOIMPLIED;
13076 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013077#endif
13078 else
13079 return(XML_ERR_INTERNAL_ERROR);
13080
13081 if (ctxt == NULL)
13082 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013083
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013084 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013085 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13086 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13087 * we must wait until the last moment to free the original one.
13088 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013089 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013090 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013091 xmlDictFree(ctxt->dict);
13092 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013093 } else
13094 options |= XML_PARSE_NODICT;
13095
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013096 if (doc->encoding != NULL) {
13097 xmlCharEncodingHandlerPtr hdlr;
13098
13099 if (ctxt->encoding != NULL)
13100 xmlFree((xmlChar *) ctxt->encoding);
13101 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13102
13103 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13104 if (hdlr != NULL) {
13105 xmlSwitchToEncoding(ctxt, hdlr);
13106 } else {
13107 return(XML_ERR_UNSUPPORTED_ENCODING);
13108 }
13109 }
13110
Daniel Veillard37334572008-07-31 08:20:02 +000013111 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013112 xmlDetectSAX2(ctxt);
13113 ctxt->myDoc = doc;
13114
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013115 fake = xmlNewComment(NULL);
13116 if (fake == NULL) {
13117 xmlFreeParserCtxt(ctxt);
13118 return(XML_ERR_NO_MEMORY);
13119 }
13120 xmlAddChild(node, fake);
13121
Daniel Veillard29b17482004-08-16 00:39:03 +000013122 if (node->type == XML_ELEMENT_NODE) {
13123 nodePush(ctxt, node);
13124 /*
13125 * initialize the SAX2 namespaces stack
13126 */
13127 cur = node;
13128 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13129 xmlNsPtr ns = cur->nsDef;
13130 const xmlChar *iprefix, *ihref;
13131
13132 while (ns != NULL) {
13133 if (ctxt->dict) {
13134 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13135 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13136 } else {
13137 iprefix = ns->prefix;
13138 ihref = ns->href;
13139 }
13140
13141 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13142 nsPush(ctxt, iprefix, ihref);
13143 nsnr++;
13144 }
13145 ns = ns->next;
13146 }
13147 cur = cur->parent;
13148 }
13149 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000013150 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013151
13152 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13153 /*
13154 * ID/IDREF registration will be done in xmlValidateElement below
13155 */
13156 ctxt->loadsubset |= XML_SKIP_IDS;
13157 }
13158
Daniel Veillard499cc922006-01-18 17:22:35 +000013159#ifdef LIBXML_HTML_ENABLED
13160 if (doc->type == XML_HTML_DOCUMENT_NODE)
13161 __htmlParseContent(ctxt);
13162 else
13163#endif
13164 xmlParseContent(ctxt);
13165
Daniel Veillard29b17482004-08-16 00:39:03 +000013166 nsPop(ctxt, nsnr);
13167 if ((RAW == '<') && (NXT(1) == '/')) {
13168 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13169 } else if (RAW != 0) {
13170 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13171 }
13172 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13173 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13174 ctxt->wellFormed = 0;
13175 }
13176
13177 if (!ctxt->wellFormed) {
13178 if (ctxt->errNo == 0)
13179 ret = XML_ERR_INTERNAL_ERROR;
13180 else
13181 ret = (xmlParserErrors)ctxt->errNo;
13182 } else {
13183 ret = XML_ERR_OK;
13184 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013185
Daniel Veillard29b17482004-08-16 00:39:03 +000013186 /*
13187 * Return the newly created nodeset after unlinking it from
13188 * the pseudo sibling.
13189 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013190
Daniel Veillard29b17482004-08-16 00:39:03 +000013191 cur = fake->next;
13192 fake->next = NULL;
13193 node->last = fake;
13194
13195 if (cur != NULL) {
13196 cur->prev = NULL;
13197 }
13198
13199 *lst = cur;
13200
13201 while (cur != NULL) {
13202 cur->parent = NULL;
13203 cur = cur->next;
13204 }
13205
13206 xmlUnlinkNode(fake);
13207 xmlFreeNode(fake);
13208
13209
13210 if (ret != XML_ERR_OK) {
13211 xmlFreeNodeList(*lst);
13212 *lst = NULL;
13213 }
William M. Brackc3f81342004-10-03 01:22:44 +000013214
William M. Brackb7b54de2004-10-06 16:38:01 +000013215 if (doc->dict != NULL)
13216 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013217 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013218
Daniel Veillard29b17482004-08-16 00:39:03 +000013219 return(ret);
13220#else /* !SAX2 */
13221 return(XML_ERR_INTERNAL_ERROR);
13222#endif
13223}
13224
Daniel Veillard81273902003-09-30 00:43:48 +000013225#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013226/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013227 * xmlParseBalancedChunkMemoryRecover:
13228 * @doc: the document the chunk pertains to
13229 * @sax: the SAX handler bloc (possibly NULL)
13230 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13231 * @depth: Used for loop detection, use 0
13232 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13233 * @lst: the return value for the set of parsed nodes
13234 * @recover: return nodes even if the data is broken (use 0)
13235 *
13236 *
13237 * Parse a well-balanced chunk of an XML document
13238 * called by the parser
13239 * The allowed sequence for the Well Balanced Chunk is the one defined by
13240 * the content production in the XML grammar:
13241 *
13242 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13243 *
13244 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13245 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013246 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013247 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013248 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13249 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013250 */
13251int
13252xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013253 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013254 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013255 xmlParserCtxtPtr ctxt;
13256 xmlDocPtr newDoc;
13257 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013258 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013259 int size;
13260 int ret = 0;
13261
Daniel Veillard0161e632008-08-28 15:36:32 +000013262 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013263 return(XML_ERR_ENTITY_LOOP);
13264 }
13265
13266
Daniel Veillardcda96922001-08-21 10:56:31 +000013267 if (lst != NULL)
13268 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013269 if (string == NULL)
13270 return(-1);
13271
13272 size = xmlStrlen(string);
13273
13274 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13275 if (ctxt == NULL) return(-1);
13276 ctxt->userData = ctxt;
13277 if (sax != NULL) {
13278 oldsax = ctxt->sax;
13279 ctxt->sax = sax;
13280 if (user_data != NULL)
13281 ctxt->userData = user_data;
13282 }
13283 newDoc = xmlNewDoc(BAD_CAST "1.0");
13284 if (newDoc == NULL) {
13285 xmlFreeParserCtxt(ctxt);
13286 return(-1);
13287 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013288 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013289 if ((doc != NULL) && (doc->dict != NULL)) {
13290 xmlDictFree(ctxt->dict);
13291 ctxt->dict = doc->dict;
13292 xmlDictReference(ctxt->dict);
13293 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13294 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13295 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13296 ctxt->dictNames = 1;
13297 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013298 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013299 }
Owen Taylor3473f882001-02-23 17:55:21 +000013300 if (doc != NULL) {
13301 newDoc->intSubset = doc->intSubset;
13302 newDoc->extSubset = doc->extSubset;
13303 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013304 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13305 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013306 if (sax != NULL)
13307 ctxt->sax = oldsax;
13308 xmlFreeParserCtxt(ctxt);
13309 newDoc->intSubset = NULL;
13310 newDoc->extSubset = NULL;
13311 xmlFreeDoc(newDoc);
13312 return(-1);
13313 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013314 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13315 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013316 if (doc == NULL) {
13317 ctxt->myDoc = newDoc;
13318 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013319 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013320 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013321 /* Ensure that doc has XML spec namespace */
13322 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13323 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013324 }
13325 ctxt->instate = XML_PARSER_CONTENT;
13326 ctxt->depth = depth;
13327
13328 /*
13329 * Doing validity checking on chunk doesn't make sense
13330 */
13331 ctxt->validate = 0;
13332 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013333 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013334
Daniel Veillardb39bc392002-10-26 19:29:51 +000013335 if ( doc != NULL ){
13336 content = doc->children;
13337 doc->children = NULL;
13338 xmlParseContent(ctxt);
13339 doc->children = content;
13340 }
13341 else {
13342 xmlParseContent(ctxt);
13343 }
Owen Taylor3473f882001-02-23 17:55:21 +000013344 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013345 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013346 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013347 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013348 }
13349 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013350 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013351 }
13352
13353 if (!ctxt->wellFormed) {
13354 if (ctxt->errNo == 0)
13355 ret = 1;
13356 else
13357 ret = ctxt->errNo;
13358 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013359 ret = 0;
13360 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013361
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013362 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13363 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013364
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013365 /*
13366 * Return the newly created nodeset after unlinking it from
13367 * they pseudo parent.
13368 */
13369 cur = newDoc->children->children;
13370 *lst = cur;
13371 while (cur != NULL) {
13372 xmlSetTreeDoc(cur, doc);
13373 cur->parent = NULL;
13374 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013375 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013376 newDoc->children->children = NULL;
13377 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013378
13379 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013380 ctxt->sax = oldsax;
13381 xmlFreeParserCtxt(ctxt);
13382 newDoc->intSubset = NULL;
13383 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013384 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013385 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013386
Owen Taylor3473f882001-02-23 17:55:21 +000013387 return(ret);
13388}
13389
13390/**
13391 * xmlSAXParseEntity:
13392 * @sax: the SAX handler block
13393 * @filename: the filename
13394 *
13395 * parse an XML external entity out of context and build a tree.
13396 * It use the given SAX function block to handle the parsing callback.
13397 * If sax is NULL, fallback to the default DOM tree building routines.
13398 *
13399 * [78] extParsedEnt ::= TextDecl? content
13400 *
13401 * This correspond to a "Well Balanced" chunk
13402 *
13403 * Returns the resulting document tree
13404 */
13405
13406xmlDocPtr
13407xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13408 xmlDocPtr ret;
13409 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013410
13411 ctxt = xmlCreateFileParserCtxt(filename);
13412 if (ctxt == NULL) {
13413 return(NULL);
13414 }
13415 if (sax != NULL) {
13416 if (ctxt->sax != NULL)
13417 xmlFree(ctxt->sax);
13418 ctxt->sax = sax;
13419 ctxt->userData = NULL;
13420 }
13421
Owen Taylor3473f882001-02-23 17:55:21 +000013422 xmlParseExtParsedEnt(ctxt);
13423
13424 if (ctxt->wellFormed)
13425 ret = ctxt->myDoc;
13426 else {
13427 ret = NULL;
13428 xmlFreeDoc(ctxt->myDoc);
13429 ctxt->myDoc = NULL;
13430 }
13431 if (sax != NULL)
13432 ctxt->sax = NULL;
13433 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013434
Owen Taylor3473f882001-02-23 17:55:21 +000013435 return(ret);
13436}
13437
13438/**
13439 * xmlParseEntity:
13440 * @filename: the filename
13441 *
13442 * parse an XML external entity out of context and build a tree.
13443 *
13444 * [78] extParsedEnt ::= TextDecl? content
13445 *
13446 * This correspond to a "Well Balanced" chunk
13447 *
13448 * Returns the resulting document tree
13449 */
13450
13451xmlDocPtr
13452xmlParseEntity(const char *filename) {
13453 return(xmlSAXParseEntity(NULL, filename));
13454}
Daniel Veillard81273902003-09-30 00:43:48 +000013455#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013456
13457/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013458 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013459 * @URL: the entity URL
13460 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013461 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013462 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013463 *
13464 * Create a parser context for an external entity
13465 * Automatic support for ZLIB/Compress compressed document is provided
13466 * by default if found at compile-time.
13467 *
13468 * Returns the new parser context or NULL
13469 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013470static xmlParserCtxtPtr
13471xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13472 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013473 xmlParserCtxtPtr ctxt;
13474 xmlParserInputPtr inputStream;
13475 char *directory = NULL;
13476 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013477
Owen Taylor3473f882001-02-23 17:55:21 +000013478 ctxt = xmlNewParserCtxt();
13479 if (ctxt == NULL) {
13480 return(NULL);
13481 }
13482
Daniel Veillard48247b42009-07-10 16:12:46 +020013483 if (pctx != NULL) {
13484 ctxt->options = pctx->options;
13485 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013486 }
13487
Owen Taylor3473f882001-02-23 17:55:21 +000013488 uri = xmlBuildURI(URL, base);
13489
13490 if (uri == NULL) {
13491 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13492 if (inputStream == NULL) {
13493 xmlFreeParserCtxt(ctxt);
13494 return(NULL);
13495 }
13496
13497 inputPush(ctxt, inputStream);
13498
13499 if ((ctxt->directory == NULL) && (directory == NULL))
13500 directory = xmlParserGetDirectory((char *)URL);
13501 if ((ctxt->directory == NULL) && (directory != NULL))
13502 ctxt->directory = directory;
13503 } else {
13504 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13505 if (inputStream == NULL) {
13506 xmlFree(uri);
13507 xmlFreeParserCtxt(ctxt);
13508 return(NULL);
13509 }
13510
13511 inputPush(ctxt, inputStream);
13512
13513 if ((ctxt->directory == NULL) && (directory == NULL))
13514 directory = xmlParserGetDirectory((char *)uri);
13515 if ((ctxt->directory == NULL) && (directory != NULL))
13516 ctxt->directory = directory;
13517 xmlFree(uri);
13518 }
Owen Taylor3473f882001-02-23 17:55:21 +000013519 return(ctxt);
13520}
13521
Rob Richards9c0aa472009-03-26 18:10:19 +000013522/**
13523 * xmlCreateEntityParserCtxt:
13524 * @URL: the entity URL
13525 * @ID: the entity PUBLIC ID
13526 * @base: a possible base for the target URI
13527 *
13528 * Create a parser context for an external entity
13529 * Automatic support for ZLIB/Compress compressed document is provided
13530 * by default if found at compile-time.
13531 *
13532 * Returns the new parser context or NULL
13533 */
13534xmlParserCtxtPtr
13535xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13536 const xmlChar *base) {
13537 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13538
13539}
13540
Owen Taylor3473f882001-02-23 17:55:21 +000013541/************************************************************************
13542 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013543 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013544 * *
13545 ************************************************************************/
13546
13547/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013548 * xmlCreateURLParserCtxt:
13549 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013550 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013551 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013552 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013553 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013554 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013555 *
13556 * Returns the new parser context or NULL
13557 */
13558xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013559xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013560{
13561 xmlParserCtxtPtr ctxt;
13562 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013563 char *directory = NULL;
13564
Owen Taylor3473f882001-02-23 17:55:21 +000013565 ctxt = xmlNewParserCtxt();
13566 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013567 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013568 return(NULL);
13569 }
13570
Daniel Veillarddf292f72005-01-16 19:00:15 +000013571 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013572 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013573 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013574
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013575 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013576 if (inputStream == NULL) {
13577 xmlFreeParserCtxt(ctxt);
13578 return(NULL);
13579 }
13580
Owen Taylor3473f882001-02-23 17:55:21 +000013581 inputPush(ctxt, inputStream);
13582 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013583 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013584 if ((ctxt->directory == NULL) && (directory != NULL))
13585 ctxt->directory = directory;
13586
13587 return(ctxt);
13588}
13589
Daniel Veillard61b93382003-11-03 14:28:31 +000013590/**
13591 * xmlCreateFileParserCtxt:
13592 * @filename: the filename
13593 *
13594 * Create a parser context for a file content.
13595 * Automatic support for ZLIB/Compress compressed document is provided
13596 * by default if found at compile-time.
13597 *
13598 * Returns the new parser context or NULL
13599 */
13600xmlParserCtxtPtr
13601xmlCreateFileParserCtxt(const char *filename)
13602{
13603 return(xmlCreateURLParserCtxt(filename, 0));
13604}
13605
Daniel Veillard81273902003-09-30 00:43:48 +000013606#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013607/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013608 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013609 * @sax: the SAX handler block
13610 * @filename: the filename
13611 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13612 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013613 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013614 *
13615 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13616 * compressed document is provided by default if found at compile-time.
13617 * It use the given SAX function block to handle the parsing callback.
13618 * If sax is NULL, fallback to the default DOM tree building routines.
13619 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013620 * User data (void *) is stored within the parser context in the
13621 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013622 *
Owen Taylor3473f882001-02-23 17:55:21 +000013623 * Returns the resulting document tree
13624 */
13625
13626xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013627xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13628 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013629 xmlDocPtr ret;
13630 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013631
Daniel Veillard635ef722001-10-29 11:48:19 +000013632 xmlInitParser();
13633
Owen Taylor3473f882001-02-23 17:55:21 +000013634 ctxt = xmlCreateFileParserCtxt(filename);
13635 if (ctxt == NULL) {
13636 return(NULL);
13637 }
13638 if (sax != NULL) {
13639 if (ctxt->sax != NULL)
13640 xmlFree(ctxt->sax);
13641 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013642 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013643 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013644 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013645 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013646 }
Owen Taylor3473f882001-02-23 17:55:21 +000013647
Daniel Veillard37d2d162008-03-14 10:54:00 +000013648 if (ctxt->directory == NULL)
13649 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013650
Daniel Veillarddad3f682002-11-17 16:47:27 +000013651 ctxt->recovery = recovery;
13652
Owen Taylor3473f882001-02-23 17:55:21 +000013653 xmlParseDocument(ctxt);
13654
William M. Brackc07329e2003-09-08 01:57:30 +000013655 if ((ctxt->wellFormed) || recovery) {
13656 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013657 if (ret != NULL) {
13658 if (ctxt->input->buf->compressed > 0)
13659 ret->compression = 9;
13660 else
13661 ret->compression = ctxt->input->buf->compressed;
13662 }
William M. Brackc07329e2003-09-08 01:57:30 +000013663 }
Owen Taylor3473f882001-02-23 17:55:21 +000013664 else {
13665 ret = NULL;
13666 xmlFreeDoc(ctxt->myDoc);
13667 ctxt->myDoc = NULL;
13668 }
13669 if (sax != NULL)
13670 ctxt->sax = NULL;
13671 xmlFreeParserCtxt(ctxt);
13672
13673 return(ret);
13674}
13675
13676/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013677 * xmlSAXParseFile:
13678 * @sax: the SAX handler block
13679 * @filename: the filename
13680 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13681 * documents
13682 *
13683 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13684 * compressed document is provided by default if found at compile-time.
13685 * It use the given SAX function block to handle the parsing callback.
13686 * If sax is NULL, fallback to the default DOM tree building routines.
13687 *
13688 * Returns the resulting document tree
13689 */
13690
13691xmlDocPtr
13692xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13693 int recovery) {
13694 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13695}
13696
13697/**
Owen Taylor3473f882001-02-23 17:55:21 +000013698 * xmlRecoverDoc:
13699 * @cur: a pointer to an array of xmlChar
13700 *
13701 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013702 * In the case the document is not Well Formed, a attempt to build a
13703 * tree is tried anyway
13704 *
13705 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013706 */
13707
13708xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020013709xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013710 return(xmlSAXParseDoc(NULL, cur, 1));
13711}
13712
13713/**
13714 * xmlParseFile:
13715 * @filename: the filename
13716 *
13717 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13718 * compressed document is provided by default if found at compile-time.
13719 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013720 * Returns the resulting document tree if the file was wellformed,
13721 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013722 */
13723
13724xmlDocPtr
13725xmlParseFile(const char *filename) {
13726 return(xmlSAXParseFile(NULL, filename, 0));
13727}
13728
13729/**
13730 * xmlRecoverFile:
13731 * @filename: the filename
13732 *
13733 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13734 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013735 * In the case the document is not Well Formed, it attempts to build
13736 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013737 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013738 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013739 */
13740
13741xmlDocPtr
13742xmlRecoverFile(const char *filename) {
13743 return(xmlSAXParseFile(NULL, filename, 1));
13744}
13745
13746
13747/**
13748 * xmlSetupParserForBuffer:
13749 * @ctxt: an XML parser context
13750 * @buffer: a xmlChar * buffer
13751 * @filename: a file name
13752 *
13753 * Setup the parser context to parse a new buffer; Clears any prior
13754 * contents from the parser context. The buffer parameter must not be
13755 * NULL, but the filename parameter can be
13756 */
13757void
13758xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13759 const char* filename)
13760{
13761 xmlParserInputPtr input;
13762
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013763 if ((ctxt == NULL) || (buffer == NULL))
13764 return;
13765
Owen Taylor3473f882001-02-23 17:55:21 +000013766 input = xmlNewInputStream(ctxt);
13767 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013768 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013769 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013770 return;
13771 }
13772
13773 xmlClearParserCtxt(ctxt);
13774 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013775 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013776 input->base = buffer;
13777 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013778 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013779 inputPush(ctxt, input);
13780}
13781
13782/**
13783 * xmlSAXUserParseFile:
13784 * @sax: a SAX handler
13785 * @user_data: The user data returned on SAX callbacks
13786 * @filename: a file name
13787 *
13788 * parse an XML file and call the given SAX handler routines.
13789 * Automatic support for ZLIB/Compress compressed document is provided
13790 *
13791 * Returns 0 in case of success or a error number otherwise
13792 */
13793int
13794xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13795 const char *filename) {
13796 int ret = 0;
13797 xmlParserCtxtPtr ctxt;
13798
13799 ctxt = xmlCreateFileParserCtxt(filename);
13800 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013801 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013802 xmlFree(ctxt->sax);
13803 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013804 xmlDetectSAX2(ctxt);
13805
Owen Taylor3473f882001-02-23 17:55:21 +000013806 if (user_data != NULL)
13807 ctxt->userData = user_data;
13808
13809 xmlParseDocument(ctxt);
13810
13811 if (ctxt->wellFormed)
13812 ret = 0;
13813 else {
13814 if (ctxt->errNo != 0)
13815 ret = ctxt->errNo;
13816 else
13817 ret = -1;
13818 }
13819 if (sax != NULL)
13820 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013821 if (ctxt->myDoc != NULL) {
13822 xmlFreeDoc(ctxt->myDoc);
13823 ctxt->myDoc = NULL;
13824 }
Owen Taylor3473f882001-02-23 17:55:21 +000013825 xmlFreeParserCtxt(ctxt);
13826
13827 return ret;
13828}
Daniel Veillard81273902003-09-30 00:43:48 +000013829#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013830
13831/************************************************************************
13832 * *
13833 * Front ends when parsing from memory *
13834 * *
13835 ************************************************************************/
13836
13837/**
13838 * xmlCreateMemoryParserCtxt:
13839 * @buffer: a pointer to a char array
13840 * @size: the size of the array
13841 *
13842 * Create a parser context for an XML in-memory document.
13843 *
13844 * Returns the new parser context or NULL
13845 */
13846xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013847xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013848 xmlParserCtxtPtr ctxt;
13849 xmlParserInputPtr input;
13850 xmlParserInputBufferPtr buf;
13851
13852 if (buffer == NULL)
13853 return(NULL);
13854 if (size <= 0)
13855 return(NULL);
13856
13857 ctxt = xmlNewParserCtxt();
13858 if (ctxt == NULL)
13859 return(NULL);
13860
Daniel Veillard53350552003-09-18 13:35:51 +000013861 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013862 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013863 if (buf == NULL) {
13864 xmlFreeParserCtxt(ctxt);
13865 return(NULL);
13866 }
Owen Taylor3473f882001-02-23 17:55:21 +000013867
13868 input = xmlNewInputStream(ctxt);
13869 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013870 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013871 xmlFreeParserCtxt(ctxt);
13872 return(NULL);
13873 }
13874
13875 input->filename = NULL;
13876 input->buf = buf;
13877 input->base = input->buf->buffer->content;
13878 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013879 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013880
13881 inputPush(ctxt, input);
13882 return(ctxt);
13883}
13884
Daniel Veillard81273902003-09-30 00:43:48 +000013885#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013886/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013887 * xmlSAXParseMemoryWithData:
13888 * @sax: the SAX handler block
13889 * @buffer: an pointer to a char array
13890 * @size: the size of the array
13891 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13892 * documents
13893 * @data: the userdata
13894 *
13895 * parse an XML in-memory block and use the given SAX function block
13896 * to handle the parsing callback. If sax is NULL, fallback to the default
13897 * DOM tree building routines.
13898 *
13899 * User data (void *) is stored within the parser context in the
13900 * context's _private member, so it is available nearly everywhere in libxml
13901 *
13902 * Returns the resulting document tree
13903 */
13904
13905xmlDocPtr
13906xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13907 int size, int recovery, void *data) {
13908 xmlDocPtr ret;
13909 xmlParserCtxtPtr ctxt;
13910
Daniel Veillardab2a7632009-07-09 08:45:03 +020013911 xmlInitParser();
13912
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013913 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13914 if (ctxt == NULL) return(NULL);
13915 if (sax != NULL) {
13916 if (ctxt->sax != NULL)
13917 xmlFree(ctxt->sax);
13918 ctxt->sax = sax;
13919 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013920 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013921 if (data!=NULL) {
13922 ctxt->_private=data;
13923 }
13924
Daniel Veillardadba5f12003-04-04 16:09:01 +000013925 ctxt->recovery = recovery;
13926
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013927 xmlParseDocument(ctxt);
13928
13929 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13930 else {
13931 ret = NULL;
13932 xmlFreeDoc(ctxt->myDoc);
13933 ctxt->myDoc = NULL;
13934 }
13935 if (sax != NULL)
13936 ctxt->sax = NULL;
13937 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013938
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013939 return(ret);
13940}
13941
13942/**
Owen Taylor3473f882001-02-23 17:55:21 +000013943 * xmlSAXParseMemory:
13944 * @sax: the SAX handler block
13945 * @buffer: an pointer to a char array
13946 * @size: the size of the array
13947 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13948 * documents
13949 *
13950 * parse an XML in-memory block and use the given SAX function block
13951 * to handle the parsing callback. If sax is NULL, fallback to the default
13952 * DOM tree building routines.
13953 *
13954 * Returns the resulting document tree
13955 */
13956xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013957xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13958 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013959 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013960}
13961
13962/**
13963 * xmlParseMemory:
13964 * @buffer: an pointer to a char array
13965 * @size: the size of the array
13966 *
13967 * parse an XML in-memory block and build a tree.
13968 *
13969 * Returns the resulting document tree
13970 */
13971
Daniel Veillard50822cb2001-07-26 20:05:51 +000013972xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013973 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13974}
13975
13976/**
13977 * xmlRecoverMemory:
13978 * @buffer: an pointer to a char array
13979 * @size: the size of the array
13980 *
13981 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013982 * In the case the document is not Well Formed, an attempt to
13983 * build a tree is tried anyway
13984 *
13985 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013986 */
13987
Daniel Veillard50822cb2001-07-26 20:05:51 +000013988xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013989 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13990}
13991
13992/**
13993 * xmlSAXUserParseMemory:
13994 * @sax: a SAX handler
13995 * @user_data: The user data returned on SAX callbacks
13996 * @buffer: an in-memory XML document input
13997 * @size: the length of the XML document in bytes
13998 *
13999 * A better SAX parsing routine.
14000 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020014001 *
Owen Taylor3473f882001-02-23 17:55:21 +000014002 * Returns 0 in case of success or a error number otherwise
14003 */
14004int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014005 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014006 int ret = 0;
14007 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014008
14009 xmlInitParser();
14010
Owen Taylor3473f882001-02-23 17:55:21 +000014011 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14012 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014013 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14014 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014015 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014016 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014017
Daniel Veillard30211a02001-04-26 09:33:18 +000014018 if (user_data != NULL)
14019 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014020
Owen Taylor3473f882001-02-23 17:55:21 +000014021 xmlParseDocument(ctxt);
14022
14023 if (ctxt->wellFormed)
14024 ret = 0;
14025 else {
14026 if (ctxt->errNo != 0)
14027 ret = ctxt->errNo;
14028 else
14029 ret = -1;
14030 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014031 if (sax != NULL)
14032 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014033 if (ctxt->myDoc != NULL) {
14034 xmlFreeDoc(ctxt->myDoc);
14035 ctxt->myDoc = NULL;
14036 }
Owen Taylor3473f882001-02-23 17:55:21 +000014037 xmlFreeParserCtxt(ctxt);
14038
14039 return ret;
14040}
Daniel Veillard81273902003-09-30 00:43:48 +000014041#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014042
14043/**
14044 * xmlCreateDocParserCtxt:
14045 * @cur: a pointer to an array of xmlChar
14046 *
14047 * Creates a parser context for an XML in-memory document.
14048 *
14049 * Returns the new parser context or NULL
14050 */
14051xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014052xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014053 int len;
14054
14055 if (cur == NULL)
14056 return(NULL);
14057 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014058 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014059}
14060
Daniel Veillard81273902003-09-30 00:43:48 +000014061#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014062/**
14063 * xmlSAXParseDoc:
14064 * @sax: the SAX handler block
14065 * @cur: a pointer to an array of xmlChar
14066 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14067 * documents
14068 *
14069 * parse an XML in-memory document and build a tree.
14070 * It use the given SAX function block to handle the parsing callback.
14071 * If sax is NULL, fallback to the default DOM tree building routines.
14072 *
14073 * Returns the resulting document tree
14074 */
14075
14076xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014077xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014078 xmlDocPtr ret;
14079 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014080 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014081
Daniel Veillard38936062004-11-04 17:45:11 +000014082 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014083
14084
14085 ctxt = xmlCreateDocParserCtxt(cur);
14086 if (ctxt == NULL) return(NULL);
14087 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014088 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014089 ctxt->sax = sax;
14090 ctxt->userData = NULL;
14091 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014092 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014093
14094 xmlParseDocument(ctxt);
14095 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14096 else {
14097 ret = NULL;
14098 xmlFreeDoc(ctxt->myDoc);
14099 ctxt->myDoc = NULL;
14100 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014101 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014102 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014103 xmlFreeParserCtxt(ctxt);
14104
14105 return(ret);
14106}
14107
14108/**
14109 * xmlParseDoc:
14110 * @cur: a pointer to an array of xmlChar
14111 *
14112 * parse an XML in-memory document and build a tree.
14113 *
14114 * Returns the resulting document tree
14115 */
14116
14117xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014118xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014119 return(xmlSAXParseDoc(NULL, cur, 0));
14120}
Daniel Veillard81273902003-09-30 00:43:48 +000014121#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014122
Daniel Veillard81273902003-09-30 00:43:48 +000014123#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014124/************************************************************************
14125 * *
14126 * Specific function to keep track of entities references *
14127 * and used by the XSLT debugger *
14128 * *
14129 ************************************************************************/
14130
14131static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14132
14133/**
14134 * xmlAddEntityReference:
14135 * @ent : A valid entity
14136 * @firstNode : A valid first node for children of entity
14137 * @lastNode : A valid last node of children entity
14138 *
14139 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14140 */
14141static void
14142xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14143 xmlNodePtr lastNode)
14144{
14145 if (xmlEntityRefFunc != NULL) {
14146 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14147 }
14148}
14149
14150
14151/**
14152 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014153 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014154 *
14155 * Set the function to call call back when a xml reference has been made
14156 */
14157void
14158xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14159{
14160 xmlEntityRefFunc = func;
14161}
Daniel Veillard81273902003-09-30 00:43:48 +000014162#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014163
14164/************************************************************************
14165 * *
14166 * Miscellaneous *
14167 * *
14168 ************************************************************************/
14169
14170#ifdef LIBXML_XPATH_ENABLED
14171#include <libxml/xpath.h>
14172#endif
14173
Daniel Veillardffa3c742005-07-21 13:24:09 +000014174extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014175static int xmlParserInitialized = 0;
14176
14177/**
14178 * xmlInitParser:
14179 *
14180 * Initialization function for the XML parser.
14181 * This is not reentrant. Call once before processing in case of
14182 * use in multithreaded programs.
14183 */
14184
14185void
14186xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014187 if (xmlParserInitialized != 0)
14188 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014189
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014190#ifdef LIBXML_THREAD_ENABLED
14191 __xmlGlobalInitMutexLock();
14192 if (xmlParserInitialized == 0) {
14193#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014194 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014195 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014196 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14197 (xmlGenericError == NULL))
14198 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014199 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014200 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014201 xmlInitCharEncodingHandlers();
14202 xmlDefaultSAXHandlerInit();
14203 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014204#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014205 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014206#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014207#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014208 htmlInitAutoClose();
14209 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014210#endif
14211#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014212 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014213#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014214 xmlParserInitialized = 1;
14215#ifdef LIBXML_THREAD_ENABLED
14216 }
14217 __xmlGlobalInitMutexUnlock();
14218#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014219}
14220
14221/**
14222 * xmlCleanupParser:
14223 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014224 * This function name is somewhat misleading. It does not clean up
14225 * parser state, it cleans up memory allocated by the library itself.
14226 * It is a cleanup function for the XML library. It tries to reclaim all
14227 * related global memory allocated for the library processing.
14228 * It doesn't deallocate any document related memory. One should
14229 * call xmlCleanupParser() only when the process has finished using
14230 * the library and all XML/HTML documents built with it.
14231 * See also xmlInitParser() which has the opposite function of preparing
14232 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014233 *
14234 * WARNING: if your application is multithreaded or has plugin support
14235 * calling this may crash the application if another thread or
14236 * a plugin is still using libxml2. It's sometimes very hard to
14237 * guess if libxml2 is in use in the application, some libraries
14238 * or plugins may use it without notice. In case of doubt abstain
14239 * from calling this function or do it just before calling exit()
14240 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014241 */
14242
14243void
14244xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014245 if (!xmlParserInitialized)
14246 return;
14247
Owen Taylor3473f882001-02-23 17:55:21 +000014248 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014249#ifdef LIBXML_CATALOG_ENABLED
14250 xmlCatalogCleanup();
14251#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014252 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014253 xmlCleanupInputCallbacks();
14254#ifdef LIBXML_OUTPUT_ENABLED
14255 xmlCleanupOutputCallbacks();
14256#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014257#ifdef LIBXML_SCHEMAS_ENABLED
14258 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014259 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014260#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000014261 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014262 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014263 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014264 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014265 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014266}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014267
14268/************************************************************************
14269 * *
14270 * New set (2.6.0) of simpler and more flexible APIs *
14271 * *
14272 ************************************************************************/
14273
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is expanded; a NULL @str is ignored and a NULL dict means the string
 * is always freed.
 *
 * The body is wrapped in do { } while (0) so that "DICT_FREE(x);" is a
 * single statement and can be used safely in an unbraced if/else.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14285
14286/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014287 * xmlCtxtReset:
14288 * @ctxt: an XML parser context
14289 *
14290 * Reset a parser context
14291 */
14292void
14293xmlCtxtReset(xmlParserCtxtPtr ctxt)
14294{
14295 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014296 xmlDictPtr dict;
14297
14298 if (ctxt == NULL)
14299 return;
14300
14301 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014302
14303 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14304 xmlFreeInputStream(input);
14305 }
14306 ctxt->inputNr = 0;
14307 ctxt->input = NULL;
14308
14309 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014310 if (ctxt->spaceTab != NULL) {
14311 ctxt->spaceTab[0] = -1;
14312 ctxt->space = &ctxt->spaceTab[0];
14313 } else {
14314 ctxt->space = NULL;
14315 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014316
14317
14318 ctxt->nodeNr = 0;
14319 ctxt->node = NULL;
14320
14321 ctxt->nameNr = 0;
14322 ctxt->name = NULL;
14323
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014324 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014325 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014326 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014327 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014328 DICT_FREE(ctxt->directory);
14329 ctxt->directory = NULL;
14330 DICT_FREE(ctxt->extSubURI);
14331 ctxt->extSubURI = NULL;
14332 DICT_FREE(ctxt->extSubSystem);
14333 ctxt->extSubSystem = NULL;
14334 if (ctxt->myDoc != NULL)
14335 xmlFreeDoc(ctxt->myDoc);
14336 ctxt->myDoc = NULL;
14337
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014338 ctxt->standalone = -1;
14339 ctxt->hasExternalSubset = 0;
14340 ctxt->hasPErefs = 0;
14341 ctxt->html = 0;
14342 ctxt->external = 0;
14343 ctxt->instate = XML_PARSER_START;
14344 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014345
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014346 ctxt->wellFormed = 1;
14347 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014348 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014349 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014350#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014351 ctxt->vctxt.userData = ctxt;
14352 ctxt->vctxt.error = xmlParserValidityError;
14353 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014354#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014355 ctxt->record_info = 0;
14356 ctxt->nbChars = 0;
14357 ctxt->checkIndex = 0;
14358 ctxt->inSubset = 0;
14359 ctxt->errNo = XML_ERR_OK;
14360 ctxt->depth = 0;
14361 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14362 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014363 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014364 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014365 xmlInitNodeInfoSeq(&ctxt->node_seq);
14366
14367 if (ctxt->attsDefault != NULL) {
14368 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14369 ctxt->attsDefault = NULL;
14370 }
14371 if (ctxt->attsSpecial != NULL) {
14372 xmlHashFree(ctxt->attsSpecial, NULL);
14373 ctxt->attsSpecial = NULL;
14374 }
14375
Daniel Veillard4432df22003-09-28 18:58:27 +000014376#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014377 if (ctxt->catalogs != NULL)
14378 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014379#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014380 if (ctxt->lastError.code != XML_ERR_OK)
14381 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014382}
14383
14384/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014385 * xmlCtxtResetPush:
14386 * @ctxt: an XML parser context
14387 * @chunk: a pointer to an array of chars
14388 * @size: number of chars in the array
14389 * @filename: an optional file name or URI
14390 * @encoding: the document encoding, or NULL
14391 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014392 * Reset a push parser context
14393 *
14394 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014395 */
14396int
14397xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14398 int size, const char *filename, const char *encoding)
14399{
14400 xmlParserInputPtr inputStream;
14401 xmlParserInputBufferPtr buf;
14402 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14403
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014404 if (ctxt == NULL)
14405 return(1);
14406
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014407 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14408 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14409
14410 buf = xmlAllocParserInputBuffer(enc);
14411 if (buf == NULL)
14412 return(1);
14413
14414 if (ctxt == NULL) {
14415 xmlFreeParserInputBuffer(buf);
14416 return(1);
14417 }
14418
14419 xmlCtxtReset(ctxt);
14420
14421 if (ctxt->pushTab == NULL) {
14422 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14423 sizeof(xmlChar *));
14424 if (ctxt->pushTab == NULL) {
14425 xmlErrMemory(ctxt, NULL);
14426 xmlFreeParserInputBuffer(buf);
14427 return(1);
14428 }
14429 }
14430
14431 if (filename == NULL) {
14432 ctxt->directory = NULL;
14433 } else {
14434 ctxt->directory = xmlParserGetDirectory(filename);
14435 }
14436
14437 inputStream = xmlNewInputStream(ctxt);
14438 if (inputStream == NULL) {
14439 xmlFreeParserInputBuffer(buf);
14440 return(1);
14441 }
14442
14443 if (filename == NULL)
14444 inputStream->filename = NULL;
14445 else
14446 inputStream->filename = (char *)
14447 xmlCanonicPath((const xmlChar *) filename);
14448 inputStream->buf = buf;
14449 inputStream->base = inputStream->buf->buffer->content;
14450 inputStream->cur = inputStream->buf->buffer->content;
14451 inputStream->end =
14452 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14453
14454 inputPush(ctxt, inputStream);
14455
14456 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14457 (ctxt->input->buf != NULL)) {
14458 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14459 int cur = ctxt->input->cur - ctxt->input->base;
14460
14461 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14462
14463 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14464 ctxt->input->cur = ctxt->input->base + cur;
14465 ctxt->input->end =
14466 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14467 use];
14468#ifdef DEBUG_PUSH
14469 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14470#endif
14471 }
14472
14473 if (encoding != NULL) {
14474 xmlCharEncodingHandlerPtr hdlr;
14475
Daniel Veillard37334572008-07-31 08:20:02 +000014476 if (ctxt->encoding != NULL)
14477 xmlFree((xmlChar *) ctxt->encoding);
14478 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14479
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014480 hdlr = xmlFindCharEncodingHandler(encoding);
14481 if (hdlr != NULL) {
14482 xmlSwitchToEncoding(ctxt, hdlr);
14483 } else {
14484 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14485 "Unsupported encoding %s\n", BAD_CAST encoding);
14486 }
14487 } else if (enc != XML_CHAR_ENCODING_NONE) {
14488 xmlSwitchEncoding(ctxt, enc);
14489 }
14490
14491 return(0);
14492}
14493
Daniel Veillard37334572008-07-31 08:20:02 +000014494
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014495/**
Daniel Veillard37334572008-07-31 08:20:02 +000014496 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014497 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014498 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014499 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014500 *
14501 * Applies the options to the parser context
14502 *
14503 * Returns 0 in case of success, the set of unknown or unimplemented options
14504 * in case of error.
14505 */
Daniel Veillard37334572008-07-31 08:20:02 +000014506static int
14507xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014508{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014509 if (ctxt == NULL)
14510 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014511 if (encoding != NULL) {
14512 if (ctxt->encoding != NULL)
14513 xmlFree((xmlChar *) ctxt->encoding);
14514 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14515 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014516 if (options & XML_PARSE_RECOVER) {
14517 ctxt->recovery = 1;
14518 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014519 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014520 } else
14521 ctxt->recovery = 0;
14522 if (options & XML_PARSE_DTDLOAD) {
14523 ctxt->loadsubset = XML_DETECT_IDS;
14524 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014525 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014526 } else
14527 ctxt->loadsubset = 0;
14528 if (options & XML_PARSE_DTDATTR) {
14529 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14530 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014531 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014532 }
14533 if (options & XML_PARSE_NOENT) {
14534 ctxt->replaceEntities = 1;
14535 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14536 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014537 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014538 } else
14539 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014540 if (options & XML_PARSE_PEDANTIC) {
14541 ctxt->pedantic = 1;
14542 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014543 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014544 } else
14545 ctxt->pedantic = 0;
14546 if (options & XML_PARSE_NOBLANKS) {
14547 ctxt->keepBlanks = 0;
14548 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14549 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014550 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014551 } else
14552 ctxt->keepBlanks = 1;
14553 if (options & XML_PARSE_DTDVALID) {
14554 ctxt->validate = 1;
14555 if (options & XML_PARSE_NOWARNING)
14556 ctxt->vctxt.warning = NULL;
14557 if (options & XML_PARSE_NOERROR)
14558 ctxt->vctxt.error = NULL;
14559 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014560 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014561 } else
14562 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014563 if (options & XML_PARSE_NOWARNING) {
14564 ctxt->sax->warning = NULL;
14565 options -= XML_PARSE_NOWARNING;
14566 }
14567 if (options & XML_PARSE_NOERROR) {
14568 ctxt->sax->error = NULL;
14569 ctxt->sax->fatalError = NULL;
14570 options -= XML_PARSE_NOERROR;
14571 }
Daniel Veillard81273902003-09-30 00:43:48 +000014572#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014573 if (options & XML_PARSE_SAX1) {
14574 ctxt->sax->startElement = xmlSAX2StartElement;
14575 ctxt->sax->endElement = xmlSAX2EndElement;
14576 ctxt->sax->startElementNs = NULL;
14577 ctxt->sax->endElementNs = NULL;
14578 ctxt->sax->initialized = 1;
14579 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014580 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014581 }
Daniel Veillard81273902003-09-30 00:43:48 +000014582#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014583 if (options & XML_PARSE_NODICT) {
14584 ctxt->dictNames = 0;
14585 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014586 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014587 } else {
14588 ctxt->dictNames = 1;
14589 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014590 if (options & XML_PARSE_NOCDATA) {
14591 ctxt->sax->cdataBlock = NULL;
14592 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014593 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014594 }
14595 if (options & XML_PARSE_NSCLEAN) {
14596 ctxt->options |= XML_PARSE_NSCLEAN;
14597 options -= XML_PARSE_NSCLEAN;
14598 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014599 if (options & XML_PARSE_NONET) {
14600 ctxt->options |= XML_PARSE_NONET;
14601 options -= XML_PARSE_NONET;
14602 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014603 if (options & XML_PARSE_COMPACT) {
14604 ctxt->options |= XML_PARSE_COMPACT;
14605 options -= XML_PARSE_COMPACT;
14606 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014607 if (options & XML_PARSE_OLD10) {
14608 ctxt->options |= XML_PARSE_OLD10;
14609 options -= XML_PARSE_OLD10;
14610 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014611 if (options & XML_PARSE_NOBASEFIX) {
14612 ctxt->options |= XML_PARSE_NOBASEFIX;
14613 options -= XML_PARSE_NOBASEFIX;
14614 }
14615 if (options & XML_PARSE_HUGE) {
14616 ctxt->options |= XML_PARSE_HUGE;
14617 options -= XML_PARSE_HUGE;
14618 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014619 if (options & XML_PARSE_OLDSAX) {
14620 ctxt->options |= XML_PARSE_OLDSAX;
14621 options -= XML_PARSE_OLDSAX;
14622 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080014623 if (options & XML_PARSE_IGNORE_ENC) {
14624 ctxt->options |= XML_PARSE_IGNORE_ENC;
14625 options -= XML_PARSE_IGNORE_ENC;
14626 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014627 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014628 return (options);
14629}
14630
14631/**
Daniel Veillard37334572008-07-31 08:20:02 +000014632 * xmlCtxtUseOptions:
14633 * @ctxt: an XML parser context
14634 * @options: a combination of xmlParserOption
14635 *
14636 * Applies the options to the parser context
14637 *
14638 * Returns 0 in case of success, the set of unknown or unimplemented options
14639 * in case of error.
14640 */
14641int
14642xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14643{
14644 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14645}
14646
14647/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014648 * xmlDoRead:
14649 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014650 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014651 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014652 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014653 * @reuse: keep the context for reuse
14654 *
14655 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014656 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014657 * Returns the resulting document tree or NULL
14658 */
14659static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014660xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14661 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014662{
14663 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014664
14665 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014666 if (encoding != NULL) {
14667 xmlCharEncodingHandlerPtr hdlr;
14668
14669 hdlr = xmlFindCharEncodingHandler(encoding);
14670 if (hdlr != NULL)
14671 xmlSwitchToEncoding(ctxt, hdlr);
14672 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014673 if ((URL != NULL) && (ctxt->input != NULL) &&
14674 (ctxt->input->filename == NULL))
14675 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014676 xmlParseDocument(ctxt);
14677 if ((ctxt->wellFormed) || ctxt->recovery)
14678 ret = ctxt->myDoc;
14679 else {
14680 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014681 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014682 xmlFreeDoc(ctxt->myDoc);
14683 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014684 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014685 ctxt->myDoc = NULL;
14686 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014687 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014688 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014689
14690 return (ret);
14691}
14692
14693/**
14694 * xmlReadDoc:
14695 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014696 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014697 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014698 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014699 *
14700 * parse an XML in-memory document and build a tree.
14701 *
14702 * Returns the resulting document tree
14703 */
14704xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014705xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014706{
14707 xmlParserCtxtPtr ctxt;
14708
14709 if (cur == NULL)
14710 return (NULL);
14711
14712 ctxt = xmlCreateDocParserCtxt(cur);
14713 if (ctxt == NULL)
14714 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014715 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014716}
14717
14718/**
14719 * xmlReadFile:
14720 * @filename: a file or URL
14721 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014722 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014723 *
14724 * parse an XML file from the filesystem or the network.
14725 *
14726 * Returns the resulting document tree
14727 */
14728xmlDocPtr
14729xmlReadFile(const char *filename, const char *encoding, int options)
14730{
14731 xmlParserCtxtPtr ctxt;
14732
Daniel Veillard61b93382003-11-03 14:28:31 +000014733 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014734 if (ctxt == NULL)
14735 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014736 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014737}
14738
14739/**
14740 * xmlReadMemory:
14741 * @buffer: a pointer to a char array
14742 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014743 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014744 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014745 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014746 *
14747 * parse an XML in-memory document and build a tree.
14748 *
14749 * Returns the resulting document tree
14750 */
14751xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014752xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014753{
14754 xmlParserCtxtPtr ctxt;
14755
14756 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14757 if (ctxt == NULL)
14758 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014759 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014760}
14761
14762/**
14763 * xmlReadFd:
14764 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014765 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014766 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014767 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014768 *
14769 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014770 * NOTE that the file descriptor will not be closed when the
14771 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014772 *
14773 * Returns the resulting document tree
14774 */
14775xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014776xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014777{
14778 xmlParserCtxtPtr ctxt;
14779 xmlParserInputBufferPtr input;
14780 xmlParserInputPtr stream;
14781
14782 if (fd < 0)
14783 return (NULL);
14784
14785 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14786 if (input == NULL)
14787 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014788 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014789 ctxt = xmlNewParserCtxt();
14790 if (ctxt == NULL) {
14791 xmlFreeParserInputBuffer(input);
14792 return (NULL);
14793 }
14794 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14795 if (stream == NULL) {
14796 xmlFreeParserInputBuffer(input);
14797 xmlFreeParserCtxt(ctxt);
14798 return (NULL);
14799 }
14800 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014801 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014802}
14803
14804/**
14805 * xmlReadIO:
14806 * @ioread: an I/O read function
14807 * @ioclose: an I/O close function
14808 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014809 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014810 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014811 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014812 *
14813 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080014814 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014815 * Returns the resulting document tree
14816 */
14817xmlDocPtr
14818xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014819 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014820{
14821 xmlParserCtxtPtr ctxt;
14822 xmlParserInputBufferPtr input;
14823 xmlParserInputPtr stream;
14824
14825 if (ioread == NULL)
14826 return (NULL);
14827
14828 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14829 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080014830 if (input == NULL) {
14831 if (ioclose != NULL)
14832 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014833 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080014834 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014835 ctxt = xmlNewParserCtxt();
14836 if (ctxt == NULL) {
14837 xmlFreeParserInputBuffer(input);
14838 return (NULL);
14839 }
14840 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14841 if (stream == NULL) {
14842 xmlFreeParserInputBuffer(input);
14843 xmlFreeParserCtxt(ctxt);
14844 return (NULL);
14845 }
14846 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014847 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014848}
14849
14850/**
14851 * xmlCtxtReadDoc:
14852 * @ctxt: an XML parser context
14853 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014854 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014855 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014856 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014857 *
14858 * parse an XML in-memory document and build a tree.
14859 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080014860 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014861 * Returns the resulting document tree
14862 */
14863xmlDocPtr
14864xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014865 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014866{
14867 xmlParserInputPtr stream;
14868
14869 if (cur == NULL)
14870 return (NULL);
14871 if (ctxt == NULL)
14872 return (NULL);
14873
14874 xmlCtxtReset(ctxt);
14875
14876 stream = xmlNewStringInputStream(ctxt, cur);
14877 if (stream == NULL) {
14878 return (NULL);
14879 }
14880 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014881 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014882}
14883
14884/**
14885 * xmlCtxtReadFile:
14886 * @ctxt: an XML parser context
14887 * @filename: a file or URL
14888 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014889 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014890 *
14891 * parse an XML file from the filesystem or the network.
14892 * This reuses the existing @ctxt parser context
14893 *
14894 * Returns the resulting document tree
14895 */
14896xmlDocPtr
14897xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14898 const char *encoding, int options)
14899{
14900 xmlParserInputPtr stream;
14901
14902 if (filename == NULL)
14903 return (NULL);
14904 if (ctxt == NULL)
14905 return (NULL);
14906
14907 xmlCtxtReset(ctxt);
14908
Daniel Veillard29614c72004-11-26 10:47:26 +000014909 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014910 if (stream == NULL) {
14911 return (NULL);
14912 }
14913 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014914 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014915}
14916
14917/**
14918 * xmlCtxtReadMemory:
14919 * @ctxt: an XML parser context
14920 * @buffer: a pointer to a char array
14921 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014922 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014923 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014924 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014925 *
14926 * parse an XML in-memory document and build a tree.
14927 * This reuses the existing @ctxt parser context
14928 *
14929 * Returns the resulting document tree
14930 */
14931xmlDocPtr
14932xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014933 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014934{
14935 xmlParserInputBufferPtr input;
14936 xmlParserInputPtr stream;
14937
14938 if (ctxt == NULL)
14939 return (NULL);
14940 if (buffer == NULL)
14941 return (NULL);
14942
14943 xmlCtxtReset(ctxt);
14944
14945 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14946 if (input == NULL) {
14947 return(NULL);
14948 }
14949
14950 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14951 if (stream == NULL) {
14952 xmlFreeParserInputBuffer(input);
14953 return(NULL);
14954 }
14955
14956 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014957 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014958}
14959
14960/**
14961 * xmlCtxtReadFd:
14962 * @ctxt: an XML parser context
14963 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014964 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014965 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014966 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014967 *
14968 * parse an XML from a file descriptor and build a tree.
14969 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014970 * NOTE that the file descriptor will not be closed when the
14971 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014972 *
14973 * Returns the resulting document tree
14974 */
14975xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014976xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14977 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014978{
14979 xmlParserInputBufferPtr input;
14980 xmlParserInputPtr stream;
14981
14982 if (fd < 0)
14983 return (NULL);
14984 if (ctxt == NULL)
14985 return (NULL);
14986
14987 xmlCtxtReset(ctxt);
14988
14989
14990 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14991 if (input == NULL)
14992 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014993 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014994 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14995 if (stream == NULL) {
14996 xmlFreeParserInputBuffer(input);
14997 return (NULL);
14998 }
14999 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015000 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015001}
15002
15003/**
15004 * xmlCtxtReadIO:
15005 * @ctxt: an XML parser context
15006 * @ioread: an I/O read function
15007 * @ioclose: an I/O close function
15008 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015009 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015010 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015011 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015012 *
15013 * parse an XML document from I/O functions and source and build a tree.
15014 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015015 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015016 * Returns the resulting document tree
15017 */
15018xmlDocPtr
15019xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15020 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015021 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015022 const char *encoding, int options)
15023{
15024 xmlParserInputBufferPtr input;
15025 xmlParserInputPtr stream;
15026
15027 if (ioread == NULL)
15028 return (NULL);
15029 if (ctxt == NULL)
15030 return (NULL);
15031
15032 xmlCtxtReset(ctxt);
15033
15034 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15035 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015036 if (input == NULL) {
15037 if (ioclose != NULL)
15038 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015039 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015040 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015041 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15042 if (stream == NULL) {
15043 xmlFreeParserInputBuffer(input);
15044 return (NULL);
15045 }
15046 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015047 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015048}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015049
15050#define bottom_parser
15051#include "elfgcchack.h"