blob: 98632757a7bed90ce9979bc432e85c3931c76a6a [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard0161e632008-08-28 15:36:32 +000087static void
88xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
89
Rob Richards9c0aa472009-03-26 18:10:19 +000090static xmlParserCtxtPtr
91xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
92 const xmlChar *base, xmlParserCtxtPtr pctx);
93
/************************************************************************
 *									*
 *	Arbitrary limits set in the parser. See XML_PARSE_HUGE		*
 *									*
 ************************************************************************/

#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 *    replacement over the size in bytes of the input indicates that you have
 *    an exponential behaviour. A value of 10 corresponds to at least 3 entity
 *    replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
110
111/*
112 * xmlParserEntityCheck
113 *
114 * Function to check non-linear entity expansion behaviour
115 * This is here to detect and stop exponential linear entity expansion
116 * This is not a limitation of the parser but a safety
117 * boundary feature. It can be disabled with the XML_PARSE_HUGE
118 * parser option.
119 */
120static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800121xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard0161e632008-08-28 15:36:32 +0000122 xmlEntityPtr ent)
123{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800124 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000125
126 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
127 return (0);
128 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
129 return (1);
130 if (size != 0) {
131 /*
132 * Do the check based on the replacement size of the entity
133 */
134 if (size < XML_PARSER_BIG_ENTITY)
135 return(0);
136
137 /*
138 * A limit on the amount of text data reasonably used
139 */
140 if (ctxt->input != NULL) {
141 consumed = ctxt->input->consumed +
142 (ctxt->input->cur - ctxt->input->base);
143 }
144 consumed += ctxt->sizeentities;
145
146 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
147 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
148 return (0);
149 } else if (ent != NULL) {
150 /*
151 * use the number of parsed entities in the replacement
152 */
153 size = ent->checked;
154
155 /*
156 * The amount of data parsed counting entities size only once
157 */
158 if (ctxt->input != NULL) {
159 consumed = ctxt->input->consumed +
160 (ctxt->input->cur - ctxt->input->base);
161 }
162 consumed += ctxt->sizeentities;
163
164 /*
165 * Check the density of entities for the amount of data
166 * knowing an entity reference will take at least 3 bytes
167 */
168 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
169 return (0);
170 } else {
171 /*
172 * strange we got no data for checking just return
173 */
174 return (0);
175 }
176
177 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
178 return (1);
179}
180
/**
 * xmlParserMaxDepth:
 *
 * arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000190
Daniel Veillard0fb18932003-09-07 09:14:37 +0000191
Daniel Veillard0161e632008-08-28 15:36:32 +0000192
#define SAX2 1
/* Buffer size constants; their users are not visible in this part of the file */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
197
/*
 * List of XML prefixed PI allowed by W3C specs
 * (NULL-terminated array of target names).
 */

static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    "xml-model",
    NULL
};
207
Daniel Veillarda07050d2003-10-19 14:46:32 +0000208
Owen Taylor3473f882001-02-23 17:55:21 +0000209/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200210static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
211 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000212
Daniel Veillard7d515752003-09-26 19:12:37 +0000213static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000214xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
215 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000216 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000217 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000218
Daniel Veillard37334572008-07-31 08:20:02 +0000219static int
220xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
221 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000222#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000223static void
224xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
225 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000226#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000227
Daniel Veillard7d515752003-09-26 19:12:37 +0000228static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000229xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
230 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000231
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000232static int
233xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
234
/************************************************************************
 *									*
 *		Some factorized error routines				*
 *									*
 ************************************************************************/
240
241/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000242 * xmlErrAttributeDup:
243 * @ctxt: an XML parser context
244 * @prefix: the attribute prefix
245 * @localname: the attribute localname
246 *
247 * Handle a redefinition of attribute error
248 */
249static void
250xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
251 const xmlChar * localname)
252{
Daniel Veillard157fee02003-10-31 10:36:03 +0000253 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
254 (ctxt->instate == XML_PARSER_EOF))
255 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000256 if (ctxt != NULL)
257 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200258
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000259 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000260 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200261 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000262 (const char *) localname, NULL, NULL, 0, 0,
263 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000264 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000265 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200266 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000267 (const char *) prefix, (const char *) localname,
268 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
269 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000270 if (ctxt != NULL) {
271 ctxt->wellFormed = 0;
272 if (ctxt->recovery == 0)
273 ctxt->disableSAX = 1;
274 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000275}
276
277/**
278 * xmlFatalErr:
279 * @ctxt: an XML parser context
280 * @error: the error number
281 * @extra: extra information string
282 *
283 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
284 */
285static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000286xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287{
288 const char *errmsg;
289
Daniel Veillard157fee02003-10-31 10:36:03 +0000290 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
291 (ctxt->instate == XML_PARSER_EOF))
292 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 switch (error) {
294 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000295 errmsg = "CharRef: invalid hexadecimal value\n";
296 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000297 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000298 errmsg = "CharRef: invalid decimal value\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "CharRef: invalid value\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "internal error";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "PEReference at end of document\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "PEReference in prolog\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "PEReference in epilog\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "PEReference: no name\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "PEReference: expecting ';'\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "Detected an entity reference loop\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EntityValue: \" or ' expected\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "PEReferences forbidden in internal subset\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "EntityValue: \" or ' expected\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "AttValue: \" or ' expected\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Unescaped '<' not allowed in attributes values\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "SystemLiteral \" or ' expected\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "Unfinished System or Public ID \" or ' expected\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Sequence ']]>' not allowed in content\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "PUBLIC, the Public Identifier is missing\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Comment must not contain '--' (double-hyphen)\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "xmlParsePI : no target name\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 errmsg = "Invalid PI name\n";
362 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 errmsg = "NOTATION: Name expected here\n";
365 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 errmsg = "'>' required to close NOTATION declaration\n";
368 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000369 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370 errmsg = "Entity value required\n";
371 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000372 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 errmsg = "Fragment not allowed";
374 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 errmsg = "'(' required to start ATTLIST enumeration\n";
377 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 errmsg = "NmToken expected in ATTLIST enumeration\n";
380 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000381 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 errmsg = "')' required to finish ATTLIST enumeration\n";
383 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000384 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
386 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000387 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 errmsg = "ContentDecl : Name or '(' expected\n";
392 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000393 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000394 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
395 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000396 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000397 errmsg =
398 "PEReference: forbidden within markup decl in internal subset\n";
399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000401 errmsg = "expected '>'\n";
402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "XML conditional section '[' expected\n";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000407 errmsg = "Content error in the external subset\n";
408 break;
409 case XML_ERR_CONDSEC_INVALID_KEYWORD:
410 errmsg =
411 "conditional section INCLUDE or IGNORE keyword expected\n";
412 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000413 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 errmsg = "XML conditional section not closed\n";
415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 errmsg = "Text declaration '<?xml' required\n";
418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 errmsg = "parsing XML declaration: '?>' expected\n";
421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 errmsg = "external parsed entities cannot be standalone\n";
424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 errmsg = "EntityRef: expecting ';'\n";
427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 errmsg = "DOCTYPE improperly terminated\n";
430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 errmsg = "EndTag: '</' not found\n";
433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 errmsg = "expected '='\n";
436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 errmsg = "String not closed expecting \" or '\n";
439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 errmsg = "String not started expecting ' or \"\n";
442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 errmsg = "Invalid XML encoding name\n";
445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 errmsg = "standalone accepts only 'yes' or 'no'\n";
448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 errmsg = "Document is empty\n";
451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 errmsg = "Extra content at the end of the document\n";
454 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000455 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 errmsg = "chunk is not well balanced\n";
457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 errmsg = "extra content at the end of well balanced chunk\n";
460 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000461 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000462 errmsg = "Malformed declaration expecting version\n";
463 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000464#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 case:
466 errmsg = "\n";
467 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000468#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 default:
470 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000472 if (ctxt != NULL)
473 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000474 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000475 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
476 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000477 if (ctxt != NULL) {
478 ctxt->wellFormed = 0;
479 if (ctxt->recovery == 0)
480 ctxt->disableSAX = 1;
481 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000482}
483
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000484/**
485 * xmlFatalErrMsg:
486 * @ctxt: an XML parser context
487 * @error: the error number
488 * @msg: the error message
489 *
490 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
491 */
492static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000493xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
494 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000495{
Daniel Veillard157fee02003-10-31 10:36:03 +0000496 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
497 (ctxt->instate == XML_PARSER_EOF))
498 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000499 if (ctxt != NULL)
500 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000501 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200502 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000503 if (ctxt != NULL) {
504 ctxt->wellFormed = 0;
505 if (ctxt->recovery == 0)
506 ctxt->disableSAX = 1;
507 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000508}
509
510/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000511 * xmlWarningMsg:
512 * @ctxt: an XML parser context
513 * @error: the error number
514 * @msg: the error message
515 * @str1: extra data
516 * @str2: extra data
517 *
518 * Handle a warning.
519 */
520static void
521xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
522 const char *msg, const xmlChar *str1, const xmlChar *str2)
523{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000524 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000525
Daniel Veillard157fee02003-10-31 10:36:03 +0000526 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
527 (ctxt->instate == XML_PARSER_EOF))
528 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000529 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
530 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000531 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200532 if (ctxt != NULL) {
533 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000534 (ctxt->sax) ? ctxt->sax->warning : NULL,
535 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000536 ctxt, NULL, XML_FROM_PARSER, error,
537 XML_ERR_WARNING, NULL, 0,
538 (const char *) str1, (const char *) str2, NULL, 0, 0,
539 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200540 } else {
541 __xmlRaiseError(schannel, NULL, NULL,
542 ctxt, NULL, XML_FROM_PARSER, error,
543 XML_ERR_WARNING, NULL, 0,
544 (const char *) str1, (const char *) str2, NULL, 0, 0,
545 msg, (const char *) str1, (const char *) str2);
546 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000547}
548
549/**
550 * xmlValidityError:
551 * @ctxt: an XML parser context
552 * @error: the error number
553 * @msg: the error message
554 * @str1: extra data
555 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000556 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000557 */
558static void
559xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000560 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000561{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000562 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000563
564 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
565 (ctxt->instate == XML_PARSER_EOF))
566 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000567 if (ctxt != NULL) {
568 ctxt->errNo = error;
569 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
570 schannel = ctxt->sax->serror;
571 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200572 if (ctxt != NULL) {
573 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000574 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000575 ctxt, NULL, XML_FROM_DTD, error,
576 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000577 (const char *) str2, NULL, 0, 0,
578 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000579 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200580 } else {
581 __xmlRaiseError(schannel, NULL, NULL,
582 ctxt, NULL, XML_FROM_DTD, error,
583 XML_ERR_ERROR, NULL, 0, (const char *) str1,
584 (const char *) str2, NULL, 0, 0,
585 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000586 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000587}
588
589/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000590 * xmlFatalErrMsgInt:
591 * @ctxt: an XML parser context
592 * @error: the error number
593 * @msg: the error message
594 * @val: an integer value
595 *
596 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
597 */
598static void
599xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000600 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000601{
Daniel Veillard157fee02003-10-31 10:36:03 +0000602 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
603 (ctxt->instate == XML_PARSER_EOF))
604 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000605 if (ctxt != NULL)
606 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000607 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000608 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
609 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000610 if (ctxt != NULL) {
611 ctxt->wellFormed = 0;
612 if (ctxt->recovery == 0)
613 ctxt->disableSAX = 1;
614 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000615}
616
617/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000618 * xmlFatalErrMsgStrIntStr:
619 * @ctxt: an XML parser context
620 * @error: the error number
621 * @msg: the error message
622 * @str1: an string info
623 * @val: an integer value
624 * @str2: an string info
625 *
626 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
627 */
628static void
629xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
630 const char *msg, const xmlChar *str1, int val,
631 const xmlChar *str2)
632{
Daniel Veillard157fee02003-10-31 10:36:03 +0000633 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634 (ctxt->instate == XML_PARSER_EOF))
635 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000636 if (ctxt != NULL)
637 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000638 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000639 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
640 NULL, 0, (const char *) str1, (const char *) str2,
641 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000642 if (ctxt != NULL) {
643 ctxt->wellFormed = 0;
644 if (ctxt->recovery == 0)
645 ctxt->disableSAX = 1;
646 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000647}
648
649/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000650 * xmlFatalErrMsgStr:
651 * @ctxt: an XML parser context
652 * @error: the error number
653 * @msg: the error message
654 * @val: a string value
655 *
656 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
657 */
658static void
659xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000660 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000661{
Daniel Veillard157fee02003-10-31 10:36:03 +0000662 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
663 (ctxt->instate == XML_PARSER_EOF))
664 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000665 if (ctxt != NULL)
666 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000667 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000668 XML_FROM_PARSER, error, XML_ERR_FATAL,
669 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
670 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000671 if (ctxt != NULL) {
672 ctxt->wellFormed = 0;
673 if (ctxt->recovery == 0)
674 ctxt->disableSAX = 1;
675 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000676}
677
678/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000679 * xmlErrMsgStr:
680 * @ctxt: an XML parser context
681 * @error: the error number
682 * @msg: the error message
683 * @val: a string value
684 *
685 * Handle a non fatal parser error
686 */
687static void
688xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
689 const char *msg, const xmlChar * val)
690{
Daniel Veillard157fee02003-10-31 10:36:03 +0000691 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
692 (ctxt->instate == XML_PARSER_EOF))
693 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000694 if (ctxt != NULL)
695 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000696 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000697 XML_FROM_PARSER, error, XML_ERR_ERROR,
698 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
699 val);
700}
701
702/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000703 * xmlNsErr:
704 * @ctxt: an XML parser context
705 * @error: the error number
706 * @msg: the message
707 * @info1: extra information string
708 * @info2: extra information string
709 *
710 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
711 */
712static void
713xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
714 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000715 const xmlChar * info1, const xmlChar * info2,
716 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000717{
Daniel Veillard157fee02003-10-31 10:36:03 +0000718 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
719 (ctxt->instate == XML_PARSER_EOF))
720 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000721 if (ctxt != NULL)
722 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000723 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000724 XML_ERR_ERROR, NULL, 0, (const char *) info1,
725 (const char *) info2, (const char *) info3, 0, 0, msg,
726 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000727 if (ctxt != NULL)
728 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000729}
730
Daniel Veillard37334572008-07-31 08:20:02 +0000731/**
732 * xmlNsWarn
733 * @ctxt: an XML parser context
734 * @error: the error number
735 * @msg: the message
736 * @info1: extra information string
737 * @info2: extra information string
738 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800739 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000740 */
741static void
742xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
743 const char *msg,
744 const xmlChar * info1, const xmlChar * info2,
745 const xmlChar * info3)
746{
747 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
748 (ctxt->instate == XML_PARSER_EOF))
749 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000750 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
751 XML_ERR_WARNING, NULL, 0, (const char *) info1,
752 (const char *) info2, (const char *) info3, 0, 0, msg,
753 info1, info2, info3);
754}
755
/************************************************************************
 *                                                                      *
 *              Library wide options                                    *
 *                                                                      *
 ************************************************************************/
761
762/**
763 * xmlHasFeature:
764 * @feature: the feature to be examined
765 *
766 * Examines if the library has been compiled with a given feature.
767 *
768 * Returns a non-zero value if the feature exist, otherwise zero.
769 * Returns zero (0) if the feature does not exist or an unknown
770 * unknown feature is requested, non-zero otherwise.
771 */
772int
773xmlHasFeature(xmlFeature feature)
774{
775 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000776 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000777#ifdef LIBXML_THREAD_ENABLED
778 return(1);
779#else
780 return(0);
781#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000782 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000783#ifdef LIBXML_TREE_ENABLED
784 return(1);
785#else
786 return(0);
787#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000788 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000789#ifdef LIBXML_OUTPUT_ENABLED
790 return(1);
791#else
792 return(0);
793#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000794 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000795#ifdef LIBXML_PUSH_ENABLED
796 return(1);
797#else
798 return(0);
799#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000800 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000801#ifdef LIBXML_READER_ENABLED
802 return(1);
803#else
804 return(0);
805#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000806 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000807#ifdef LIBXML_PATTERN_ENABLED
808 return(1);
809#else
810 return(0);
811#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000812 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000813#ifdef LIBXML_WRITER_ENABLED
814 return(1);
815#else
816 return(0);
817#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000818 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000819#ifdef LIBXML_SAX1_ENABLED
820 return(1);
821#else
822 return(0);
823#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000824 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000825#ifdef LIBXML_FTP_ENABLED
826 return(1);
827#else
828 return(0);
829#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000830 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000831#ifdef LIBXML_HTTP_ENABLED
832 return(1);
833#else
834 return(0);
835#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000836 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000837#ifdef LIBXML_VALID_ENABLED
838 return(1);
839#else
840 return(0);
841#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000842 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000843#ifdef LIBXML_HTML_ENABLED
844 return(1);
845#else
846 return(0);
847#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000848 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000849#ifdef LIBXML_LEGACY_ENABLED
850 return(1);
851#else
852 return(0);
853#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000854 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000855#ifdef LIBXML_C14N_ENABLED
856 return(1);
857#else
858 return(0);
859#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000860 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000861#ifdef LIBXML_CATALOG_ENABLED
862 return(1);
863#else
864 return(0);
865#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000866 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000867#ifdef LIBXML_XPATH_ENABLED
868 return(1);
869#else
870 return(0);
871#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000872 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000873#ifdef LIBXML_XPTR_ENABLED
874 return(1);
875#else
876 return(0);
877#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000878 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000879#ifdef LIBXML_XINCLUDE_ENABLED
880 return(1);
881#else
882 return(0);
883#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000884 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000885#ifdef LIBXML_ICONV_ENABLED
886 return(1);
887#else
888 return(0);
889#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000890 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000891#ifdef LIBXML_ISO8859X_ENABLED
892 return(1);
893#else
894 return(0);
895#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000896 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000897#ifdef LIBXML_UNICODE_ENABLED
898 return(1);
899#else
900 return(0);
901#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000902 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000903#ifdef LIBXML_REGEXP_ENABLED
904 return(1);
905#else
906 return(0);
907#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000908 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000909#ifdef LIBXML_AUTOMATA_ENABLED
910 return(1);
911#else
912 return(0);
913#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000914 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000915#ifdef LIBXML_EXPR_ENABLED
916 return(1);
917#else
918 return(0);
919#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000920 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000921#ifdef LIBXML_SCHEMAS_ENABLED
922 return(1);
923#else
924 return(0);
925#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000926 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000927#ifdef LIBXML_SCHEMATRON_ENABLED
928 return(1);
929#else
930 return(0);
931#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000932 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000933#ifdef LIBXML_MODULES_ENABLED
934 return(1);
935#else
936 return(0);
937#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000938 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000939#ifdef LIBXML_DEBUG_ENABLED
940 return(1);
941#else
942 return(0);
943#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000944 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000945#ifdef DEBUG_MEMORY_LOCATION
946 return(1);
947#else
948 return(0);
949#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000950 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000951#ifdef LIBXML_DEBUG_RUNTIME
952 return(1);
953#else
954 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000955#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000956 case XML_WITH_ZLIB:
957#ifdef LIBXML_ZLIB_ENABLED
958 return(1);
959#else
960 return(0);
961#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +0200962 case XML_WITH_LZMA:
963#ifdef LIBXML_LZMA_ENABLED
964 return(1);
965#else
966 return(0);
967#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100968 case XML_WITH_ICU:
969#ifdef LIBXML_ICU_ENABLED
970 return(1);
971#else
972 return(0);
973#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000974 default:
975 break;
976 }
977 return(0);
978}
979
/************************************************************************
 *                                                                      *
 *              SAX2 defaulted attributes handling                      *
 *                                                                      *
 ************************************************************************/
985
986/**
987 * xmlDetectSAX2:
988 * @ctxt: an XML parser context
989 *
990 * Do the SAX2 detection and specific intialization
991 */
992static void
993xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
994 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000995#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000996 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
997 ((ctxt->sax->startElementNs != NULL) ||
998 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000999#else
1000 ctxt->sax2 = 1;
1001#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001002
1003 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1004 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1005 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +00001006 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1007 (ctxt->str_xml_ns == NULL)) {
1008 xmlErrMemory(ctxt, NULL);
1009 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001010}
1011
Daniel Veillarde57ec792003-09-10 10:50:59 +00001012typedef struct _xmlDefAttrs xmlDefAttrs;
1013typedef xmlDefAttrs *xmlDefAttrsPtr;
1014struct _xmlDefAttrs {
1015 int nbAttrs; /* number of defaulted attributes on that element */
1016 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001017 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001018};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001019
1020/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001021 * xmlAttrNormalizeSpace:
1022 * @src: the source string
1023 * @dst: the target string
1024 *
1025 * Normalize the space in non CDATA attribute values:
1026 * If the attribute type is not CDATA, then the XML processor MUST further
1027 * process the normalized attribute value by discarding any leading and
1028 * trailing space (#x20) characters, and by replacing sequences of space
1029 * (#x20) characters by a single space (#x20) character.
1030 * Note that the size of dst need to be at least src, and if one doesn't need
1031 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1032 * passing src as dst is just fine.
1033 *
1034 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1035 * is needed.
1036 */
1037static xmlChar *
1038xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1039{
1040 if ((src == NULL) || (dst == NULL))
1041 return(NULL);
1042
1043 while (*src == 0x20) src++;
1044 while (*src != 0) {
1045 if (*src == 0x20) {
1046 while (*src == 0x20) src++;
1047 if (*src != 0)
1048 *dst++ = 0x20;
1049 } else {
1050 *dst++ = *src++;
1051 }
1052 }
1053 *dst = 0;
1054 if (dst == src)
1055 return(NULL);
1056 return(dst);
1057}
1058
1059/**
1060 * xmlAttrNormalizeSpace2:
1061 * @src: the source string
1062 *
1063 * Normalize the space in non CDATA attribute values, a slightly more complex
1064 * front end to avoid allocation problems when running on attribute values
1065 * coming from the input.
1066 *
1067 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1068 * is needed.
1069 */
1070static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001071xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001072{
1073 int i;
1074 int remove_head = 0;
1075 int need_realloc = 0;
1076 const xmlChar *cur;
1077
1078 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1079 return(NULL);
1080 i = *len;
1081 if (i <= 0)
1082 return(NULL);
1083
1084 cur = src;
1085 while (*cur == 0x20) {
1086 cur++;
1087 remove_head++;
1088 }
1089 while (*cur != 0) {
1090 if (*cur == 0x20) {
1091 cur++;
1092 if ((*cur == 0x20) || (*cur == 0)) {
1093 need_realloc = 1;
1094 break;
1095 }
1096 } else
1097 cur++;
1098 }
1099 if (need_realloc) {
1100 xmlChar *ret;
1101
1102 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1103 if (ret == NULL) {
1104 xmlErrMemory(ctxt, NULL);
1105 return(NULL);
1106 }
1107 xmlAttrNormalizeSpace(ret, ret);
1108 *len = (int) strlen((const char *)ret);
1109 return(ret);
1110 } else if (remove_head) {
1111 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001112 memmove(src, src + remove_head, 1 + *len);
1113 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001114 }
1115 return(NULL);
1116}
1117
1118/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001119 * xmlAddDefAttrs:
1120 * @ctxt: an XML parser context
1121 * @fullname: the element fullname
1122 * @fullattr: the attribute fullname
1123 * @value: the attribute value
1124 *
1125 * Add a defaulted attribute for an element
1126 */
1127static void
1128xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1129 const xmlChar *fullname,
1130 const xmlChar *fullattr,
1131 const xmlChar *value) {
1132 xmlDefAttrsPtr defaults;
1133 int len;
1134 const xmlChar *name;
1135 const xmlChar *prefix;
1136
Daniel Veillard6a31b832008-03-26 14:06:44 +00001137 /*
1138 * Allows to detect attribute redefinitions
1139 */
1140 if (ctxt->attsSpecial != NULL) {
1141 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1142 return;
1143 }
1144
Daniel Veillarde57ec792003-09-10 10:50:59 +00001145 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001146 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001147 if (ctxt->attsDefault == NULL)
1148 goto mem_error;
1149 }
1150
1151 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001152 * split the element name into prefix:localname , the string found
1153 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 */
1155 name = xmlSplitQName3(fullname, &len);
1156 if (name == NULL) {
1157 name = xmlDictLookup(ctxt->dict, fullname, -1);
1158 prefix = NULL;
1159 } else {
1160 name = xmlDictLookup(ctxt->dict, name, -1);
1161 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1162 }
1163
1164 /*
1165 * make sure there is some storage
1166 */
1167 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1168 if (defaults == NULL) {
1169 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001170 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001171 if (defaults == NULL)
1172 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001173 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001174 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001175 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1176 defaults, NULL) < 0) {
1177 xmlFree(defaults);
1178 goto mem_error;
1179 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001180 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001181 xmlDefAttrsPtr temp;
1182
1183 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001184 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001185 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001186 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001187 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001188 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001189 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1190 defaults, NULL) < 0) {
1191 xmlFree(defaults);
1192 goto mem_error;
1193 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001194 }
1195
1196 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001197 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001198 * are within the DTD and hen not associated to namespace names.
1199 */
1200 name = xmlSplitQName3(fullattr, &len);
1201 if (name == NULL) {
1202 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1203 prefix = NULL;
1204 } else {
1205 name = xmlDictLookup(ctxt->dict, name, -1);
1206 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1207 }
1208
Daniel Veillardae0765b2008-07-31 19:54:59 +00001209 defaults->values[5 * defaults->nbAttrs] = name;
1210 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001211 /* intern the string and precompute the end */
1212 len = xmlStrlen(value);
1213 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001214 defaults->values[5 * defaults->nbAttrs + 2] = value;
1215 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1216 if (ctxt->external)
1217 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1218 else
1219 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001220 defaults->nbAttrs++;
1221
1222 return;
1223
1224mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001225 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001226 return;
1227}
1228
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001229/**
1230 * xmlAddSpecialAttr:
1231 * @ctxt: an XML parser context
1232 * @fullname: the element fullname
1233 * @fullattr: the attribute fullname
1234 * @type: the attribute type
1235 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001236 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001237 */
1238static void
1239xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1240 const xmlChar *fullname,
1241 const xmlChar *fullattr,
1242 int type)
1243{
1244 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001245 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001246 if (ctxt->attsSpecial == NULL)
1247 goto mem_error;
1248 }
1249
Daniel Veillardac4118d2008-01-11 05:27:32 +00001250 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1251 return;
1252
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001253 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1254 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001255 return;
1256
1257mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001258 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001259 return;
1260}
1261
Daniel Veillard4432df22003-09-28 18:58:27 +00001262/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001263 * xmlCleanSpecialAttrCallback:
1264 *
1265 * Removes CDATA attributes from the special attribute table
1266 */
1267static void
1268xmlCleanSpecialAttrCallback(void *payload, void *data,
1269 const xmlChar *fullname, const xmlChar *fullattr,
1270 const xmlChar *unused ATTRIBUTE_UNUSED) {
1271 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1272
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001273 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001274 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1275 }
1276}
1277
1278/**
1279 * xmlCleanSpecialAttr:
1280 * @ctxt: an XML parser context
1281 *
1282 * Trim the list of attributes defined to remove all those of type
1283 * CDATA as they are not special. This call should be done when finishing
1284 * to parse the DTD and before starting to parse the document root.
1285 */
1286static void
1287xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1288{
1289 if (ctxt->attsSpecial == NULL)
1290 return;
1291
1292 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1293
1294 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1295 xmlHashFree(ctxt->attsSpecial, NULL);
1296 ctxt->attsSpecial = NULL;
1297 }
1298 return;
1299}
1300
1301/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001302 * xmlCheckLanguageID:
1303 * @lang: pointer to the string value
1304 *
1305 * Checks that the value conforms to the LanguageID production:
1306 *
1307 * NOTE: this is somewhat deprecated, those productions were removed from
1308 * the XML Second edition.
1309 *
1310 * [33] LanguageID ::= Langcode ('-' Subcode)*
1311 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1312 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1313 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1314 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1315 * [38] Subcode ::= ([a-z] | [A-Z])+
1316 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001317 * The current REC reference the sucessors of RFC 1766, currently 5646
1318 *
1319 * http://www.rfc-editor.org/rfc/rfc5646.txt
1320 * langtag = language
1321 * ["-" script]
1322 * ["-" region]
1323 * *("-" variant)
1324 * *("-" extension)
1325 * ["-" privateuse]
1326 * language = 2*3ALPHA ; shortest ISO 639 code
1327 * ["-" extlang] ; sometimes followed by
1328 * ; extended language subtags
1329 * / 4ALPHA ; or reserved for future use
1330 * / 5*8ALPHA ; or registered language subtag
1331 *
1332 * extlang = 3ALPHA ; selected ISO 639 codes
1333 * *2("-" 3ALPHA) ; permanently reserved
1334 *
1335 * script = 4ALPHA ; ISO 15924 code
1336 *
1337 * region = 2ALPHA ; ISO 3166-1 code
1338 * / 3DIGIT ; UN M.49 code
1339 *
1340 * variant = 5*8alphanum ; registered variants
1341 * / (DIGIT 3alphanum)
1342 *
1343 * extension = singleton 1*("-" (2*8alphanum))
1344 *
1345 * ; Single alphanumerics
1346 * ; "x" reserved for private use
1347 * singleton = DIGIT ; 0 - 9
1348 * / %x41-57 ; A - W
1349 * / %x59-5A ; Y - Z
1350 * / %x61-77 ; a - w
1351 * / %x79-7A ; y - z
1352 *
1353 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1354 * The parser below doesn't try to cope with extension or privateuse
1355 * that could be added but that's not interoperable anyway
1356 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001357 * Returns 1 if correct 0 otherwise
1358 **/
1359int
1360xmlCheckLanguageID(const xmlChar * lang)
1361{
Daniel Veillard60587d62010-11-04 15:16:27 +01001362 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001363
1364 if (cur == NULL)
1365 return (0);
1366 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001367 ((cur[0] == 'I') && (cur[1] == '-')) ||
1368 ((cur[0] == 'x') && (cur[1] == '-')) ||
1369 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001370 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001371 * Still allow IANA code and user code which were coming
1372 * from the previous version of the XML-1.0 specification
1373 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001374 */
1375 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001376 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001377 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1378 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001379 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001380 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001381 nxt = cur;
1382 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1383 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1384 nxt++;
1385 if (nxt - cur >= 4) {
1386 /*
1387 * Reserved
1388 */
1389 if ((nxt - cur > 8) || (nxt[0] != 0))
1390 return(0);
1391 return(1);
1392 }
1393 if (nxt - cur < 2)
1394 return(0);
1395 /* we got an ISO 639 code */
1396 if (nxt[0] == 0)
1397 return(1);
1398 if (nxt[0] != '-')
1399 return(0);
1400
1401 nxt++;
1402 cur = nxt;
1403 /* now we can have extlang or script or region or variant */
1404 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1405 goto region_m49;
1406
1407 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1408 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1409 nxt++;
1410 if (nxt - cur == 4)
1411 goto script;
1412 if (nxt - cur == 2)
1413 goto region;
1414 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1415 goto variant;
1416 if (nxt - cur != 3)
1417 return(0);
1418 /* we parsed an extlang */
1419 if (nxt[0] == 0)
1420 return(1);
1421 if (nxt[0] != '-')
1422 return(0);
1423
1424 nxt++;
1425 cur = nxt;
1426 /* now we can have script or region or variant */
1427 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1428 goto region_m49;
1429
1430 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1431 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1432 nxt++;
1433 if (nxt - cur == 2)
1434 goto region;
1435 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1436 goto variant;
1437 if (nxt - cur != 4)
1438 return(0);
1439 /* we parsed a script */
1440script:
1441 if (nxt[0] == 0)
1442 return(1);
1443 if (nxt[0] != '-')
1444 return(0);
1445
1446 nxt++;
1447 cur = nxt;
1448 /* now we can have region or variant */
1449 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1450 goto region_m49;
1451
1452 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1453 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1454 nxt++;
1455
1456 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1457 goto variant;
1458 if (nxt - cur != 2)
1459 return(0);
1460 /* we parsed a region */
1461region:
1462 if (nxt[0] == 0)
1463 return(1);
1464 if (nxt[0] != '-')
1465 return(0);
1466
1467 nxt++;
1468 cur = nxt;
1469 /* now we can just have a variant */
1470 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1471 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1472 nxt++;
1473
1474 if ((nxt - cur < 5) || (nxt - cur > 8))
1475 return(0);
1476
1477 /* we parsed a variant */
1478variant:
1479 if (nxt[0] == 0)
1480 return(1);
1481 if (nxt[0] != '-')
1482 return(0);
1483 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001484 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001485
1486region_m49:
1487 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1488 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1489 nxt += 3;
1490 goto region;
1491 }
1492 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001493}
1494
/************************************************************************
 *                                                                      *
 *              Parser stacks related functions and macros              *
 *                                                                      *
 ************************************************************************/
1500
Daniel Veillard8ed10722009-08-20 19:17:36 +02001501static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1502 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001503
#ifdef SAX2
/**
 * nsPush:
 * @ctxt:  an XML parser context
 * @prefix:  the namespace prefix or NULL
 * @URL:  the namespace name
 *
 * Pushes a new parser namespace on top of the ns stack. The stack stores
 * prefix/URL pairs in consecutive slots of ctxt->nsTab.
 *
 * With XML_PARSE_NSCLEAN, a declaration identical to the binding
 * currently in scope for @prefix is redundant and is not pushed. Strings
 * are compared by pointer (presumably they are interned in the context
 * dictionary - confirm against callers).
 *
 * Returns -1 in case of error, -2 if the namespace should be discarded
 *	   and the index in the stack otherwise.
 */
static int
nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
{
    if (ctxt->options & XML_PARSE_NSCLEAN) {
        int i;

        /*
         * Scan from the top of the stack: the binding in scope for a
         * prefix is the most recently pushed one. Scanning from the
         * bottom would compare against an outer binding that an inner
         * declaration may have overridden.
         */
        for (i = ctxt->nsNr - 2; i >= 0; i -= 2) {
            if (ctxt->nsTab[i] == prefix) {
                /* in scope */
                if (ctxt->nsTab[i + 1] == URL)
                    return(-2);
                /* out of scope keep it */
                break;
            }
        }
    }
    if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
        ctxt->nsMax = 10;
        ctxt->nsNr = 0;
        ctxt->nsTab = (const xmlChar **)
                      xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
        if (ctxt->nsTab == NULL) {
            xmlErrMemory(ctxt, NULL);
            ctxt->nsMax = 0;
            return (-1);
        }
    } else if (ctxt->nsNr >= ctxt->nsMax) {
        const xmlChar ** tmp;

        ctxt->nsMax *= 2;
        tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
                                    ctxt->nsMax * sizeof(ctxt->nsTab[0]));
        if (tmp == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* the old table is still valid: restore its size */
            ctxt->nsMax /= 2;
            return (-1);
        }
        ctxt->nsTab = tmp;
    }
    ctxt->nsTab[ctxt->nsNr++] = prefix;
    ctxt->nsTab[ctxt->nsNr++] = URL;
    return (ctxt->nsNr);
}

/**
 * nsPop:
 * @ctxt:  an XML parser context
 * @nr:  the number of slots to pop
 *
 * Pops the top @nr slots from the ns stack and resets them to NULL.
 * A request larger than the stack is reported through the generic error
 * handler and reduced to the stack size.
 *
 * Returns the number of slots removed
 */
static int
nsPop(xmlParserCtxtPtr ctxt, int nr)
{
    int i;

    if (ctxt->nsTab == NULL) return(0);
    if (ctxt->nsNr < nr) {
        xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
        nr = ctxt->nsNr;
    }
    if (ctxt->nsNr <= 0)
        return (0);

    for (i = 0; i < nr; i++) {
        ctxt->nsNr--;
        ctxt->nsTab[ctxt->nsNr] = NULL;
    }
    return(nr);
}
#endif
1586
1587static int
1588xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1589 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001590 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001591 int maxatts;
1592
1593 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001594 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001595 atts = (const xmlChar **)
1596 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001597 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001598 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001599 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1600 if (attallocs == NULL) goto mem_error;
1601 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001602 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001603 } else if (nr + 5 > ctxt->maxatts) {
1604 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001605 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1606 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001607 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001608 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001609 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1610 (maxatts / 5) * sizeof(int));
1611 if (attallocs == NULL) goto mem_error;
1612 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001613 ctxt->maxatts = maxatts;
1614 }
1615 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001616mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001617 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001618 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001619}
1620
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001621/**
1622 * inputPush:
1623 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001624 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001625 *
1626 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001627 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001628 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001629 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001630int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001631inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1632{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001633 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001634 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001635 if (ctxt->inputNr >= ctxt->inputMax) {
1636 ctxt->inputMax *= 2;
1637 ctxt->inputTab =
1638 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1639 ctxt->inputMax *
1640 sizeof(ctxt->inputTab[0]));
1641 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001642 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001643 xmlFreeInputStream(value);
1644 ctxt->inputMax /= 2;
1645 value = NULL;
1646 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001647 }
1648 }
1649 ctxt->inputTab[ctxt->inputNr] = value;
1650 ctxt->input = value;
1651 return (ctxt->inputNr++);
1652}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001653/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001654 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001655 * @ctxt: an XML parser context
1656 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001657 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001658 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001659 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001660 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001661xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001662inputPop(xmlParserCtxtPtr ctxt)
1663{
1664 xmlParserInputPtr ret;
1665
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001666 if (ctxt == NULL)
1667 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001668 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001669 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001670 ctxt->inputNr--;
1671 if (ctxt->inputNr > 0)
1672 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1673 else
1674 ctxt->input = NULL;
1675 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001676 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001677 return (ret);
1678}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001679/**
1680 * nodePush:
1681 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001682 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001683 *
1684 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001685 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001686 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001687 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001688int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001689nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1690{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001691 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001693 xmlNodePtr *tmp;
1694
1695 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1696 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001697 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001698 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001699 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001700 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001701 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001702 ctxt->nodeTab = tmp;
1703 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001704 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001705 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1706 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001707 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001708 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001709 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001710 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001711 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001712 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001713 ctxt->nodeTab[ctxt->nodeNr] = value;
1714 ctxt->node = value;
1715 return (ctxt->nodeNr++);
1716}
Daniel Veillard8915c152008-08-26 13:05:34 +00001717
Daniel Veillard1c732d22002-11-30 11:22:59 +00001718/**
1719 * nodePop:
1720 * @ctxt: an XML parser context
1721 *
1722 * Pops the top element node from the node stack
1723 *
1724 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001725 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001726xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001727nodePop(xmlParserCtxtPtr ctxt)
1728{
1729 xmlNodePtr ret;
1730
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001731 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001732 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001733 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001734 ctxt->nodeNr--;
1735 if (ctxt->nodeNr > 0)
1736 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1737 else
1738 ctxt->node = NULL;
1739 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001740 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001741 return (ret);
1742}
Daniel Veillarda2351322004-06-27 12:08:10 +00001743
1744#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001745/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001746 * nameNsPush:
1747 * @ctxt: an XML parser context
1748 * @value: the element name
1749 * @prefix: the element prefix
1750 * @URI: the element namespace name
1751 *
1752 * Pushes a new element name/prefix/URL on top of the name stack
1753 *
1754 * Returns -1 in case of error, the index in the stack otherwise
1755 */
1756static int
1757nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1758 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1759{
1760 if (ctxt->nameNr >= ctxt->nameMax) {
1761 const xmlChar * *tmp;
1762 void **tmp2;
1763 ctxt->nameMax *= 2;
1764 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1765 ctxt->nameMax *
1766 sizeof(ctxt->nameTab[0]));
1767 if (tmp == NULL) {
1768 ctxt->nameMax /= 2;
1769 goto mem_error;
1770 }
1771 ctxt->nameTab = tmp;
1772 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1773 ctxt->nameMax * 3 *
1774 sizeof(ctxt->pushTab[0]));
1775 if (tmp2 == NULL) {
1776 ctxt->nameMax /= 2;
1777 goto mem_error;
1778 }
1779 ctxt->pushTab = tmp2;
1780 }
1781 ctxt->nameTab[ctxt->nameNr] = value;
1782 ctxt->name = value;
1783 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1784 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001785 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001786 return (ctxt->nameNr++);
1787mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001788 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001789 return (-1);
1790}
1791/**
1792 * nameNsPop:
1793 * @ctxt: an XML parser context
1794 *
1795 * Pops the top element/prefix/URI name from the name stack
1796 *
1797 * Returns the name just removed
1798 */
1799static const xmlChar *
1800nameNsPop(xmlParserCtxtPtr ctxt)
1801{
1802 const xmlChar *ret;
1803
1804 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001805 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001806 ctxt->nameNr--;
1807 if (ctxt->nameNr > 0)
1808 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1809 else
1810 ctxt->name = NULL;
1811 ret = ctxt->nameTab[ctxt->nameNr];
1812 ctxt->nameTab[ctxt->nameNr] = NULL;
1813 return (ret);
1814}
Daniel Veillarda2351322004-06-27 12:08:10 +00001815#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001816
1817/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001818 * namePush:
1819 * @ctxt: an XML parser context
1820 * @value: the element name
1821 *
1822 * Pushes a new element name on top of the name stack
1823 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001824 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001825 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001826int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001827namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001828{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001829 if (ctxt == NULL) return (-1);
1830
Daniel Veillard1c732d22002-11-30 11:22:59 +00001831 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001832 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001833 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001834 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001835 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001836 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001837 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001838 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001839 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001840 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001841 }
1842 ctxt->nameTab[ctxt->nameNr] = value;
1843 ctxt->name = value;
1844 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001845mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001846 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001847 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001848}
1849/**
1850 * namePop:
1851 * @ctxt: an XML parser context
1852 *
1853 * Pops the top element name from the name stack
1854 *
1855 * Returns the name just removed
1856 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001857const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001858namePop(xmlParserCtxtPtr ctxt)
1859{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001860 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001861
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001862 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1863 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001864 ctxt->nameNr--;
1865 if (ctxt->nameNr > 0)
1866 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1867 else
1868 ctxt->name = NULL;
1869 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001870 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001871 return (ret);
1872}
Owen Taylor3473f882001-02-23 17:55:21 +00001873
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001874static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001875 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001876 int *tmp;
1877
Owen Taylor3473f882001-02-23 17:55:21 +00001878 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001879 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1880 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1881 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001882 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001883 ctxt->spaceMax /=2;
1884 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001885 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001886 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001887 }
1888 ctxt->spaceTab[ctxt->spaceNr] = val;
1889 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1890 return(ctxt->spaceNr++);
1891}
1892
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001893static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001894 int ret;
1895 if (ctxt->spaceNr <= 0) return(0);
1896 ctxt->spaceNr--;
1897 if (ctxt->spaceNr > 0)
1898 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1899 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001900 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001901 ret = ctxt->spaceTab[ctxt->spaceNr];
1902 ctxt->spaceTab[ctxt->spaceNr] = -1;
1903 return(ret);
1904}
1905
1906/*
1907 * Macros for accessing the content. Those should be used only by the parser,
1908 * and not exported.
1909 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
1912 *
1913 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1914 * To be used with extreme caution since operations consuming
1915 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypassing any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Like CUR, it should be used only
 *           to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChars, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001928 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1929 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001932 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001933 * CUR_CHAR(l) returns the current unicode character (int), set l
1934 * to the number of xmlChars used for the encoding [0-5].
1935 * CUR_SCHAR same but operate on a string instead of the context
1936 * COPY_BUF copy the current unicode char to the target buffer, increment
1937 * the index
1938 * GROW, SHRINK handling of input buffers
1939 */
1940
/*
 * Accessors for the current input.  All of them expand to expressions on
 * a variable named ctxt, which must be in scope at the point of use.
 *
 * RAW and CUR expand to the same expression: the byte at the current
 * input position.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
/* NXT(val): the byte val positions after the current one (no bounds check) */
#define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the current position pointer itself */
#define CUR_PTR ctxt->input->cur
1945
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1 ... cn, each byte being read as an unsigned char.  s is evaluated
 * once per compared byte, so it must be free of side effects.
 *
 * Every parameter is parenthesized so that arbitrary expressions (pointer
 * arithmetic, conditional expressions, ...) can be passed safely.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1963
/*
 * SKIP(val): advance the current input position by val bytes, adding val
 * to ctxt->nbChars and to the column counter as well.  The line counter
 * is not touched.  val is evaluated three times.
 * Afterwards a '%' at the new position triggers
 * xmlParserHandlePEReference(), and a 0 byte that xmlParserInputGrow()
 * cannot extend triggers xmlPopInput().
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
1971
/*
 * SKIPL(val): advance the current input position by val bytes one byte at
 * a time, keeping the line and column counters accurate across newlines
 * and adding one to ctxt->nbChars per byte.  val is parenthesized so any
 * expression may be passed; it is re-evaluated on every loop iteration.
 * Afterwards a '%' at the new position triggers
 * xmlParserHandlePEReference(), and a 0 byte that xmlParserInputGrow()
 * cannot extend triggers xmlPopInput().
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
        ctxt->input->line++; ctxt->input->col = 1;                      \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
1986
/*
 * SHRINK: call xmlSHRINK() when ctxt->progressive is 0, more than
 * 2 * INPUT_CHUNK bytes lie between the buffer base and the current
 * position, and fewer than 2 * INPUT_CHUNK bytes remain before the end.
 *
 * NOTE(review): this expands to a bare "if (...) stmt;" -- using it as the
 * body of an if that has an else branch would mis-bind the else.
 */
#define SHRINK if ((ctxt->progressive == 0) &&                          \
                   (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
                   (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
        xmlSHRINK (ctxt);
1991
1992static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1993 xmlParserInputShrink(ctxt->input);
1994 if ((*ctxt->input->cur == 0) &&
1995 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1996 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001997 }
Owen Taylor3473f882001-02-23 17:55:21 +00001998
/*
 * GROW: call xmlGROW() when ctxt->progressive is 0 and fewer than
 * INPUT_CHUNK bytes remain between the current position and the end of
 * the input buffer.
 *
 * NOTE(review): this expands to a bare "if (...) stmt;" -- using it as the
 * body of an if that has an else branch would mis-bind the else.
 */
#define GROW if ((ctxt->progressive == 0) &&                            \
                 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK))   \
        xmlGROW (ctxt);
2002
2003static void xmlGROW (xmlParserCtxtPtr ctxt) {
2004 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002005 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002006 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2007 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002008}
Owen Taylor3473f882001-02-23 17:55:21 +00002009
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the ctxt in scope */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the ctxt in scope */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one byte, bumping the column and
 * ctxt->nbChars; the line counter is not touched.  If the new current
 * byte is 0, more input is requested with xmlParserInputGrow().
 *
 * NOTE(review): this expands to a brace block rather than do/while(0), so
 * "NEXT1;" leaves an extra empty statement behind.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }
2021
/*
 * NEXTL(l): skip the current character, which is l bytes long.  The line
 * and column counters are updated for one character (a newline starts a
 * new line), then a '%' at the new position triggers
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* CUR_CHAR(l): xmlCurrentChar() on the ctxt in scope; l receives the length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* CUR_SCHAR(s, l): same through xmlStringCurrentChar(), reading from string s */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i.  When l is 1 the value is stored as a single xmlChar, otherwise
 * xmlCopyCharMultiByte() writes it and its return value is added to i.
 *
 * NOTE(review): this is an unbraced if/else; do not use it as the body of
 * another if that has its own else.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00002036
2037/**
2038 * xmlSkipBlankChars:
2039 * @ctxt: the XML parser context
2040 *
2041 * skip all blanks character found at that point in the input streams.
2042 * It pops up finished entities in the process if allowable at that point.
2043 *
2044 * Returns the number of space chars skipped
2045 */
2046
2047int
2048xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002049 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002050
2051 /*
2052 * It's Okay to use CUR/NEXT here since all the blanks are on
2053 * the ASCII range.
2054 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002055 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2056 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002057 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002058 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002059 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002060 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002061 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002062 if (*cur == '\n') {
2063 ctxt->input->line++; ctxt->input->col = 1;
2064 }
2065 cur++;
2066 res++;
2067 if (*cur == 0) {
2068 ctxt->input->cur = cur;
2069 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2070 cur = ctxt->input->cur;
2071 }
2072 }
2073 ctxt->input->cur = cur;
2074 } else {
2075 int cur;
2076 do {
2077 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002078 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002079 NEXT;
2080 cur = CUR;
2081 res++;
2082 }
2083 while ((cur == 0) && (ctxt->inputNr > 1) &&
2084 (ctxt->instate != XML_PARSER_COMMENT)) {
2085 xmlPopInput(ctxt);
2086 cur = CUR;
2087 }
2088 /*
2089 * Need to handle support of entities branching here
2090 */
2091 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2092 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2093 }
Owen Taylor3473f882001-02-23 17:55:21 +00002094 return(res);
2095}
2096
2097/************************************************************************
2098 * *
2099 * Commodity functions to handle entities *
2100 * *
2101 ************************************************************************/
2102
2103/**
2104 * xmlPopInput:
2105 * @ctxt: an XML parser context
2106 *
2107 * xmlPopInput: the current input pointed by ctxt->input came to an end
2108 * pop it and return the next char.
2109 *
2110 * Returns the current xmlChar in the parser context
2111 */
2112xmlChar
2113xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002114 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002115 if (xmlParserDebugEntities)
2116 xmlGenericError(xmlGenericErrorContext,
2117 "Popping input %d\n", ctxt->inputNr);
2118 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002119 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002120 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2121 return(xmlPopInput(ctxt));
2122 return(CUR);
2123}
2124
2125/**
2126 * xmlPushInput:
2127 * @ctxt: an XML parser context
2128 * @input: an XML parser input fragment (entity, XML fragment ...).
2129 *
2130 * xmlPushInput: switch to a new input stream which is stacked on top
2131 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002132 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002133 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002134int
Owen Taylor3473f882001-02-23 17:55:21 +00002135xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002136 int ret;
2137 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002138
2139 if (xmlParserDebugEntities) {
2140 if ((ctxt->input != NULL) && (ctxt->input->filename))
2141 xmlGenericError(xmlGenericErrorContext,
2142 "%s(%d): ", ctxt->input->filename,
2143 ctxt->input->line);
2144 xmlGenericError(xmlGenericErrorContext,
2145 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2146 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002147 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002148 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002149 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002150}
2151
2152/**
2153 * xmlParseCharRef:
2154 * @ctxt: an XML parser context
2155 *
2156 * parse Reference declarations
2157 *
2158 * [66] CharRef ::= '&#' [0-9]+ ';' |
2159 * '&#x' [0-9a-fA-F]+ ';'
2160 *
2161 * [ WFC: Legal Character ]
2162 * Characters referred to using character references must match the
2163 * production for Char.
2164 *
2165 * Returns the value parsed (as an int), 0 in case of error
2166 */
2167int
2168xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002169 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002170 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002171 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002172
Owen Taylor3473f882001-02-23 17:55:21 +00002173 /*
2174 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2175 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002176 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002177 (NXT(2) == 'x')) {
2178 SKIP(3);
2179 GROW;
2180 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002181 if (count++ > 20) {
2182 count = 0;
2183 GROW;
2184 }
2185 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002186 val = val * 16 + (CUR - '0');
2187 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2188 val = val * 16 + (CUR - 'a') + 10;
2189 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2190 val = val * 16 + (CUR - 'A') + 10;
2191 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002192 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002193 val = 0;
2194 break;
2195 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002196 if (val > 0x10FFFF)
2197 outofrange = val;
2198
Owen Taylor3473f882001-02-23 17:55:21 +00002199 NEXT;
2200 count++;
2201 }
2202 if (RAW == ';') {
2203 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002204 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002205 ctxt->nbChars ++;
2206 ctxt->input->cur++;
2207 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002208 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002209 SKIP(2);
2210 GROW;
2211 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002212 if (count++ > 20) {
2213 count = 0;
2214 GROW;
2215 }
2216 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002217 val = val * 10 + (CUR - '0');
2218 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002219 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002220 val = 0;
2221 break;
2222 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002223 if (val > 0x10FFFF)
2224 outofrange = val;
2225
Owen Taylor3473f882001-02-23 17:55:21 +00002226 NEXT;
2227 count++;
2228 }
2229 if (RAW == ';') {
2230 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002231 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002232 ctxt->nbChars ++;
2233 ctxt->input->cur++;
2234 }
2235 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002236 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002237 }
2238
2239 /*
2240 * [ WFC: Legal Character ]
2241 * Characters referred to using character references must match the
2242 * production for Char.
2243 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002244 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002245 return(val);
2246 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002247 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2248 "xmlParseCharRef: invalid xmlChar value %d\n",
2249 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002250 }
2251 return(0);
2252}
2253
2254/**
2255 * xmlParseStringCharRef:
2256 * @ctxt: an XML parser context
2257 * @str: a pointer to an index in the string
2258 *
2259 * parse Reference declarations, variant parsing from a string rather
 * than an input flow.
2261 *
2262 * [66] CharRef ::= '&#' [0-9]+ ';' |
2263 * '&#x' [0-9a-fA-F]+ ';'
2264 *
2265 * [ WFC: Legal Character ]
2266 * Characters referred to using character references must match the
2267 * production for Char.
2268 *
2269 * Returns the value parsed (as an int), 0 in case of error, str will be
2270 * updated to the current value of the index
2271 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002272static int
Owen Taylor3473f882001-02-23 17:55:21 +00002273xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2274 const xmlChar *ptr;
2275 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002276 unsigned int val = 0;
2277 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002278
2279 if ((str == NULL) || (*str == NULL)) return(0);
2280 ptr = *str;
2281 cur = *ptr;
2282 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2283 ptr += 3;
2284 cur = *ptr;
2285 while (cur != ';') { /* Non input consuming loop */
2286 if ((cur >= '0') && (cur <= '9'))
2287 val = val * 16 + (cur - '0');
2288 else if ((cur >= 'a') && (cur <= 'f'))
2289 val = val * 16 + (cur - 'a') + 10;
2290 else if ((cur >= 'A') && (cur <= 'F'))
2291 val = val * 16 + (cur - 'A') + 10;
2292 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002293 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002294 val = 0;
2295 break;
2296 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002297 if (val > 0x10FFFF)
2298 outofrange = val;
2299
Owen Taylor3473f882001-02-23 17:55:21 +00002300 ptr++;
2301 cur = *ptr;
2302 }
2303 if (cur == ';')
2304 ptr++;
2305 } else if ((cur == '&') && (ptr[1] == '#')){
2306 ptr += 2;
2307 cur = *ptr;
2308 while (cur != ';') { /* Non input consuming loops */
2309 if ((cur >= '0') && (cur <= '9'))
2310 val = val * 10 + (cur - '0');
2311 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002312 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002313 val = 0;
2314 break;
2315 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002316 if (val > 0x10FFFF)
2317 outofrange = val;
2318
Owen Taylor3473f882001-02-23 17:55:21 +00002319 ptr++;
2320 cur = *ptr;
2321 }
2322 if (cur == ';')
2323 ptr++;
2324 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002325 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002326 return(0);
2327 }
2328 *str = ptr;
2329
2330 /*
2331 * [ WFC: Legal Character ]
2332 * Characters referred to using character references must match the
2333 * production for Char.
2334 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002335 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002336 return(val);
2337 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002338 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2339 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2340 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002341 }
2342 return(0);
2343}
2344
2345/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002346 * xmlNewBlanksWrapperInputStream:
2347 * @ctxt: an XML parser context
2348 * @entity: an Entity pointer
2349 *
2350 * Create a new input stream for wrapping
2351 * blanks around a PEReference
2352 *
2353 * Returns the new input stream or NULL
2354 */
2355
2356static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2357
Daniel Veillardf4862f02002-09-10 11:13:43 +00002358static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002359xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2360 xmlParserInputPtr input;
2361 xmlChar *buffer;
2362 size_t length;
2363 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002364 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2365 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002366 return(NULL);
2367 }
2368 if (xmlParserDebugEntities)
2369 xmlGenericError(xmlGenericErrorContext,
2370 "new blanks wrapper for entity: %s\n", entity->name);
2371 input = xmlNewInputStream(ctxt);
2372 if (input == NULL) {
2373 return(NULL);
2374 }
2375 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002376 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002377 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002378 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002379 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002380 return(NULL);
2381 }
2382 buffer [0] = ' ';
2383 buffer [1] = '%';
2384 buffer [length-3] = ';';
2385 buffer [length-2] = ' ';
2386 buffer [length-1] = 0;
2387 memcpy(buffer + 2, entity->name, length - 5);
2388 input->free = deallocblankswrapper;
2389 input->base = buffer;
2390 input->cur = buffer;
2391 input->length = length;
2392 input->end = &buffer[length];
2393 return(input);
2394}
2395
2396/**
Owen Taylor3473f882001-02-23 17:55:21 +00002397 * xmlParserHandlePEReference:
2398 * @ctxt: the parser context
2399 *
2400 * [69] PEReference ::= '%' Name ';'
2401 *
2402 * [ WFC: No Recursion ]
2403 * A parsed entity must not contain a recursive
2404 * reference to itself, either directly or indirectly.
2405 *
2406 * [ WFC: Entity Declared ]
2407 * In a document without any DTD, a document with only an internal DTD
2408 * subset which contains no parameter entity references, or a document
2409 * with "standalone='yes'", ... ... The declaration of a parameter
2410 * entity must precede any reference to it...
2411 *
2412 * [ VC: Entity Declared ]
2413 * In a document with an external subset or external parameter entities
2414 * with "standalone='no'", ... ... The declaration of a parameter entity
2415 * must precede any reference to it...
2416 *
2417 * [ WFC: In DTD ]
2418 * Parameter-entity references may only appear in the DTD.
2419 * NOTE: misleading but this is handled.
2420 *
2421 * A PEReference may have been detected in the current input stream
2422 * the handling is done accordingly to
2423 * http://www.w3.org/TR/REC-xml#entproc
2424 * i.e.
2425 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002426 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002427 */
2428void
2429xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002430 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002431 xmlEntityPtr entity = NULL;
2432 xmlParserInputPtr input;
2433
Owen Taylor3473f882001-02-23 17:55:21 +00002434 if (RAW != '%') return;
2435 switch(ctxt->instate) {
2436 case XML_PARSER_CDATA_SECTION:
2437 return;
2438 case XML_PARSER_COMMENT:
2439 return;
2440 case XML_PARSER_START_TAG:
2441 return;
2442 case XML_PARSER_END_TAG:
2443 return;
2444 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002445 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002446 return;
2447 case XML_PARSER_PROLOG:
2448 case XML_PARSER_START:
2449 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002450 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002451 return;
2452 case XML_PARSER_ENTITY_DECL:
2453 case XML_PARSER_CONTENT:
2454 case XML_PARSER_ATTRIBUTE_VALUE:
2455 case XML_PARSER_PI:
2456 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002457 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002458 /* we just ignore it there */
2459 return;
2460 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002461 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002462 return;
2463 case XML_PARSER_ENTITY_VALUE:
2464 /*
2465 * NOTE: in the case of entity values, we don't do the
2466 * substitution here since we need the literal
2467 * entity value to be able to save the internal
2468 * subset of the document.
2469 * This will be handled by xmlStringDecodeEntities
2470 */
2471 return;
2472 case XML_PARSER_DTD:
2473 /*
2474 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2475 * In the internal DTD subset, parameter-entity references
2476 * can occur only where markup declarations can occur, not
2477 * within markup declarations.
2478 * In that case this is handled in xmlParseMarkupDecl
2479 */
2480 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2481 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002482 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002483 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002484 break;
2485 case XML_PARSER_IGNORE:
2486 return;
2487 }
2488
2489 NEXT;
2490 name = xmlParseName(ctxt);
2491 if (xmlParserDebugEntities)
2492 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002493 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002494 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002495 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002496 } else {
2497 if (RAW == ';') {
2498 NEXT;
2499 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2500 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2501 if (entity == NULL) {
2502
2503 /*
2504 * [ WFC: Entity Declared ]
2505 * In a document without any DTD, a document with only an
2506 * internal DTD subset which contains no parameter entity
2507 * references, or a document with "standalone='yes'", ...
2508 * ... The declaration of a parameter entity must precede
2509 * any reference to it...
2510 */
2511 if ((ctxt->standalone == 1) ||
2512 ((ctxt->hasExternalSubset == 0) &&
2513 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002514 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002515 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002516 } else {
2517 /*
2518 * [ VC: Entity Declared ]
2519 * In a document with an external subset or external
2520 * parameter entities with "standalone='no'", ...
2521 * ... The declaration of a parameter entity must precede
2522 * any reference to it...
2523 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002524 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2525 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2526 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002527 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002528 } else
2529 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2530 "PEReference: %%%s; not found\n",
2531 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002532 ctxt->valid = 0;
2533 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002534 } else if (ctxt->input->free != deallocblankswrapper) {
2535 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002536 if (xmlPushInput(ctxt, input) < 0)
2537 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002538 } else {
2539 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2540 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002541 xmlChar start[4];
2542 xmlCharEncoding enc;
2543
Owen Taylor3473f882001-02-23 17:55:21 +00002544 /*
2545 * handle the extra spaces added before and after
2546 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002547 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002548 */
2549 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002550 if (xmlPushInput(ctxt, input) < 0)
2551 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002552
2553 /*
2554 * Get the 4 first bytes and decode the charset
2555 * if enc != XML_CHAR_ENCODING_NONE
2556 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002557 * Note that, since we may have some non-UTF8
2558 * encoding (like UTF16, bug 135229), the 'length'
2559 * is not known, but we can calculate based upon
2560 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002561 */
2562 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002563 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002564 start[0] = RAW;
2565 start[1] = NXT(1);
2566 start[2] = NXT(2);
2567 start[3] = NXT(3);
2568 enc = xmlDetectCharEncoding(start, 4);
2569 if (enc != XML_CHAR_ENCODING_NONE) {
2570 xmlSwitchEncoding(ctxt, enc);
2571 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002572 }
2573
Owen Taylor3473f882001-02-23 17:55:21 +00002574 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002575 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2576 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002577 xmlParseTextDecl(ctxt);
2578 }
Owen Taylor3473f882001-02-23 17:55:21 +00002579 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002580 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2581 "PEReference: %s is not a parameter entity\n",
2582 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002583 }
2584 }
2585 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002586 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002587 }
Owen Taylor3473f882001-02-23 17:55:21 +00002588 }
2589}
2590
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current size plus @n.  @n is parenthesized
 * so that any expression can be passed.  On a size wrap-around or an
 * allocation failure the macro jumps to the mem_error label, leaving
 * buffer and buffer##_size untouched.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2605
2606/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002607 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002608 * @ctxt: the parser context
2609 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002610 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002611 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2612 * @end: an end marker xmlChar, 0 if none
2613 * @end2: an end marker xmlChar, 0 if none
2614 * @end3: an end marker xmlChar, 0 if none
2615 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002616 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002617 *
2618 * [67] Reference ::= EntityRef | CharRef
2619 *
2620 * [69] PEReference ::= '%' Name ';'
2621 *
2622 * Returns A newly allocated string with the substitution done. The caller
2623 * must deallocate it !
2624 */
2625xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002626xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2627 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002628 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002629 size_t buffer_size = 0;
2630 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002631
2632 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002633 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002634 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002635 xmlEntityPtr ent;
2636 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002637
Daniel Veillarda82b1822004-11-08 16:24:57 +00002638 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002639 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002640 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002641
Daniel Veillard0161e632008-08-28 15:36:32 +00002642 if (((ctxt->depth > 40) &&
2643 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2644 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002645 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002646 return(NULL);
2647 }
2648
2649 /*
2650 * allocate a translation buffer.
2651 */
2652 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002653 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002654 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002655
2656 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002657 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002658 * we are operating on already parsed values.
2659 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002660 if (str < last)
2661 c = CUR_SCHAR(str, l);
2662 else
2663 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002664 while ((c != 0) && (c != end) && /* non input consuming loop */
2665 (c != end2) && (c != end3)) {
2666
2667 if (c == 0) break;
2668 if ((c == '&') && (str[1] == '#')) {
2669 int val = xmlParseStringCharRef(ctxt, &str);
2670 if (val != 0) {
2671 COPY_BUF(0,buffer,nbchars,val);
2672 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002673 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002674 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002675 }
Owen Taylor3473f882001-02-23 17:55:21 +00002676 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2677 if (xmlParserDebugEntities)
2678 xmlGenericError(xmlGenericErrorContext,
2679 "String decoding Entity Reference: %.30s\n",
2680 str);
2681 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002682 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2683 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002684 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002685 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002686 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002687 if ((ent != NULL) &&
2688 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2689 if (ent->content != NULL) {
2690 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002691 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002692 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002693 }
Owen Taylor3473f882001-02-23 17:55:21 +00002694 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002695 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2696 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002697 }
2698 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002699 ctxt->depth++;
2700 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2701 0, 0, 0);
2702 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002703
Owen Taylor3473f882001-02-23 17:55:21 +00002704 if (rep != NULL) {
2705 current = rep;
2706 while (*current != 0) { /* non input consuming loop */
2707 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002708 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002709 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2710 goto int_error;
2711 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002712 }
2713 }
2714 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002715 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002716 }
2717 } else if (ent != NULL) {
2718 int i = xmlStrlen(ent->name);
2719 const xmlChar *cur = ent->name;
2720
2721 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002722 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002723 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002724 }
2725 for (;i > 0;i--)
2726 buffer[nbchars++] = *cur++;
2727 buffer[nbchars++] = ';';
2728 }
2729 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2730 if (xmlParserDebugEntities)
2731 xmlGenericError(xmlGenericErrorContext,
2732 "String decoding PE Reference: %.30s\n", str);
2733 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002734 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2735 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002736 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002737 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002738 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002739 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002740 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002741 }
Owen Taylor3473f882001-02-23 17:55:21 +00002742 ctxt->depth++;
2743 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2744 0, 0, 0);
2745 ctxt->depth--;
2746 if (rep != NULL) {
2747 current = rep;
2748 while (*current != 0) { /* non input consuming loop */
2749 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002750 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002751 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2752 goto int_error;
2753 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002754 }
2755 }
2756 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002757 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002758 }
2759 }
2760 } else {
2761 COPY_BUF(l,buffer,nbchars,c);
2762 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002763 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2764 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002765 }
2766 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002767 if (str < last)
2768 c = CUR_SCHAR(str, l);
2769 else
2770 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002771 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002772 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002773 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002774
2775mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002776 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002777int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002778 if (rep != NULL)
2779 xmlFree(rep);
2780 if (buffer != NULL)
2781 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002782 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002783}
2784
Daniel Veillarde57ec792003-09-10 10:50:59 +00002785/**
2786 * xmlStringDecodeEntities:
2787 * @ctxt: the parser context
2788 * @str: the input string
2789 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2790 * @end: an end marker xmlChar, 0 if none
2791 * @end2: an end marker xmlChar, 0 if none
2792 * @end3: an end marker xmlChar, 0 if none
2793 *
2794 * Takes a entity string content and process to do the adequate substitutions.
2795 *
2796 * [67] Reference ::= EntityRef | CharRef
2797 *
2798 * [69] PEReference ::= '%' Name ';'
2799 *
2800 * Returns A newly allocated string with the substitution done. The caller
2801 * must deallocate it !
2802 */
2803xmlChar *
2804xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2805 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002806 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002807 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2808 end, end2, end3));
2809}
Owen Taylor3473f882001-02-23 17:55:21 +00002810
2811/************************************************************************
2812 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002813 * Commodity functions, cleanup needed ? *
2814 * *
2815 ************************************************************************/
2816
2817/**
2818 * areBlanks:
2819 * @ctxt: an XML parser context
2820 * @str: a xmlChar *
2821 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002822 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002823 *
2824 * Is this a sequence of blank chars that one can ignore ?
2825 *
2826 * Returns 1 if ignorable 0 otherwise.
2827 */
2828
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002829static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2830 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002831 int i, ret;
2832 xmlNodePtr lastChild;
2833
Daniel Veillard05c13a22001-09-09 08:38:09 +00002834 /*
2835 * Don't spend time trying to differentiate them, the same callback is
2836 * used !
2837 */
2838 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002839 return(0);
2840
Owen Taylor3473f882001-02-23 17:55:21 +00002841 /*
2842 * Check for xml:space value.
2843 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002844 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2845 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002846 return(0);
2847
2848 /*
2849 * Check that the string is made of blanks
2850 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002851 if (blank_chars == 0) {
2852 for (i = 0;i < len;i++)
2853 if (!(IS_BLANK_CH(str[i]))) return(0);
2854 }
Owen Taylor3473f882001-02-23 17:55:21 +00002855
2856 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002857 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002858 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002859 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002860 if (ctxt->myDoc != NULL) {
2861 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2862 if (ret == 0) return(1);
2863 if (ret == 1) return(0);
2864 }
2865
2866 /*
2867 * Otherwise, heuristic :-\
2868 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002869 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002870 if ((ctxt->node->children == NULL) &&
2871 (RAW == '<') && (NXT(1) == '/')) return(0);
2872
2873 lastChild = xmlGetLastChild(ctxt->node);
2874 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002875 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2876 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002877 } else if (xmlNodeIsText(lastChild))
2878 return(0);
2879 else if ((ctxt->node->children != NULL) &&
2880 (xmlNodeIsText(ctxt->node->children)))
2881 return(0);
2882 return(1);
2883}
2884
Owen Taylor3473f882001-02-23 17:55:21 +00002885/************************************************************************
2886 * *
2887 * Extra stuff for namespace support *
2888 * Relates to http://www.w3.org/TR/WD-xml-names *
2889 * *
2890 ************************************************************************/
2891
2892/**
2893 * xmlSplitQName:
2894 * @ctxt: an XML parser context
2895 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002896 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002897 *
2898 * parse an UTF8 encoded XML qualified name string
2899 *
2900 * [NS 5] QName ::= (Prefix ':')? LocalPart
2901 *
2902 * [NS 6] Prefix ::= NCName
2903 *
2904 * [NS 7] LocalPart ::= NCName
2905 *
2906 * Returns the local part, and prefix is updated
2907 * to get the Prefix if any.
2908 */
2909
2910xmlChar *
2911xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2912 xmlChar buf[XML_MAX_NAMELEN + 5];
2913 xmlChar *buffer = NULL;
2914 int len = 0;
2915 int max = XML_MAX_NAMELEN;
2916 xmlChar *ret = NULL;
2917 const xmlChar *cur = name;
2918 int c;
2919
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002920 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002921 *prefix = NULL;
2922
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002923 if (cur == NULL) return(NULL);
2924
Owen Taylor3473f882001-02-23 17:55:21 +00002925#ifndef XML_XML_NAMESPACE
2926 /* xml: prefix is not really a namespace */
2927 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2928 (cur[2] == 'l') && (cur[3] == ':'))
2929 return(xmlStrdup(name));
2930#endif
2931
Daniel Veillard597bc482003-07-24 16:08:28 +00002932 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002933 if (cur[0] == ':')
2934 return(xmlStrdup(name));
2935
2936 c = *cur++;
2937 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2938 buf[len++] = c;
2939 c = *cur++;
2940 }
2941 if (len >= max) {
2942 /*
2943 * Okay someone managed to make a huge name, so he's ready to pay
2944 * for the processing speed.
2945 */
2946 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002947
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002948 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002949 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002950 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002951 return(NULL);
2952 }
2953 memcpy(buffer, buf, len);
2954 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2955 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002956 xmlChar *tmp;
2957
Owen Taylor3473f882001-02-23 17:55:21 +00002958 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002959 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002960 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002961 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002962 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002963 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002964 return(NULL);
2965 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002966 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002967 }
2968 buffer[len++] = c;
2969 c = *cur++;
2970 }
2971 buffer[len] = 0;
2972 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002973
Daniel Veillard597bc482003-07-24 16:08:28 +00002974 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002975 if (buffer != NULL)
2976 xmlFree(buffer);
2977 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002978 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002979 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002980
Owen Taylor3473f882001-02-23 17:55:21 +00002981 if (buffer == NULL)
2982 ret = xmlStrndup(buf, len);
2983 else {
2984 ret = buffer;
2985 buffer = NULL;
2986 max = XML_MAX_NAMELEN;
2987 }
2988
2989
2990 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002991 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002992 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002993 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002994 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002995 }
Owen Taylor3473f882001-02-23 17:55:21 +00002996 len = 0;
2997
Daniel Veillardbb284f42002-10-16 18:02:47 +00002998 /*
2999 * Check that the first character is proper to start
3000 * a new name
3001 */
3002 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3003 ((c >= 0x41) && (c <= 0x5A)) ||
3004 (c == '_') || (c == ':'))) {
3005 int l;
3006 int first = CUR_SCHAR(cur, l);
3007
3008 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003009 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003010 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003011 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003012 }
3013 }
3014 cur++;
3015
Owen Taylor3473f882001-02-23 17:55:21 +00003016 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3017 buf[len++] = c;
3018 c = *cur++;
3019 }
3020 if (len >= max) {
3021 /*
3022 * Okay someone managed to make a huge name, so he's ready to pay
3023 * for the processing speed.
3024 */
3025 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003026
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003027 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003028 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003029 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003030 return(NULL);
3031 }
3032 memcpy(buffer, buf, len);
3033 while (c != 0) { /* tested bigname2.xml */
3034 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003035 xmlChar *tmp;
3036
Owen Taylor3473f882001-02-23 17:55:21 +00003037 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003038 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003039 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003040 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003041 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003042 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003043 return(NULL);
3044 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003045 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003046 }
3047 buffer[len++] = c;
3048 c = *cur++;
3049 }
3050 buffer[len] = 0;
3051 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003052
Owen Taylor3473f882001-02-23 17:55:21 +00003053 if (buffer == NULL)
3054 ret = xmlStrndup(buf, len);
3055 else {
3056 ret = buffer;
3057 }
3058 }
3059
3060 return(ret);
3061}
3062
3063/************************************************************************
3064 * *
3065 * The parser itself *
3066 * Relates to http://www.w3.org/TR/REC-xml *
3067 * *
3068 ************************************************************************/
3069
Daniel Veillard34e3f642008-07-29 09:02:27 +00003070/************************************************************************
3071 * *
3072 * Routines to parse Name, NCName and NmToken *
3073 * *
3074 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, only compiled in DEBUG builds. Each one is bumped at the
 * entry of the routine it is named after.
 */
static unsigned long nbParseName = 0;           /* xmlParseName */
static unsigned long nbParseNmToken = 0;        /* xmlParseNmtoken */
static unsigned long nbParseNCName = 0;         /* xmlParseNCName */
static unsigned long nbParseNCNameComplex = 0;  /* xmlParseNCNameComplex */
static unsigned long nbParseNameComplex = 0;    /* xmlParseNameComplex */
static unsigned long nbParseStringName = 0;     /* xmlParseStringName */
#endif
3083
/*
 * The two following functions are related to the change of accepted
 * characters for Name and NmToken in the Revision 5 of XML-1.0.
 * They correspond to the modified production [4] and the new production [4a]
 * changes in that revision. Also note that the macros used for the
 * productions Letter, Digit, CombiningChar and Extender are not needed
 * anymore.
 * We still keep compatibility with the pre-revision5 parsing semantics if
 * the new XML_PARSE_OLD10 option is given to the parser.
 */
3094static int
3095xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3096 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3097 /*
3098 * Use the new checks of production [4] [4a] amd [5] of the
3099 * Update 5 of XML-1.0
3100 */
3101 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3102 (((c >= 'a') && (c <= 'z')) ||
3103 ((c >= 'A') && (c <= 'Z')) ||
3104 (c == '_') || (c == ':') ||
3105 ((c >= 0xC0) && (c <= 0xD6)) ||
3106 ((c >= 0xD8) && (c <= 0xF6)) ||
3107 ((c >= 0xF8) && (c <= 0x2FF)) ||
3108 ((c >= 0x370) && (c <= 0x37D)) ||
3109 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3110 ((c >= 0x200C) && (c <= 0x200D)) ||
3111 ((c >= 0x2070) && (c <= 0x218F)) ||
3112 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3113 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3114 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3115 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3116 ((c >= 0x10000) && (c <= 0xEFFFF))))
3117 return(1);
3118 } else {
3119 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3120 return(1);
3121 }
3122 return(0);
3123}
3124
3125static int
3126xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3127 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3128 /*
3129 * Use the new checks of production [4] [4a] amd [5] of the
3130 * Update 5 of XML-1.0
3131 */
3132 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3133 (((c >= 'a') && (c <= 'z')) ||
3134 ((c >= 'A') && (c <= 'Z')) ||
3135 ((c >= '0') && (c <= '9')) || /* !start */
3136 (c == '_') || (c == ':') ||
3137 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3138 ((c >= 0xC0) && (c <= 0xD6)) ||
3139 ((c >= 0xD8) && (c <= 0xF6)) ||
3140 ((c >= 0xF8) && (c <= 0x2FF)) ||
3141 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3142 ((c >= 0x370) && (c <= 0x37D)) ||
3143 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3144 ((c >= 0x200C) && (c <= 0x200D)) ||
3145 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3146 ((c >= 0x2070) && (c <= 0x218F)) ||
3147 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3148 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3149 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3150 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3151 ((c >= 0x10000) && (c <= 0xEFFFF))))
3152 return(1);
3153 } else {
3154 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3155 (c == '.') || (c == '-') ||
3156 (c == '_') || (c == ':') ||
3157 (IS_COMBINING(c)) ||
3158 (IS_EXTENDER(c)))
3159 return(1);
3160 }
3161 return(0);
3162}
3163
Daniel Veillarde57ec792003-09-10 10:50:59 +00003164static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003165 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003166
Daniel Veillard34e3f642008-07-29 09:02:27 +00003167static const xmlChar *
3168xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3169 int len = 0, l;
3170 int c;
3171 int count = 0;
3172
Daniel Veillardc6561462009-03-25 10:22:31 +00003173#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003174 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003175#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003176
3177 /*
3178 * Handler for more complex cases
3179 */
3180 GROW;
3181 c = CUR_CHAR(l);
3182 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3183 /*
3184 * Use the new checks of production [4] [4a] amd [5] of the
3185 * Update 5 of XML-1.0
3186 */
3187 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3188 (!(((c >= 'a') && (c <= 'z')) ||
3189 ((c >= 'A') && (c <= 'Z')) ||
3190 (c == '_') || (c == ':') ||
3191 ((c >= 0xC0) && (c <= 0xD6)) ||
3192 ((c >= 0xD8) && (c <= 0xF6)) ||
3193 ((c >= 0xF8) && (c <= 0x2FF)) ||
3194 ((c >= 0x370) && (c <= 0x37D)) ||
3195 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3196 ((c >= 0x200C) && (c <= 0x200D)) ||
3197 ((c >= 0x2070) && (c <= 0x218F)) ||
3198 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3199 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3200 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3201 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3202 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3203 return(NULL);
3204 }
3205 len += l;
3206 NEXTL(l);
3207 c = CUR_CHAR(l);
3208 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3209 (((c >= 'a') && (c <= 'z')) ||
3210 ((c >= 'A') && (c <= 'Z')) ||
3211 ((c >= '0') && (c <= '9')) || /* !start */
3212 (c == '_') || (c == ':') ||
3213 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3214 ((c >= 0xC0) && (c <= 0xD6)) ||
3215 ((c >= 0xD8) && (c <= 0xF6)) ||
3216 ((c >= 0xF8) && (c <= 0x2FF)) ||
3217 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3218 ((c >= 0x370) && (c <= 0x37D)) ||
3219 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3220 ((c >= 0x200C) && (c <= 0x200D)) ||
3221 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3222 ((c >= 0x2070) && (c <= 0x218F)) ||
3223 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3224 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3225 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3226 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3227 ((c >= 0x10000) && (c <= 0xEFFFF))
3228 )) {
3229 if (count++ > 100) {
3230 count = 0;
3231 GROW;
3232 }
3233 len += l;
3234 NEXTL(l);
3235 c = CUR_CHAR(l);
3236 }
3237 } else {
3238 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3239 (!IS_LETTER(c) && (c != '_') &&
3240 (c != ':'))) {
3241 return(NULL);
3242 }
3243 len += l;
3244 NEXTL(l);
3245 c = CUR_CHAR(l);
3246
3247 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3248 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3249 (c == '.') || (c == '-') ||
3250 (c == '_') || (c == ':') ||
3251 (IS_COMBINING(c)) ||
3252 (IS_EXTENDER(c)))) {
3253 if (count++ > 100) {
3254 count = 0;
3255 GROW;
3256 }
3257 len += l;
3258 NEXTL(l);
3259 c = CUR_CHAR(l);
3260 }
3261 }
3262 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3263 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3264 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3265}
3266
Owen Taylor3473f882001-02-23 17:55:21 +00003267/**
3268 * xmlParseName:
3269 * @ctxt: an XML parser context
3270 *
3271 * parse an XML name.
3272 *
3273 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3274 * CombiningChar | Extender
3275 *
3276 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3277 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003278 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003279 *
3280 * Returns the Name parsed or NULL
3281 */
3282
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003283const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003284xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003285 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003286 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003287 int count = 0;
3288
3289 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003290
Daniel Veillardc6561462009-03-25 10:22:31 +00003291#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003292 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003293#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003294
Daniel Veillard48b2f892001-02-25 16:11:03 +00003295 /*
3296 * Accelerator for simple ASCII names
3297 */
3298 in = ctxt->input->cur;
3299 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3300 ((*in >= 0x41) && (*in <= 0x5A)) ||
3301 (*in == '_') || (*in == ':')) {
3302 in++;
3303 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3304 ((*in >= 0x41) && (*in <= 0x5A)) ||
3305 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003306 (*in == '_') || (*in == '-') ||
3307 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003308 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003309 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003310 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003311 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003312 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003313 ctxt->nbChars += count;
3314 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003315 if (ret == NULL)
3316 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003317 return(ret);
3318 }
3319 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003320 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003321 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003322}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003323
Daniel Veillard34e3f642008-07-29 09:02:27 +00003324static const xmlChar *
3325xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3326 int len = 0, l;
3327 int c;
3328 int count = 0;
3329
Daniel Veillardc6561462009-03-25 10:22:31 +00003330#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003331 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003332#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003333
3334 /*
3335 * Handler for more complex cases
3336 */
3337 GROW;
3338 c = CUR_CHAR(l);
3339 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3340 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3341 return(NULL);
3342 }
3343
3344 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3345 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3346 if (count++ > 100) {
3347 count = 0;
3348 GROW;
3349 }
3350 len += l;
3351 NEXTL(l);
3352 c = CUR_CHAR(l);
3353 }
3354 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3355}
3356
3357/**
3358 * xmlParseNCName:
3359 * @ctxt: an XML parser context
3360 * @len: lenght of the string parsed
3361 *
3362 * parse an XML name.
3363 *
3364 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3365 * CombiningChar | Extender
3366 *
3367 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3368 *
3369 * Returns the Name parsed or NULL
3370 */
3371
3372static const xmlChar *
3373xmlParseNCName(xmlParserCtxtPtr ctxt) {
3374 const xmlChar *in;
3375 const xmlChar *ret;
3376 int count = 0;
3377
Daniel Veillardc6561462009-03-25 10:22:31 +00003378#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003379 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003380#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003381
3382 /*
3383 * Accelerator for simple ASCII names
3384 */
3385 in = ctxt->input->cur;
3386 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3387 ((*in >= 0x41) && (*in <= 0x5A)) ||
3388 (*in == '_')) {
3389 in++;
3390 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3391 ((*in >= 0x41) && (*in <= 0x5A)) ||
3392 ((*in >= 0x30) && (*in <= 0x39)) ||
3393 (*in == '_') || (*in == '-') ||
3394 (*in == '.'))
3395 in++;
3396 if ((*in > 0) && (*in < 0x80)) {
3397 count = in - ctxt->input->cur;
3398 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3399 ctxt->input->cur = in;
3400 ctxt->nbChars += count;
3401 ctxt->input->col += count;
3402 if (ret == NULL) {
3403 xmlErrMemory(ctxt, NULL);
3404 }
3405 return(ret);
3406 }
3407 }
3408 return(xmlParseNCNameComplex(ctxt));
3409}
3410
Daniel Veillard46de64e2002-05-29 08:21:33 +00003411/**
3412 * xmlParseNameAndCompare:
3413 * @ctxt: an XML parser context
3414 *
3415 * parse an XML name and compares for match
3416 * (specialized for endtag parsing)
3417 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003418 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3419 * and the name for mismatch
3420 */
3421
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003422static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003423xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003424 register const xmlChar *cmp = other;
3425 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003426 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003427
3428 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003429
Daniel Veillard46de64e2002-05-29 08:21:33 +00003430 in = ctxt->input->cur;
3431 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003432 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003433 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003434 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003435 }
William M. Brack76e95df2003-10-18 16:20:14 +00003436 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003437 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003438 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003439 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003440 }
3441 /* failure (or end of input buffer), check with full function */
3442 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003443 /* strings coming from the dictionnary direct compare possible */
3444 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003445 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003446 }
3447 return ret;
3448}
3449
Owen Taylor3473f882001-02-23 17:55:21 +00003450/**
3451 * xmlParseStringName:
3452 * @ctxt: an XML parser context
3453 * @str: a pointer to the string pointer (IN/OUT)
3454 *
3455 * parse an XML name.
3456 *
3457 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3458 * CombiningChar | Extender
3459 *
3460 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3461 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003462 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003463 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003464 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003465 * is updated to the current location in the string.
3466 */
3467
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003468static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003469xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3470 xmlChar buf[XML_MAX_NAMELEN + 5];
3471 const xmlChar *cur = *str;
3472 int len = 0, l;
3473 int c;
3474
Daniel Veillardc6561462009-03-25 10:22:31 +00003475#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003476 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003477#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003478
Owen Taylor3473f882001-02-23 17:55:21 +00003479 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003480 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003481 return(NULL);
3482 }
3483
Daniel Veillard34e3f642008-07-29 09:02:27 +00003484 COPY_BUF(l,buf,len,c);
3485 cur += l;
3486 c = CUR_SCHAR(cur, l);
3487 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003488 COPY_BUF(l,buf,len,c);
3489 cur += l;
3490 c = CUR_SCHAR(cur, l);
3491 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3492 /*
3493 * Okay someone managed to make a huge name, so he's ready to pay
3494 * for the processing speed.
3495 */
3496 xmlChar *buffer;
3497 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003498
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003499 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003500 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003501 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003502 return(NULL);
3503 }
3504 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003505 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003506 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003507 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003508 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003509 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003510 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003511 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003512 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003513 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003514 return(NULL);
3515 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003516 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003517 }
3518 COPY_BUF(l,buffer,len,c);
3519 cur += l;
3520 c = CUR_SCHAR(cur, l);
3521 }
3522 buffer[len] = 0;
3523 *str = cur;
3524 return(buffer);
3525 }
3526 }
3527 *str = cur;
3528 return(xmlStrndup(buf, len));
3529}
3530
3531/**
3532 * xmlParseNmtoken:
3533 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003534 *
Owen Taylor3473f882001-02-23 17:55:21 +00003535 * parse an XML Nmtoken.
3536 *
3537 * [7] Nmtoken ::= (NameChar)+
3538 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003539 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003540 *
3541 * Returns the Nmtoken parsed or NULL
3542 */
3543
3544xmlChar *
3545xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3546 xmlChar buf[XML_MAX_NAMELEN + 5];
3547 int len = 0, l;
3548 int c;
3549 int count = 0;
3550
Daniel Veillardc6561462009-03-25 10:22:31 +00003551#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003552 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003553#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003554
Owen Taylor3473f882001-02-23 17:55:21 +00003555 GROW;
3556 c = CUR_CHAR(l);
3557
Daniel Veillard34e3f642008-07-29 09:02:27 +00003558 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003559 if (count++ > 100) {
3560 count = 0;
3561 GROW;
3562 }
3563 COPY_BUF(l,buf,len,c);
3564 NEXTL(l);
3565 c = CUR_CHAR(l);
3566 if (len >= XML_MAX_NAMELEN) {
3567 /*
3568 * Okay someone managed to make a huge token, so he's ready to pay
3569 * for the processing speed.
3570 */
3571 xmlChar *buffer;
3572 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003573
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003574 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003575 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003576 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003577 return(NULL);
3578 }
3579 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003580 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003581 if (count++ > 100) {
3582 count = 0;
3583 GROW;
3584 }
3585 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003586 xmlChar *tmp;
3587
Owen Taylor3473f882001-02-23 17:55:21 +00003588 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003589 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003590 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003591 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003592 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003593 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003594 return(NULL);
3595 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003596 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003597 }
3598 COPY_BUF(l,buffer,len,c);
3599 NEXTL(l);
3600 c = CUR_CHAR(l);
3601 }
3602 buffer[len] = 0;
3603 return(buffer);
3604 }
3605 }
3606 if (len == 0)
3607 return(NULL);
3608 return(xmlStrndup(buf, len));
3609}
3610
3611/**
3612 * xmlParseEntityValue:
3613 * @ctxt: an XML parser context
3614 * @orig: if non-NULL store a copy of the original entity value
3615 *
3616 * parse a value for ENTITY declarations
3617 *
3618 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3619 * "'" ([^%&'] | PEReference | Reference)* "'"
3620 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003621 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003622 */
3623
3624xmlChar *
3625xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3626 xmlChar *buf = NULL;
3627 int len = 0;
3628 int size = XML_PARSER_BUFFER_SIZE;
3629 int c, l;
3630 xmlChar stop;
3631 xmlChar *ret = NULL;
3632 const xmlChar *cur = NULL;
3633 xmlParserInputPtr input;
3634
3635 if (RAW == '"') stop = '"';
3636 else if (RAW == '\'') stop = '\'';
3637 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003638 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003639 return(NULL);
3640 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003641 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003642 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003643 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003644 return(NULL);
3645 }
3646
3647 /*
3648 * The content of the entity definition is copied in a buffer.
3649 */
3650
3651 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3652 input = ctxt->input;
3653 GROW;
3654 NEXT;
3655 c = CUR_CHAR(l);
3656 /*
3657 * NOTE: 4.4.5 Included in Literal
3658 * When a parameter entity reference appears in a literal entity
3659 * value, ... a single or double quote character in the replacement
3660 * text is always treated as a normal data character and will not
3661 * terminate the literal.
3662 * In practice it means we stop the loop only when back at parsing
3663 * the initial entity and the quote is found
3664 */
William M. Brack871611b2003-10-18 04:53:14 +00003665 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003666 (ctxt->input != input))) {
3667 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003668 xmlChar *tmp;
3669
Owen Taylor3473f882001-02-23 17:55:21 +00003670 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003671 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3672 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003673 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003674 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003675 return(NULL);
3676 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003677 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003678 }
3679 COPY_BUF(l,buf,len,c);
3680 NEXTL(l);
3681 /*
3682 * Pop-up of finished entities.
3683 */
3684 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3685 xmlPopInput(ctxt);
3686
3687 GROW;
3688 c = CUR_CHAR(l);
3689 if (c == 0) {
3690 GROW;
3691 c = CUR_CHAR(l);
3692 }
3693 }
3694 buf[len] = 0;
3695
3696 /*
3697 * Raise problem w.r.t. '&' and '%' being used in non-entities
3698 * reference constructs. Note Charref will be handled in
3699 * xmlStringDecodeEntities()
3700 */
3701 cur = buf;
3702 while (*cur != 0) { /* non input consuming */
3703 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3704 xmlChar *name;
3705 xmlChar tmp = *cur;
3706
3707 cur++;
3708 name = xmlParseStringName(ctxt, &cur);
3709 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003710 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003711 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003712 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003713 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003714 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3715 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003716 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003717 }
3718 if (name != NULL)
3719 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003720 if (*cur == 0)
3721 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003722 }
3723 cur++;
3724 }
3725
3726 /*
3727 * Then PEReference entities are substituted.
3728 */
3729 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003730 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003731 xmlFree(buf);
3732 } else {
3733 NEXT;
3734 /*
3735 * NOTE: 4.4.7 Bypassed
3736 * When a general entity reference appears in the EntityValue in
3737 * an entity declaration, it is bypassed and left as is.
3738 * so XML_SUBSTITUTE_REF is not set here.
3739 */
3740 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3741 0, 0, 0);
3742 if (orig != NULL)
3743 *orig = buf;
3744 else
3745 xmlFree(buf);
3746 }
3747
3748 return(ret);
3749}
3750
3751/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003752 * xmlParseAttValueComplex:
3753 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003754 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003755 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003756 *
3757 * parse a value for an attribute, this is the fallback function
3758 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003759 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003760 *
3761 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3762 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003763static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003764xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003765 xmlChar limit = 0;
3766 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003767 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003768 size_t len = 0;
3769 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003770 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003771 xmlChar *current = NULL;
3772 xmlEntityPtr ent;
3773
Owen Taylor3473f882001-02-23 17:55:21 +00003774 if (NXT(0) == '"') {
3775 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3776 limit = '"';
3777 NEXT;
3778 } else if (NXT(0) == '\'') {
3779 limit = '\'';
3780 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3781 NEXT;
3782 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003783 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003784 return(NULL);
3785 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003786
Owen Taylor3473f882001-02-23 17:55:21 +00003787 /*
3788 * allocate a translation buffer.
3789 */
3790 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003791 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003792 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003793
3794 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003795 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003796 */
3797 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003798 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003799 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003800 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003801 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003802 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003803 if (NXT(1) == '#') {
3804 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003805
Owen Taylor3473f882001-02-23 17:55:21 +00003806 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003807 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003808 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003809 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003810 }
3811 buf[len++] = '&';
3812 } else {
3813 /*
3814 * The reparsing will be done in xmlStringGetNodeList()
3815 * called by the attribute() function in SAX.c
3816 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003817 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003818 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003819 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003820 buf[len++] = '&';
3821 buf[len++] = '#';
3822 buf[len++] = '3';
3823 buf[len++] = '8';
3824 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003825 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003826 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003827 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003828 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003829 }
Owen Taylor3473f882001-02-23 17:55:21 +00003830 len += xmlCopyChar(0, &buf[len], val);
3831 }
3832 } else {
3833 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003834 ctxt->nbentities++;
3835 if (ent != NULL)
3836 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003837 if ((ent != NULL) &&
3838 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003839 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003840 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003841 }
3842 if ((ctxt->replaceEntities == 0) &&
3843 (ent->content[0] == '&')) {
3844 buf[len++] = '&';
3845 buf[len++] = '#';
3846 buf[len++] = '3';
3847 buf[len++] = '8';
3848 buf[len++] = ';';
3849 } else {
3850 buf[len++] = ent->content[0];
3851 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003852 } else if ((ent != NULL) &&
3853 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003854 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3855 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003856 XML_SUBSTITUTE_REF,
3857 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003858 if (rep != NULL) {
3859 current = rep;
3860 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003861 if ((*current == 0xD) || (*current == 0xA) ||
3862 (*current == 0x9)) {
3863 buf[len++] = 0x20;
3864 current++;
3865 } else
3866 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003867 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003868 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003869 }
3870 }
3871 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003872 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003873 }
3874 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003875 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003876 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003877 }
Owen Taylor3473f882001-02-23 17:55:21 +00003878 if (ent->content != NULL)
3879 buf[len++] = ent->content[0];
3880 }
3881 } else if (ent != NULL) {
3882 int i = xmlStrlen(ent->name);
3883 const xmlChar *cur = ent->name;
3884
3885 /*
3886 * This may look absurd but is needed to detect
3887 * entities problems
3888 */
3889 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3890 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003891 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003892 XML_SUBSTITUTE_REF, 0, 0, 0);
3893 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003894 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003895 rep = NULL;
3896 }
Owen Taylor3473f882001-02-23 17:55:21 +00003897 }
3898
3899 /*
3900 * Just output the reference
3901 */
3902 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08003903 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003904 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003905 }
3906 for (;i > 0;i--)
3907 buf[len++] = *cur++;
3908 buf[len++] = ';';
3909 }
3910 }
3911 } else {
3912 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003913 if ((len != 0) || (!normalize)) {
3914 if ((!normalize) || (!in_space)) {
3915 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08003916 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003917 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003918 }
3919 }
3920 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003921 }
3922 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003923 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003924 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08003925 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003926 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003927 }
3928 }
3929 NEXTL(l);
3930 }
3931 GROW;
3932 c = CUR_CHAR(l);
3933 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003934 if ((in_space) && (normalize)) {
3935 while (buf[len - 1] == 0x20) len--;
3936 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003937 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003938 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003939 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003940 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003941 if ((c != 0) && (!IS_CHAR(c))) {
3942 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3943 "invalid character in attribute value\n");
3944 } else {
3945 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3946 "AttValue: ' expected\n");
3947 }
Owen Taylor3473f882001-02-23 17:55:21 +00003948 } else
3949 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003950
3951 /*
3952 * There we potentially risk an overflow, don't allow attribute value of
3953 * lenght more than INT_MAX it is a very reasonnable assumption !
3954 */
3955 if (len >= INT_MAX) {
3956 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3957 "AttValue lenght too long\n");
3958 goto mem_error;
3959 }
3960
3961 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00003962 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003963
3964mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003965 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003966 if (buf != NULL)
3967 xmlFree(buf);
3968 if (rep != NULL)
3969 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003970 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003971}
3972
3973/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003974 * xmlParseAttValue:
3975 * @ctxt: an XML parser context
3976 *
3977 * parse a value for an attribute
3978 * Note: the parser won't do substitution of entities here, this
3979 * will be handled later in xmlStringGetNodeList
3980 *
3981 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3982 * "'" ([^<&'] | Reference)* "'"
3983 *
3984 * 3.3.3 Attribute-Value Normalization:
3985 * Before the value of an attribute is passed to the application or
3986 * checked for validity, the XML processor must normalize it as follows:
3987 * - a character reference is processed by appending the referenced
3988 * character to the attribute value
3989 * - an entity reference is processed by recursively processing the
3990 * replacement text of the entity
3991 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3992 * appending #x20 to the normalized value, except that only a single
3993 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3994 * parsed entity or the literal entity value of an internal parsed entity
3995 * - other characters are processed by appending them to the normalized value
3996 * If the declared value is not CDATA, then the XML processor must further
3997 * process the normalized attribute value by discarding any leading and
3998 * trailing space (#x20) characters, and by replacing sequences of space
3999 * (#x20) characters by a single space (#x20) character.
4000 * All attributes for which no declaration has been read should be treated
4001 * by a non-validating parser as if declared CDATA.
4002 *
4003 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4004 */
4005
4006
4007xmlChar *
4008xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004009 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004010 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004011}
4012
4013/**
Owen Taylor3473f882001-02-23 17:55:21 +00004014 * xmlParseSystemLiteral:
4015 * @ctxt: an XML parser context
4016 *
4017 * parse an XML Literal
4018 *
4019 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4020 *
4021 * Returns the SystemLiteral parsed or NULL
4022 */
4023
4024xmlChar *
4025xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4026 xmlChar *buf = NULL;
4027 int len = 0;
4028 int size = XML_PARSER_BUFFER_SIZE;
4029 int cur, l;
4030 xmlChar stop;
4031 int state = ctxt->instate;
4032 int count = 0;
4033
4034 SHRINK;
4035 if (RAW == '"') {
4036 NEXT;
4037 stop = '"';
4038 } else if (RAW == '\'') {
4039 NEXT;
4040 stop = '\'';
4041 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004042 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004043 return(NULL);
4044 }
4045
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004046 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004047 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004048 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004049 return(NULL);
4050 }
4051 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4052 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004053 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004054 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004055 xmlChar *tmp;
4056
Owen Taylor3473f882001-02-23 17:55:21 +00004057 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004058 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4059 if (tmp == NULL) {
4060 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004061 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004062 ctxt->instate = (xmlParserInputState) state;
4063 return(NULL);
4064 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004065 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004066 }
4067 count++;
4068 if (count > 50) {
4069 GROW;
4070 count = 0;
4071 }
4072 COPY_BUF(l,buf,len,cur);
4073 NEXTL(l);
4074 cur = CUR_CHAR(l);
4075 if (cur == 0) {
4076 GROW;
4077 SHRINK;
4078 cur = CUR_CHAR(l);
4079 }
4080 }
4081 buf[len] = 0;
4082 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004083 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004084 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004085 } else {
4086 NEXT;
4087 }
4088 return(buf);
4089}
4090
4091/**
4092 * xmlParsePubidLiteral:
4093 * @ctxt: an XML parser context
4094 *
4095 * parse an XML public literal
4096 *
4097 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4098 *
4099 * Returns the PubidLiteral parsed or NULL.
4100 */
4101
4102xmlChar *
4103xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4104 xmlChar *buf = NULL;
4105 int len = 0;
4106 int size = XML_PARSER_BUFFER_SIZE;
4107 xmlChar cur;
4108 xmlChar stop;
4109 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004110 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004111
4112 SHRINK;
4113 if (RAW == '"') {
4114 NEXT;
4115 stop = '"';
4116 } else if (RAW == '\'') {
4117 NEXT;
4118 stop = '\'';
4119 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004120 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004121 return(NULL);
4122 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004123 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004124 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004125 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004126 return(NULL);
4127 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004128 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004129 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004130 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004131 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004132 xmlChar *tmp;
4133
Owen Taylor3473f882001-02-23 17:55:21 +00004134 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004135 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4136 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004137 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004138 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004139 return(NULL);
4140 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004141 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004142 }
4143 buf[len++] = cur;
4144 count++;
4145 if (count > 50) {
4146 GROW;
4147 count = 0;
4148 }
4149 NEXT;
4150 cur = CUR;
4151 if (cur == 0) {
4152 GROW;
4153 SHRINK;
4154 cur = CUR;
4155 }
4156 }
4157 buf[len] = 0;
4158 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004159 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004160 } else {
4161 NEXT;
4162 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004163 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004164 return(buf);
4165}
4166
/* Slow-path parser, defined after xmlParseCharData() which falls back to it. */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);

/*
 * Lookup table used for the test in the inner loop of the char data
 * parsing in xmlParseCharData(): the scan keeps advancing while the entry
 * indexed by the current input byte is non-zero.
 *
 * Non-zero entries (each holding its own index) are TAB (0x09) and the
 * range 0x20-0x7F, except for '&' (0x26), '<' (0x3C) and ']' (0x5D).
 * Every other byte, including CR, LF and all values of 0x80 and above,
 * maps to zero and stops the scan.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9 accepted, CR/LF handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & stops the scan */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < stops the scan */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] stops the scan */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4206
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero a fast path scans the input buffer in place and
 * hands runs of plain characters directly to the SAX callbacks. As soon
 * as that fast path cannot continue (or when @cdata is non-zero) the
 * work is handed over to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Line/column matching ctxt->input->cur. The fast path updates the
     * live counters while scanning ahead; these copies are refreshed
     * after each delivery and written back before the fallback call.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where the input doesn't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* Skip a leading run of spaces and line feeds. */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Only spaces and line feeds were seen before the markup:
                 * report them (if any) and leave.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        /*
                         * Distinct handlers: areBlanks() selects which
                         * one receives the run.
                         */
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            /* NOTE(review): meaning of -1/-2 is not visible here */
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* Scan over every byte the lookup table accepts. */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                /* Line feeds stay part of the run; update the position. */
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is not allowed in content outside of CDATA. */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                /* A lone ']' is ordinary character data. */
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* Deliver the run collected between input->cur and in. */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    /*
                     * The run starts with a blank and the handlers are
                     * distinct: areBlanks() selects which one gets it.
                     */
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        /* NOTE(review): meaning of -1/-2 is not visible here */
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                in++;
                if (*in == 0xA) {
                    /*
                     * CR LF pair: restart the input at the LF so the CR
                     * is left out of the next run, and count a new line.
                     */
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* re-test the do/while condition */
                }
                /* Lone CR: step back, the loop condition below ends the fast path. */
                in--;
            }
            /* Markup or a reference follows: the caller takes over. */
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            SHRINK;
            GROW;
            /* SHRINK/GROW may have changed the input buffer: reload. */
            in = ctxt->input->cur;
            /* Keep using the fast path only for 0x20-0x7F and TAB. */
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* Restore the position matching input->cur, then use the slow path. */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4362
Daniel Veillard01c13b52002-12-10 15:19:08 +00004363/**
4364 * xmlParseCharDataComplex:
4365 * @ctxt: an XML parser context
4366 * @cdata: int indicating whether we are within a CDATA section
4367 *
4368 * parse a CharData section.this is the fallback function
4369 * of xmlParseCharData() when the parsing requires handling
4370 * of non-ASCII characters.
4371 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004372static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004373xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004374 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4375 int nbchar = 0;
4376 int cur, l;
4377 int count = 0;
4378
4379 SHRINK;
4380 GROW;
4381 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004382 while ((cur != '<') && /* checked */
4383 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004384 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004385 if ((cur == ']') && (NXT(1) == ']') &&
4386 (NXT(2) == '>')) {
4387 if (cdata) break;
4388 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004389 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004390 }
4391 }
4392 COPY_BUF(l,buf,nbchar,cur);
4393 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004394 buf[nbchar] = 0;
4395
Owen Taylor3473f882001-02-23 17:55:21 +00004396 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004397 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004398 */
4399 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004400 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004401 if (ctxt->sax->ignorableWhitespace != NULL)
4402 ctxt->sax->ignorableWhitespace(ctxt->userData,
4403 buf, nbchar);
4404 } else {
4405 if (ctxt->sax->characters != NULL)
4406 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004407 if ((ctxt->sax->characters !=
4408 ctxt->sax->ignorableWhitespace) &&
4409 (*ctxt->space == -1))
4410 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004411 }
4412 }
4413 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004414 /* something really bad happened in the SAX callback */
4415 if (ctxt->instate != XML_PARSER_CONTENT)
4416 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004417 }
4418 count++;
4419 if (count > 50) {
4420 GROW;
4421 count = 0;
4422 }
4423 NEXTL(l);
4424 cur = CUR_CHAR(l);
4425 }
4426 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004427 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004428 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004429 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004430 */
4431 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004432 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004433 if (ctxt->sax->ignorableWhitespace != NULL)
4434 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4435 } else {
4436 if (ctxt->sax->characters != NULL)
4437 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004438 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4439 (*ctxt->space == -1))
4440 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004441 }
4442 }
4443 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004444 if ((cur != 0) && (!IS_CHAR(cur))) {
4445 /* Generate the error and skip the offending character */
4446 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4447 "PCDATA invalid Char value %d\n",
4448 cur);
4449 NEXTL(l);
4450 }
Owen Taylor3473f882001-02-23 17:55:21 +00004451}
4452
4453/**
4454 * xmlParseExternalID:
4455 * @ctxt: an XML parser context
4456 * @publicID: a xmlChar** receiving PubidLiteral
4457 * @strict: indicate whether we should restrict parsing to only
4458 * production [75], see NOTE below
4459 *
4460 * Parse an External ID or a Public ID
4461 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004462 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004463 * 'PUBLIC' S PubidLiteral S SystemLiteral
4464 *
4465 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4466 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4467 *
4468 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4469 *
4470 * Returns the function returns SystemLiteral and in the second
4471 * case publicID receives PubidLiteral, is strict is off
4472 * it is possible to return NULL and have publicID set.
4473 */
4474
4475xmlChar *
4476xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4477 xmlChar *URI = NULL;
4478
4479 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004480
4481 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004482 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004483 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004484 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004485 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4486 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004487 }
4488 SKIP_BLANKS;
4489 URI = xmlParseSystemLiteral(ctxt);
4490 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004491 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004492 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004493 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004494 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004495 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004496 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004497 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004498 }
4499 SKIP_BLANKS;
4500 *publicID = xmlParsePubidLiteral(ctxt);
4501 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004502 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004503 }
4504 if (strict) {
4505 /*
4506 * We don't handle [83] so "S SystemLiteral" is required.
4507 */
William M. Brack76e95df2003-10-18 16:20:14 +00004508 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004509 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004510 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004511 }
4512 } else {
4513 /*
4514 * We handle [83] so we return immediately, if
4515 * "S SystemLiteral" is not detected. From a purely parsing
4516 * point of view that's a nice mess.
4517 */
4518 const xmlChar *ptr;
4519 GROW;
4520
4521 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004522 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004523
William M. Brack76e95df2003-10-18 16:20:14 +00004524 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004525 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4526 }
4527 SKIP_BLANKS;
4528 URI = xmlParseSystemLiteral(ctxt);
4529 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004530 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004531 }
4532 }
4533 return(URI);
4534}
4535
/**
 * xmlParseCommentComplex:
 * @ctxt:  an XML parser context
 * @buf:  the already parsed part of the buffer
 * @len:  number of bytes filled in the buffer
 * @size:  allocated size of the buffer
 *
 * Skip an XML (SGML) comment <!-- .... -->
 *  The spec says that "For compatibility, the string "--" (double-hyphen)
 *  must not occur within comments. "
 * This is the slow routine in case the accelerator for ascii didn't work
 *
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004550static void
4551xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004552 int q, ql;
4553 int r, rl;
4554 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004555 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004556 int inputid;
4557
4558 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004559
Owen Taylor3473f882001-02-23 17:55:21 +00004560 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004561 len = 0;
4562 size = XML_PARSER_BUFFER_SIZE;
4563 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4564 if (buf == NULL) {
4565 xmlErrMemory(ctxt, NULL);
4566 return;
4567 }
Owen Taylor3473f882001-02-23 17:55:21 +00004568 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004569 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004570 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004571 if (q == 0)
4572 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004573 if (!IS_CHAR(q)) {
4574 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4575 "xmlParseComment: invalid xmlChar value %d\n",
4576 q);
4577 xmlFree (buf);
4578 return;
4579 }
Owen Taylor3473f882001-02-23 17:55:21 +00004580 NEXTL(ql);
4581 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004582 if (r == 0)
4583 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004584 if (!IS_CHAR(r)) {
4585 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4586 "xmlParseComment: invalid xmlChar value %d\n",
4587 q);
4588 xmlFree (buf);
4589 return;
4590 }
Owen Taylor3473f882001-02-23 17:55:21 +00004591 NEXTL(rl);
4592 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004593 if (cur == 0)
4594 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004595 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004596 ((cur != '>') ||
4597 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004598 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004599 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004600 }
4601 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004602 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004603 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004604 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4605 if (new_buf == NULL) {
4606 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004607 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004608 return;
4609 }
William M. Bracka3215c72004-07-31 16:24:01 +00004610 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004611 }
4612 COPY_BUF(ql,buf,len,q);
4613 q = r;
4614 ql = rl;
4615 r = cur;
4616 rl = l;
4617
4618 count++;
4619 if (count > 50) {
4620 GROW;
4621 count = 0;
4622 }
4623 NEXTL(l);
4624 cur = CUR_CHAR(l);
4625 if (cur == 0) {
4626 SHRINK;
4627 GROW;
4628 cur = CUR_CHAR(l);
4629 }
4630 }
4631 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004632 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004633 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004634 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004635 } else if (!IS_CHAR(cur)) {
4636 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4637 "xmlParseComment: invalid xmlChar value %d\n",
4638 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004639 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004640 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004641 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4642 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004643 }
4644 NEXT;
4645 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4646 (!ctxt->disableSAX))
4647 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004648 }
Daniel Veillardda629342007-08-01 07:49:06 +00004649 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004650 return;
4651not_terminated:
4652 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4653 "Comment not terminated\n", NULL);
4654 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004655 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004656}
Daniel Veillardda629342007-08-01 07:49:06 +00004657
Daniel Veillard4c778d82005-01-23 17:37:44 +00004658/**
4659 * xmlParseComment:
4660 * @ctxt: an XML parser context
4661 *
4662 * Skip an XML (SGML) comment <!-- .... -->
4663 * The spec says that "For compatibility, the string "--" (double-hyphen)
4664 * must not occur within comments. "
4665 *
4666 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4667 */
4668void
4669xmlParseComment(xmlParserCtxtPtr ctxt) {
4670 xmlChar *buf = NULL;
4671 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004672 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004673 xmlParserInputState state;
4674 const xmlChar *in;
4675 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004676 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004677
4678 /*
4679 * Check that there is a comment right here.
4680 */
4681 if ((RAW != '<') || (NXT(1) != '!') ||
4682 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004683 state = ctxt->instate;
4684 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004685 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004686 SKIP(4);
4687 SHRINK;
4688 GROW;
4689
4690 /*
4691 * Accelerated common case where input don't need to be
4692 * modified before passing it to the handler.
4693 */
4694 in = ctxt->input->cur;
4695 do {
4696 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004697 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004698 ctxt->input->line++; ctxt->input->col = 1;
4699 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004700 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004701 }
4702get_more:
4703 ccol = ctxt->input->col;
4704 while (((*in > '-') && (*in <= 0x7F)) ||
4705 ((*in >= 0x20) && (*in < '-')) ||
4706 (*in == 0x09)) {
4707 in++;
4708 ccol++;
4709 }
4710 ctxt->input->col = ccol;
4711 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004712 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004713 ctxt->input->line++; ctxt->input->col = 1;
4714 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004715 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004716 goto get_more;
4717 }
4718 nbchar = in - ctxt->input->cur;
4719 /*
4720 * save current set of data
4721 */
4722 if (nbchar > 0) {
4723 if ((ctxt->sax != NULL) &&
4724 (ctxt->sax->comment != NULL)) {
4725 if (buf == NULL) {
4726 if ((*in == '-') && (in[1] == '-'))
4727 size = nbchar + 1;
4728 else
4729 size = XML_PARSER_BUFFER_SIZE + nbchar;
4730 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4731 if (buf == NULL) {
4732 xmlErrMemory(ctxt, NULL);
4733 ctxt->instate = state;
4734 return;
4735 }
4736 len = 0;
4737 } else if (len + nbchar + 1 >= size) {
4738 xmlChar *new_buf;
4739 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4740 new_buf = (xmlChar *) xmlRealloc(buf,
4741 size * sizeof(xmlChar));
4742 if (new_buf == NULL) {
4743 xmlFree (buf);
4744 xmlErrMemory(ctxt, NULL);
4745 ctxt->instate = state;
4746 return;
4747 }
4748 buf = new_buf;
4749 }
4750 memcpy(&buf[len], ctxt->input->cur, nbchar);
4751 len += nbchar;
4752 buf[len] = 0;
4753 }
4754 }
4755 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004756 if (*in == 0xA) {
4757 in++;
4758 ctxt->input->line++; ctxt->input->col = 1;
4759 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004760 if (*in == 0xD) {
4761 in++;
4762 if (*in == 0xA) {
4763 ctxt->input->cur = in;
4764 in++;
4765 ctxt->input->line++; ctxt->input->col = 1;
4766 continue; /* while */
4767 }
4768 in--;
4769 }
4770 SHRINK;
4771 GROW;
4772 in = ctxt->input->cur;
4773 if (*in == '-') {
4774 if (in[1] == '-') {
4775 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004776 if (ctxt->input->id != inputid) {
4777 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4778 "comment doesn't start and stop in the same entity\n");
4779 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004780 SKIP(3);
4781 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4782 (!ctxt->disableSAX)) {
4783 if (buf != NULL)
4784 ctxt->sax->comment(ctxt->userData, buf);
4785 else
4786 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4787 }
4788 if (buf != NULL)
4789 xmlFree(buf);
4790 ctxt->instate = state;
4791 return;
4792 }
Bryan Henderson8658d272012-05-08 16:39:05 +08004793 if (buf != NULL) {
4794 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4795 "Double hyphen within comment: "
4796 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00004797 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08004798 } else
4799 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4800 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004801 in++;
4802 ctxt->input->col++;
4803 }
4804 in++;
4805 ctxt->input->col++;
4806 goto get_more;
4807 }
4808 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4809 xmlParseCommentComplex(ctxt, buf, len, size);
4810 ctxt->instate = state;
4811 return;
4812}
4813
Owen Taylor3473f882001-02-23 17:55:21 +00004814
4815/**
4816 * xmlParsePITarget:
4817 * @ctxt: an XML parser context
4818 *
4819 * parse the name of a PI
4820 *
4821 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4822 *
4823 * Returns the PITarget name or NULL
4824 */
4825
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004826const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004827xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004828 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004829
4830 name = xmlParseName(ctxt);
4831 if ((name != NULL) &&
4832 ((name[0] == 'x') || (name[0] == 'X')) &&
4833 ((name[1] == 'm') || (name[1] == 'M')) &&
4834 ((name[2] == 'l') || (name[2] == 'L'))) {
4835 int i;
4836 if ((name[0] == 'x') && (name[1] == 'm') &&
4837 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004838 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004839 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004840 return(name);
4841 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004842 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004843 return(name);
4844 }
4845 for (i = 0;;i++) {
4846 if (xmlW3CPIs[i] == NULL) break;
4847 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4848 return(name);
4849 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004850 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4851 "xmlParsePITarget: invalid name prefix 'xml'\n",
4852 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004853 }
Daniel Veillard37334572008-07-31 08:20:02 +00004854 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4855 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4856 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4857 }
Owen Taylor3473f882001-02-23 17:55:21 +00004858 return(name);
4859}
4860
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be exactly: catalog S? '=' S? quoted-URL S?
 * Anything else raises an XML_WAR_CATALOG_PI warning and leaves
 * ctxt->catalogs untouched.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *tmp, *base;
    xmlChar marker;

    tmp = catalog;
    while (IS_BLANK_CH(*tmp)) tmp++;
    if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
        goto error;
    tmp += 7;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* a missing '=' is a syntax error like any other: warn, don't hide it */
    if (*tmp != '=')
        goto error;
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    marker = *tmp;
    if ((marker != '\'') && (marker != '"'))
        goto error;
    tmp++;
    base = tmp;
    /* the URL runs up to the matching quote */
    while ((*tmp != 0) && (*tmp != marker)) tmp++;
    if (*tmp == 0)
        goto error;
    URL = xmlStrndup(base, tmp - base);
    tmp++;
    while (IS_BLANK_CH(*tmp)) tmp++;
    /* nothing but blanks may follow the closing quote */
    if (*tmp != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4922
Owen Taylor3473f882001-02-23 17:55:21 +00004923/**
4924 * xmlParsePI:
4925 * @ctxt: an XML parser context
4926 *
4927 * parse an XML Processing Instruction.
4928 *
4929 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4930 *
4931 * The processing is transfered to SAX once parsed.
4932 */
4933
4934void
4935xmlParsePI(xmlParserCtxtPtr ctxt) {
4936 xmlChar *buf = NULL;
4937 int len = 0;
4938 int size = XML_PARSER_BUFFER_SIZE;
4939 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004940 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004941 xmlParserInputState state;
4942 int count = 0;
4943
4944 if ((RAW == '<') && (NXT(1) == '?')) {
4945 xmlParserInputPtr input = ctxt->input;
4946 state = ctxt->instate;
4947 ctxt->instate = XML_PARSER_PI;
4948 /*
4949 * this is a Processing Instruction.
4950 */
4951 SKIP(2);
4952 SHRINK;
4953
4954 /*
4955 * Parse the target name and check for special support like
4956 * namespace.
4957 */
4958 target = xmlParsePITarget(ctxt);
4959 if (target != NULL) {
4960 if ((RAW == '?') && (NXT(1) == '>')) {
4961 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004962 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4963 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004964 }
4965 SKIP(2);
4966
4967 /*
4968 * SAX: PI detected.
4969 */
4970 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4971 (ctxt->sax->processingInstruction != NULL))
4972 ctxt->sax->processingInstruction(ctxt->userData,
4973 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08004974 if (ctxt->instate != XML_PARSER_EOF)
4975 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004976 return;
4977 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004978 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004979 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004980 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004981 ctxt->instate = state;
4982 return;
4983 }
4984 cur = CUR;
4985 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004986 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4987 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004988 }
4989 SKIP_BLANKS;
4990 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004991 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004992 ((cur != '?') || (NXT(1) != '>'))) {
4993 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004994 xmlChar *tmp;
4995
Owen Taylor3473f882001-02-23 17:55:21 +00004996 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004997 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4998 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004999 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005000 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005001 ctxt->instate = state;
5002 return;
5003 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005004 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005005 }
5006 count++;
5007 if (count > 50) {
5008 GROW;
5009 count = 0;
5010 }
5011 COPY_BUF(l,buf,len,cur);
5012 NEXTL(l);
5013 cur = CUR_CHAR(l);
5014 if (cur == 0) {
5015 SHRINK;
5016 GROW;
5017 cur = CUR_CHAR(l);
5018 }
5019 }
5020 buf[len] = 0;
5021 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005022 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5023 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005024 } else {
5025 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005026 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5027 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005028 }
5029 SKIP(2);
5030
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005031#ifdef LIBXML_CATALOG_ENABLED
5032 if (((state == XML_PARSER_MISC) ||
5033 (state == XML_PARSER_START)) &&
5034 (xmlStrEqual(target, XML_CATALOG_PI))) {
5035 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5036 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5037 (allow == XML_CATA_ALLOW_ALL))
5038 xmlParseCatalogPI(ctxt, buf);
5039 }
5040#endif
5041
5042
Owen Taylor3473f882001-02-23 17:55:21 +00005043 /*
5044 * SAX: PI detected.
5045 */
5046 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5047 (ctxt->sax->processingInstruction != NULL))
5048 ctxt->sax->processingInstruction(ctxt->userData,
5049 target, buf);
5050 }
5051 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005052 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005053 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005054 }
Chris Evans77404b82011-12-14 16:18:25 +08005055 if (ctxt->instate != XML_PARSER_EOF)
5056 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005057 }
5058}
5059
5060/**
5061 * xmlParseNotationDecl:
5062 * @ctxt: an XML parser context
5063 *
5064 * parse a notation declaration
5065 *
5066 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5067 *
5068 * Hence there is actually 3 choices:
5069 * 'PUBLIC' S PubidLiteral
5070 * 'PUBLIC' S PubidLiteral S SystemLiteral
5071 * and 'SYSTEM' S SystemLiteral
5072 *
5073 * See the NOTE on xmlParseExternalID().
5074 */
5075
5076void
5077xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005078 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005079 xmlChar *Pubid;
5080 xmlChar *Systemid;
5081
Daniel Veillarda07050d2003-10-19 14:46:32 +00005082 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005083 xmlParserInputPtr input = ctxt->input;
5084 SHRINK;
5085 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005086 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005087 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5088 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005089 return;
5090 }
5091 SKIP_BLANKS;
5092
Daniel Veillard76d66f42001-05-16 21:05:17 +00005093 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005094 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005095 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005096 return;
5097 }
William M. Brack76e95df2003-10-18 16:20:14 +00005098 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005099 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005100 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005101 return;
5102 }
Daniel Veillard37334572008-07-31 08:20:02 +00005103 if (xmlStrchr(name, ':') != NULL) {
5104 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5105 "colon are forbidden from notation names '%s'\n",
5106 name, NULL, NULL);
5107 }
Owen Taylor3473f882001-02-23 17:55:21 +00005108 SKIP_BLANKS;
5109
5110 /*
5111 * Parse the IDs.
5112 */
5113 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5114 SKIP_BLANKS;
5115
5116 if (RAW == '>') {
5117 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005118 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5119 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005120 }
5121 NEXT;
5122 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5123 (ctxt->sax->notationDecl != NULL))
5124 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5125 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005126 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005127 }
Owen Taylor3473f882001-02-23 17:55:21 +00005128 if (Systemid != NULL) xmlFree(Systemid);
5129 if (Pubid != NULL) xmlFree(Pubid);
5130 }
5131}
5132
5133/**
5134 * xmlParseEntityDecl:
5135 * @ctxt: an XML parser context
5136 *
5137 * parse <!ENTITY declarations
5138 *
5139 * [70] EntityDecl ::= GEDecl | PEDecl
5140 *
5141 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5142 *
5143 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5144 *
5145 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5146 *
5147 * [74] PEDef ::= EntityValue | ExternalID
5148 *
5149 * [76] NDataDecl ::= S 'NDATA' S Name
5150 *
5151 * [ VC: Notation Declared ]
5152 * The Name must match the declared name of a notation.
5153 */
5154
5155void
5156xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005157 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005158 xmlChar *value = NULL;
5159 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005160 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005161 int isParameter = 0;
5162 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005163 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00005164
Daniel Veillard4c778d82005-01-23 17:37:44 +00005165 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005166 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005167 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005168 SHRINK;
5169 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005170 skipped = SKIP_BLANKS;
5171 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005172 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5173 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005174 }
Owen Taylor3473f882001-02-23 17:55:21 +00005175
5176 if (RAW == '%') {
5177 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005178 skipped = SKIP_BLANKS;
5179 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005180 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5181 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005182 }
Owen Taylor3473f882001-02-23 17:55:21 +00005183 isParameter = 1;
5184 }
5185
Daniel Veillard76d66f42001-05-16 21:05:17 +00005186 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005187 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005188 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5189 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005190 return;
5191 }
Daniel Veillard37334572008-07-31 08:20:02 +00005192 if (xmlStrchr(name, ':') != NULL) {
5193 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5194 "colon are forbidden from entities names '%s'\n",
5195 name, NULL, NULL);
5196 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005197 skipped = SKIP_BLANKS;
5198 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005199 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5200 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005201 }
Owen Taylor3473f882001-02-23 17:55:21 +00005202
Daniel Veillardf5582f12002-06-11 10:08:16 +00005203 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005204 /*
5205 * handle the various case of definitions...
5206 */
5207 if (isParameter) {
5208 if ((RAW == '"') || (RAW == '\'')) {
5209 value = xmlParseEntityValue(ctxt, &orig);
5210 if (value) {
5211 if ((ctxt->sax != NULL) &&
5212 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5213 ctxt->sax->entityDecl(ctxt->userData, name,
5214 XML_INTERNAL_PARAMETER_ENTITY,
5215 NULL, NULL, value);
5216 }
5217 } else {
5218 URI = xmlParseExternalID(ctxt, &literal, 1);
5219 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005220 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005221 }
5222 if (URI) {
5223 xmlURIPtr uri;
5224
5225 uri = xmlParseURI((const char *) URI);
5226 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005227 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5228 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005229 /*
5230 * This really ought to be a well formedness error
5231 * but the XML Core WG decided otherwise c.f. issue
5232 * E26 of the XML erratas.
5233 */
Owen Taylor3473f882001-02-23 17:55:21 +00005234 } else {
5235 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005236 /*
5237 * Okay this is foolish to block those but not
5238 * invalid URIs.
5239 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005240 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005241 } else {
5242 if ((ctxt->sax != NULL) &&
5243 (!ctxt->disableSAX) &&
5244 (ctxt->sax->entityDecl != NULL))
5245 ctxt->sax->entityDecl(ctxt->userData, name,
5246 XML_EXTERNAL_PARAMETER_ENTITY,
5247 literal, URI, NULL);
5248 }
5249 xmlFreeURI(uri);
5250 }
5251 }
5252 }
5253 } else {
5254 if ((RAW == '"') || (RAW == '\'')) {
5255 value = xmlParseEntityValue(ctxt, &orig);
5256 if ((ctxt->sax != NULL) &&
5257 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5258 ctxt->sax->entityDecl(ctxt->userData, name,
5259 XML_INTERNAL_GENERAL_ENTITY,
5260 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005261 /*
5262 * For expat compatibility in SAX mode.
5263 */
5264 if ((ctxt->myDoc == NULL) ||
5265 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5266 if (ctxt->myDoc == NULL) {
5267 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005268 if (ctxt->myDoc == NULL) {
5269 xmlErrMemory(ctxt, "New Doc failed");
5270 return;
5271 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005272 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005273 }
5274 if (ctxt->myDoc->intSubset == NULL)
5275 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5276 BAD_CAST "fake", NULL, NULL);
5277
Daniel Veillard1af9a412003-08-20 22:54:39 +00005278 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5279 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005280 }
Owen Taylor3473f882001-02-23 17:55:21 +00005281 } else {
5282 URI = xmlParseExternalID(ctxt, &literal, 1);
5283 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005284 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005285 }
5286 if (URI) {
5287 xmlURIPtr uri;
5288
5289 uri = xmlParseURI((const char *)URI);
5290 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005291 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5292 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005293 /*
5294 * This really ought to be a well formedness error
5295 * but the XML Core WG decided otherwise c.f. issue
5296 * E26 of the XML erratas.
5297 */
Owen Taylor3473f882001-02-23 17:55:21 +00005298 } else {
5299 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005300 /*
5301 * Okay this is foolish to block those but not
5302 * invalid URIs.
5303 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005304 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005305 }
5306 xmlFreeURI(uri);
5307 }
5308 }
William M. Brack76e95df2003-10-18 16:20:14 +00005309 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005310 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5311 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005312 }
5313 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005314 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005315 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005316 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005317 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5318 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005319 }
5320 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005321 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005322 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5323 (ctxt->sax->unparsedEntityDecl != NULL))
5324 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5325 literal, URI, ndata);
5326 } else {
5327 if ((ctxt->sax != NULL) &&
5328 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5329 ctxt->sax->entityDecl(ctxt->userData, name,
5330 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5331 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005332 /*
5333 * For expat compatibility in SAX mode.
5334 * assuming the entity repalcement was asked for
5335 */
5336 if ((ctxt->replaceEntities != 0) &&
5337 ((ctxt->myDoc == NULL) ||
5338 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5339 if (ctxt->myDoc == NULL) {
5340 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005341 if (ctxt->myDoc == NULL) {
5342 xmlErrMemory(ctxt, "New Doc failed");
5343 return;
5344 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005345 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005346 }
5347
5348 if (ctxt->myDoc->intSubset == NULL)
5349 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5350 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005351 xmlSAX2EntityDecl(ctxt, name,
5352 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5353 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005354 }
Owen Taylor3473f882001-02-23 17:55:21 +00005355 }
5356 }
5357 }
5358 SKIP_BLANKS;
5359 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005360 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005361 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005362 } else {
5363 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005364 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5365 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005366 }
5367 NEXT;
5368 }
5369 if (orig != NULL) {
5370 /*
5371 * Ugly mechanism to save the raw entity value.
5372 */
5373 xmlEntityPtr cur = NULL;
5374
5375 if (isParameter) {
5376 if ((ctxt->sax != NULL) &&
5377 (ctxt->sax->getParameterEntity != NULL))
5378 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5379 } else {
5380 if ((ctxt->sax != NULL) &&
5381 (ctxt->sax->getEntity != NULL))
5382 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005383 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005384 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005385 }
Owen Taylor3473f882001-02-23 17:55:21 +00005386 }
5387 if (cur != NULL) {
5388 if (cur->orig != NULL)
5389 xmlFree(orig);
5390 else
5391 cur->orig = orig;
5392 } else
5393 xmlFree(orig);
5394 }
Owen Taylor3473f882001-02-23 17:55:21 +00005395 if (value != NULL) xmlFree(value);
5396 if (URI != NULL) xmlFree(URI);
5397 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005398 }
5399}
5400
5401/**
5402 * xmlParseDefaultDecl:
5403 * @ctxt: an XML parser context
5404 * @value: Receive a possible fixed default value for the attribute
5405 *
5406 * Parse an attribute default declaration
5407 *
5408 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5409 *
5410 * [ VC: Required Attribute ]
5411 * if the default declaration is the keyword #REQUIRED, then the
5412 * attribute must be specified for all elements of the type in the
5413 * attribute-list declaration.
5414 *
5415 * [ VC: Attribute Default Legal ]
5416 * The declared default value must meet the lexical constraints of
5417 * the declared attribute type c.f. xmlValidateAttributeDecl()
5418 *
5419 * [ VC: Fixed Attribute Default ]
5420 * if an attribute has a default value declared with the #FIXED
5421 * keyword, instances of that attribute must match the default value.
5422 *
5423 * [ WFC: No < in Attribute Values ]
5424 * handled in xmlParseAttValue()
5425 *
5426 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5427 * or XML_ATTRIBUTE_FIXED.
5428 */
5429
5430int
5431xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5432 int val;
5433 xmlChar *ret;
5434
5435 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005436 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005437 SKIP(9);
5438 return(XML_ATTRIBUTE_REQUIRED);
5439 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005440 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005441 SKIP(8);
5442 return(XML_ATTRIBUTE_IMPLIED);
5443 }
5444 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005445 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005446 SKIP(6);
5447 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005448 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005449 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5450 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005451 }
5452 SKIP_BLANKS;
5453 }
5454 ret = xmlParseAttValue(ctxt);
5455 ctxt->instate = XML_PARSER_DTD;
5456 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005457 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005458 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005459 } else
5460 *value = ret;
5461 return(val);
5462}
5463
5464/**
5465 * xmlParseNotationType:
5466 * @ctxt: an XML parser context
5467 *
5468 * parse an Notation attribute type.
5469 *
5470 * Note: the leading 'NOTATION' S part has already being parsed...
5471 *
5472 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5473 *
5474 * [ VC: Notation Attributes ]
5475 * Values of this type must match one of the notation names included
5476 * in the declaration; all notation names in the declaration must be declared.
5477 *
5478 * Returns: the notation attribute tree built while parsing
5479 */
5480
5481xmlEnumerationPtr
5482xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005483 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005484 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005485
5486 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005487 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005488 return(NULL);
5489 }
5490 SHRINK;
5491 do {
5492 NEXT;
5493 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005494 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005495 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005496 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5497 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005498 xmlFreeEnumeration(ret);
5499 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005500 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005501 tmp = ret;
5502 while (tmp != NULL) {
5503 if (xmlStrEqual(name, tmp->name)) {
5504 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5505 "standalone: attribute notation value token %s duplicated\n",
5506 name, NULL);
5507 if (!xmlDictOwns(ctxt->dict, name))
5508 xmlFree((xmlChar *) name);
5509 break;
5510 }
5511 tmp = tmp->next;
5512 }
5513 if (tmp == NULL) {
5514 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005515 if (cur == NULL) {
5516 xmlFreeEnumeration(ret);
5517 return(NULL);
5518 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005519 if (last == NULL) ret = last = cur;
5520 else {
5521 last->next = cur;
5522 last = cur;
5523 }
Owen Taylor3473f882001-02-23 17:55:21 +00005524 }
5525 SKIP_BLANKS;
5526 } while (RAW == '|');
5527 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005528 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005529 xmlFreeEnumeration(ret);
5530 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005531 }
5532 NEXT;
5533 return(ret);
5534}
5535
5536/**
5537 * xmlParseEnumerationType:
5538 * @ctxt: an XML parser context
5539 *
5540 * parse an Enumeration attribute type.
5541 *
5542 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5543 *
5544 * [ VC: Enumeration ]
5545 * Values of this type must match one of the Nmtoken tokens in
5546 * the declaration
5547 *
5548 * Returns: the enumeration attribute tree built while parsing
5549 */
5550
5551xmlEnumerationPtr
5552xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5553 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005554 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005555
5556 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005557 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005558 return(NULL);
5559 }
5560 SHRINK;
5561 do {
5562 NEXT;
5563 SKIP_BLANKS;
5564 name = xmlParseNmtoken(ctxt);
5565 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005566 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005567 return(ret);
5568 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005569 tmp = ret;
5570 while (tmp != NULL) {
5571 if (xmlStrEqual(name, tmp->name)) {
5572 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5573 "standalone: attribute enumeration value token %s duplicated\n",
5574 name, NULL);
5575 if (!xmlDictOwns(ctxt->dict, name))
5576 xmlFree(name);
5577 break;
5578 }
5579 tmp = tmp->next;
5580 }
5581 if (tmp == NULL) {
5582 cur = xmlCreateEnumeration(name);
5583 if (!xmlDictOwns(ctxt->dict, name))
5584 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005585 if (cur == NULL) {
5586 xmlFreeEnumeration(ret);
5587 return(NULL);
5588 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005589 if (last == NULL) ret = last = cur;
5590 else {
5591 last->next = cur;
5592 last = cur;
5593 }
Owen Taylor3473f882001-02-23 17:55:21 +00005594 }
5595 SKIP_BLANKS;
5596 } while (RAW == '|');
5597 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005598 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005599 return(ret);
5600 }
5601 NEXT;
5602 return(ret);
5603}
5604
5605/**
5606 * xmlParseEnumeratedType:
5607 * @ctxt: an XML parser context
5608 * @tree: the enumeration tree built while parsing
5609 *
5610 * parse an Enumerated attribute type.
5611 *
5612 * [57] EnumeratedType ::= NotationType | Enumeration
5613 *
5614 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5615 *
5616 *
5617 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5618 */
5619
5620int
5621xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005622 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005623 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005624 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005625 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5626 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005627 return(0);
5628 }
5629 SKIP_BLANKS;
5630 *tree = xmlParseNotationType(ctxt);
5631 if (*tree == NULL) return(0);
5632 return(XML_ATTRIBUTE_NOTATION);
5633 }
5634 *tree = xmlParseEnumerationType(ctxt);
5635 if (*tree == NULL) return(0);
5636 return(XML_ATTRIBUTE_ENUMERATION);
5637}
5638
5639/**
5640 * xmlParseAttributeType:
5641 * @ctxt: an XML parser context
5642 * @tree: the enumeration tree built while parsing
5643 *
5644 * parse the Attribute list def for an element
5645 *
5646 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5647 *
5648 * [55] StringType ::= 'CDATA'
5649 *
5650 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5651 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5652 *
5653 * Validity constraints for attribute values syntax are checked in
5654 * xmlValidateAttributeValue()
5655 *
5656 * [ VC: ID ]
5657 * Values of type ID must match the Name production. A name must not
5658 * appear more than once in an XML document as a value of this type;
5659 * i.e., ID values must uniquely identify the elements which bear them.
5660 *
5661 * [ VC: One ID per Element Type ]
5662 * No element type may have more than one ID attribute specified.
5663 *
5664 * [ VC: ID Attribute Default ]
5665 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5666 *
5667 * [ VC: IDREF ]
5668 * Values of type IDREF must match the Name production, and values
5669 * of type IDREFS must match Names; each IDREF Name must match the value
5670 * of an ID attribute on some element in the XML document; i.e. IDREF
5671 * values must match the value of some ID attribute.
5672 *
5673 * [ VC: Entity Name ]
5674 * Values of type ENTITY must match the Name production, values
5675 * of type ENTITIES must match Names; each Entity Name must match the
5676 * name of an unparsed entity declared in the DTD.
5677 *
5678 * [ VC: Name Token ]
5679 * Values of type NMTOKEN must match the Nmtoken production; values
5680 * of type NMTOKENS must match Nmtokens.
5681 *
5682 * Returns the attribute type
5683 */
5684int
5685xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5686 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005687 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005688 SKIP(5);
5689 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005690 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005691 SKIP(6);
5692 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005693 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005694 SKIP(5);
5695 return(XML_ATTRIBUTE_IDREF);
5696 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5697 SKIP(2);
5698 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005699 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005700 SKIP(6);
5701 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005702 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005703 SKIP(8);
5704 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005705 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005706 SKIP(8);
5707 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005708 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005709 SKIP(7);
5710 return(XML_ATTRIBUTE_NMTOKEN);
5711 }
5712 return(xmlParseEnumeratedType(ctxt, tree));
5713}
5714
5715/**
5716 * xmlParseAttributeListDecl:
5717 * @ctxt: an XML parser context
5718 *
5719 * : parse the Attribute list def for an element
5720 *
5721 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5722 *
5723 * [53] AttDef ::= S Name S AttType S DefaultDecl
5724 *
5725 */
5726void
5727xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005728 const xmlChar *elemName;
5729 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005730 xmlEnumerationPtr tree;
5731
Daniel Veillarda07050d2003-10-19 14:46:32 +00005732 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005733 xmlParserInputPtr input = ctxt->input;
5734
5735 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005736 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005737 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005738 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005739 }
5740 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005741 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005742 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005743 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5744 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005745 return;
5746 }
5747 SKIP_BLANKS;
5748 GROW;
5749 while (RAW != '>') {
5750 const xmlChar *check = CUR_PTR;
5751 int type;
5752 int def;
5753 xmlChar *defaultValue = NULL;
5754
5755 GROW;
5756 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005757 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005758 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005759 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5760 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005761 break;
5762 }
5763 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005764 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005765 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005766 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005767 break;
5768 }
5769 SKIP_BLANKS;
5770
5771 type = xmlParseAttributeType(ctxt, &tree);
5772 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005773 break;
5774 }
5775
5776 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005777 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005778 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5779 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005780 if (tree != NULL)
5781 xmlFreeEnumeration(tree);
5782 break;
5783 }
5784 SKIP_BLANKS;
5785
5786 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5787 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005788 if (defaultValue != NULL)
5789 xmlFree(defaultValue);
5790 if (tree != NULL)
5791 xmlFreeEnumeration(tree);
5792 break;
5793 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005794 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5795 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005796
5797 GROW;
5798 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005799 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005800 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005801 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005802 if (defaultValue != NULL)
5803 xmlFree(defaultValue);
5804 if (tree != NULL)
5805 xmlFreeEnumeration(tree);
5806 break;
5807 }
5808 SKIP_BLANKS;
5809 }
5810 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005811 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5812 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005813 if (defaultValue != NULL)
5814 xmlFree(defaultValue);
5815 if (tree != NULL)
5816 xmlFreeEnumeration(tree);
5817 break;
5818 }
5819 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5820 (ctxt->sax->attributeDecl != NULL))
5821 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5822 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005823 else if (tree != NULL)
5824 xmlFreeEnumeration(tree);
5825
5826 if ((ctxt->sax2) && (defaultValue != NULL) &&
5827 (def != XML_ATTRIBUTE_IMPLIED) &&
5828 (def != XML_ATTRIBUTE_REQUIRED)) {
5829 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5830 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005831 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005832 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5833 }
Owen Taylor3473f882001-02-23 17:55:21 +00005834 if (defaultValue != NULL)
5835 xmlFree(defaultValue);
5836 GROW;
5837 }
5838 if (RAW == '>') {
5839 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005840 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5841 "Attribute list declaration doesn't start and stop in the same entity\n",
5842 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005843 }
5844 NEXT;
5845 }
Owen Taylor3473f882001-02-23 17:55:21 +00005846 }
5847}
5848
5849/**
5850 * xmlParseElementMixedContentDecl:
5851 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005852 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005853 *
5854 * parse the declaration for a Mixed Element content
5855 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5856 *
5857 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5858 * '(' S? '#PCDATA' S? ')'
5859 *
5860 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5861 *
5862 * [ VC: No Duplicate Types ]
5863 * The same name must not appear more than once in a single
5864 * mixed-content declaration.
5865 *
5866 * returns: the list of the xmlElementContentPtr describing the element choices
5867 */
5868xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005869xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005870 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005871 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005872
5873 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005874 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005875 SKIP(7);
5876 SKIP_BLANKS;
5877 SHRINK;
5878 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005879 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005880 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5881"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005882 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005883 }
Owen Taylor3473f882001-02-23 17:55:21 +00005884 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005885 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005886 if (ret == NULL)
5887 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005888 if (RAW == '*') {
5889 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5890 NEXT;
5891 }
5892 return(ret);
5893 }
5894 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005895 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005896 if (ret == NULL) return(NULL);
5897 }
5898 while (RAW == '|') {
5899 NEXT;
5900 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005901 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005902 if (ret == NULL) return(NULL);
5903 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005904 if (cur != NULL)
5905 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005906 cur = ret;
5907 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005908 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005909 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005910 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005911 if (n->c1 != NULL)
5912 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005913 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005914 if (n != NULL)
5915 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005916 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005917 }
5918 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005919 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005920 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005921 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005922 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005923 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005924 return(NULL);
5925 }
5926 SKIP_BLANKS;
5927 GROW;
5928 }
5929 if ((RAW == ')') && (NXT(1) == '*')) {
5930 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005931 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005932 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005933 if (cur->c2 != NULL)
5934 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005935 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02005936 if (ret != NULL)
5937 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005938 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005939 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5940"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005941 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005942 }
Owen Taylor3473f882001-02-23 17:55:21 +00005943 SKIP(2);
5944 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005945 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005946 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005947 return(NULL);
5948 }
5949
5950 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005951 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005952 }
5953 return(ret);
5954}
5955
5956/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005957 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005958 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005959 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005960 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005961 *
5962 * parse the declaration for a Mixed Element content
5963 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5964 *
5965 *
5966 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5967 *
5968 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5969 *
5970 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5971 *
5972 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5973 *
5974 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5975 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005976 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005977 * opening or closing parentheses in a choice, seq, or Mixed
5978 * construct is contained in the replacement text for a parameter
5979 * entity, both must be contained in the same replacement text. For
5980 * interoperability, if a parameter-entity reference appears in a
5981 * choice, seq, or Mixed construct, its replacement text should not
5982 * be empty, and neither the first nor last non-blank character of
5983 * the replacement text should be a connector (| or ,).
5984 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005985 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005986 * hierarchy.
5987 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005988static xmlElementContentPtr
5989xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5990 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005991 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005992 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005993 xmlChar type = 0;
5994
Daniel Veillard489f9672009-08-10 16:49:30 +02005995 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5996 (depth > 2048)) {
5997 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5998"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5999 depth);
6000 return(NULL);
6001 }
Owen Taylor3473f882001-02-23 17:55:21 +00006002 SKIP_BLANKS;
6003 GROW;
6004 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006005 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006006
Owen Taylor3473f882001-02-23 17:55:21 +00006007 /* Recurse on first child */
6008 NEXT;
6009 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006010 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6011 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006012 SKIP_BLANKS;
6013 GROW;
6014 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006015 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006016 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006017 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006018 return(NULL);
6019 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006020 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006021 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006022 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006023 return(NULL);
6024 }
Owen Taylor3473f882001-02-23 17:55:21 +00006025 GROW;
6026 if (RAW == '?') {
6027 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6028 NEXT;
6029 } else if (RAW == '*') {
6030 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6031 NEXT;
6032 } else if (RAW == '+') {
6033 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6034 NEXT;
6035 } else {
6036 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6037 }
Owen Taylor3473f882001-02-23 17:55:21 +00006038 GROW;
6039 }
6040 SKIP_BLANKS;
6041 SHRINK;
6042 while (RAW != ')') {
6043 /*
6044 * Each loop we parse one separator and one element.
6045 */
6046 if (RAW == ',') {
6047 if (type == 0) type = CUR;
6048
6049 /*
6050 * Detect "Name | Name , Name" error
6051 */
6052 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006053 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006054 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006055 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006056 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006057 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006058 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006059 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006060 return(NULL);
6061 }
6062 NEXT;
6063
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006064 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006065 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006066 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006067 xmlFreeDocElementContent(ctxt->myDoc, last);
6068 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006069 return(NULL);
6070 }
6071 if (last == NULL) {
6072 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006073 if (ret != NULL)
6074 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006075 ret = cur = op;
6076 } else {
6077 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006078 if (op != NULL)
6079 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006080 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006081 if (last != NULL)
6082 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006083 cur =op;
6084 last = NULL;
6085 }
6086 } else if (RAW == '|') {
6087 if (type == 0) type = CUR;
6088
6089 /*
6090 * Detect "Name , Name | Name" error
6091 */
6092 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006093 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006094 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006095 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006096 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006097 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006098 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006099 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006100 return(NULL);
6101 }
6102 NEXT;
6103
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006104 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006105 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006106 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006107 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006108 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006109 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006110 return(NULL);
6111 }
6112 if (last == NULL) {
6113 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006114 if (ret != NULL)
6115 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006116 ret = cur = op;
6117 } else {
6118 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006119 if (op != NULL)
6120 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006121 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006122 if (last != NULL)
6123 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006124 cur =op;
6125 last = NULL;
6126 }
6127 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006128 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006129 if ((last != NULL) && (last != ret))
6130 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006131 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006132 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006133 return(NULL);
6134 }
6135 GROW;
6136 SKIP_BLANKS;
6137 GROW;
6138 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006139 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006140 /* Recurse on second child */
6141 NEXT;
6142 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006143 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6144 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006145 SKIP_BLANKS;
6146 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006147 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006148 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006149 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006150 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006151 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006152 return(NULL);
6153 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006154 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006155 if (last == NULL) {
6156 if (ret != NULL)
6157 xmlFreeDocElementContent(ctxt->myDoc, ret);
6158 return(NULL);
6159 }
Owen Taylor3473f882001-02-23 17:55:21 +00006160 if (RAW == '?') {
6161 last->ocur = XML_ELEMENT_CONTENT_OPT;
6162 NEXT;
6163 } else if (RAW == '*') {
6164 last->ocur = XML_ELEMENT_CONTENT_MULT;
6165 NEXT;
6166 } else if (RAW == '+') {
6167 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6168 NEXT;
6169 } else {
6170 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6171 }
6172 }
6173 SKIP_BLANKS;
6174 GROW;
6175 }
6176 if ((cur != NULL) && (last != NULL)) {
6177 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006178 if (last != NULL)
6179 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006180 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006181 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006182 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6183"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006184 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006185 }
Owen Taylor3473f882001-02-23 17:55:21 +00006186 NEXT;
6187 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006188 if (ret != NULL) {
6189 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6190 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6191 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6192 else
6193 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6194 }
Owen Taylor3473f882001-02-23 17:55:21 +00006195 NEXT;
6196 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006197 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006198 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006199 cur = ret;
6200 /*
6201 * Some normalization:
6202 * (a | b* | c?)* == (a | b | c)*
6203 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006204 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006205 if ((cur->c1 != NULL) &&
6206 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6207 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6208 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6209 if ((cur->c2 != NULL) &&
6210 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6211 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6212 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6213 cur = cur->c2;
6214 }
6215 }
Owen Taylor3473f882001-02-23 17:55:21 +00006216 NEXT;
6217 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006218 if (ret != NULL) {
6219 int found = 0;
6220
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006221 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6222 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6223 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006224 else
6225 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006226 /*
6227 * Some normalization:
6228 * (a | b*)+ == (a | b)*
6229 * (a | b?)+ == (a | b)*
6230 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006231 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006232 if ((cur->c1 != NULL) &&
6233 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6234 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6235 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6236 found = 1;
6237 }
6238 if ((cur->c2 != NULL) &&
6239 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6240 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6241 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6242 found = 1;
6243 }
6244 cur = cur->c2;
6245 }
6246 if (found)
6247 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6248 }
Owen Taylor3473f882001-02-23 17:55:21 +00006249 NEXT;
6250 }
6251 return(ret);
6252}
6253
6254/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006255 * xmlParseElementChildrenContentDecl:
6256 * @ctxt: an XML parser context
6257 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006258 *
6259 * parse the declaration for a Mixed Element content
6260 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6261 *
6262 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6263 *
6264 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6265 *
6266 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6267 *
6268 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6269 *
6270 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6271 * TODO Parameter-entity replacement text must be properly nested
6272 * with parenthesized groups. That is to say, if either of the
6273 * opening or closing parentheses in a choice, seq, or Mixed
6274 * construct is contained in the replacement text for a parameter
6275 * entity, both must be contained in the same replacement text. For
6276 * interoperability, if a parameter-entity reference appears in a
6277 * choice, seq, or Mixed construct, its replacement text should not
6278 * be empty, and neither the first nor last non-blank character of
6279 * the replacement text should be a connector (| or ,).
6280 *
6281 * Returns the tree of xmlElementContentPtr describing the element
6282 * hierarchy.
6283 */
6284xmlElementContentPtr
6285xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6286 /* stub left for API/ABI compat */
6287 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6288}
6289
6290/**
Owen Taylor3473f882001-02-23 17:55:21 +00006291 * xmlParseElementContentDecl:
6292 * @ctxt: an XML parser context
6293 * @name: the name of the element being defined.
6294 * @result: the Element Content pointer will be stored here if any
6295 *
6296 * parse the declaration for an Element content either Mixed or Children,
6297 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6298 *
6299 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6300 *
6301 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6302 */
6303
6304int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006305xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006306 xmlElementContentPtr *result) {
6307
6308 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006309 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006310 int res;
6311
6312 *result = NULL;
6313
6314 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006315 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006316 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006317 return(-1);
6318 }
6319 NEXT;
6320 GROW;
6321 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006322 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006323 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006324 res = XML_ELEMENT_TYPE_MIXED;
6325 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006326 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006327 res = XML_ELEMENT_TYPE_ELEMENT;
6328 }
Owen Taylor3473f882001-02-23 17:55:21 +00006329 SKIP_BLANKS;
6330 *result = tree;
6331 return(res);
6332}
6333
6334/**
6335 * xmlParseElementDecl:
6336 * @ctxt: an XML parser context
6337 *
6338 * parse an Element declaration.
6339 *
6340 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6341 *
6342 * [ VC: Unique Element Type Declaration ]
6343 * No element type may be declared more than once
6344 *
6345 * Returns the type of the element, or -1 in case of error
6346 */
6347int
6348xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006349 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006350 int ret = -1;
6351 xmlElementContentPtr content = NULL;
6352
Daniel Veillard4c778d82005-01-23 17:37:44 +00006353 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006354 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006355 xmlParserInputPtr input = ctxt->input;
6356
6357 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006358 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006359 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6360 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006361 }
6362 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006363 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006364 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006365 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6366 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006367 return(-1);
6368 }
6369 while ((RAW == 0) && (ctxt->inputNr > 1))
6370 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006371 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006372 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6373 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006374 }
6375 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006376 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006377 SKIP(5);
6378 /*
6379 * Element must always be empty.
6380 */
6381 ret = XML_ELEMENT_TYPE_EMPTY;
6382 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6383 (NXT(2) == 'Y')) {
6384 SKIP(3);
6385 /*
6386 * Element is a generic container.
6387 */
6388 ret = XML_ELEMENT_TYPE_ANY;
6389 } else if (RAW == '(') {
6390 ret = xmlParseElementContentDecl(ctxt, name, &content);
6391 } else {
6392 /*
6393 * [ WFC: PEs in Internal Subset ] error handling.
6394 */
6395 if ((RAW == '%') && (ctxt->external == 0) &&
6396 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006397 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006398 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006399 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006400 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006401 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6402 }
Owen Taylor3473f882001-02-23 17:55:21 +00006403 return(-1);
6404 }
6405
6406 SKIP_BLANKS;
6407 /*
6408 * Pop-up of finished entities.
6409 */
6410 while ((RAW == 0) && (ctxt->inputNr > 1))
6411 xmlPopInput(ctxt);
6412 SKIP_BLANKS;
6413
6414 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006415 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006416 if (content != NULL) {
6417 xmlFreeDocElementContent(ctxt->myDoc, content);
6418 }
Owen Taylor3473f882001-02-23 17:55:21 +00006419 } else {
6420 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006421 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6422 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006423 }
6424
6425 NEXT;
6426 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006427 (ctxt->sax->elementDecl != NULL)) {
6428 if (content != NULL)
6429 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006430 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6431 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006432 if ((content != NULL) && (content->parent == NULL)) {
6433 /*
6434 * this is a trick: if xmlAddElementDecl is called,
6435 * instead of copying the full tree it is plugged directly
6436 * if called from the parser. Avoid duplicating the
6437 * interfaces or change the API/ABI
6438 */
6439 xmlFreeDocElementContent(ctxt->myDoc, content);
6440 }
6441 } else if (content != NULL) {
6442 xmlFreeDocElementContent(ctxt->myDoc, content);
6443 }
Owen Taylor3473f882001-02-23 17:55:21 +00006444 }
Owen Taylor3473f882001-02-23 17:55:21 +00006445 }
6446 return(ret);
6447}
6448
6449/**
Owen Taylor3473f882001-02-23 17:55:21 +00006450 * xmlParseConditionalSections
6451 * @ctxt: an XML parser context
6452 *
6453 * [61] conditionalSect ::= includeSect | ignoreSect
6454 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6455 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6456 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6457 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6458 */
6459
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006460static void
Owen Taylor3473f882001-02-23 17:55:21 +00006461xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006462 int id = ctxt->input->id;
6463
Owen Taylor3473f882001-02-23 17:55:21 +00006464 SKIP(3);
6465 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006466 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006467 SKIP(7);
6468 SKIP_BLANKS;
6469 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006470 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006471 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006472 if (ctxt->input->id != id) {
6473 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6474 "All markup of the conditional section is not in the same entity\n",
6475 NULL, NULL);
6476 }
Owen Taylor3473f882001-02-23 17:55:21 +00006477 NEXT;
6478 }
6479 if (xmlParserDebugEntities) {
6480 if ((ctxt->input != NULL) && (ctxt->input->filename))
6481 xmlGenericError(xmlGenericErrorContext,
6482 "%s(%d): ", ctxt->input->filename,
6483 ctxt->input->line);
6484 xmlGenericError(xmlGenericErrorContext,
6485 "Entering INCLUDE Conditional Section\n");
6486 }
6487
6488 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6489 (NXT(2) != '>'))) {
6490 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006491 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006492
6493 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6494 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006495 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006496 NEXT;
6497 } else if (RAW == '%') {
6498 xmlParsePEReference(ctxt);
6499 } else
6500 xmlParseMarkupDecl(ctxt);
6501
6502 /*
6503 * Pop-up of finished entities.
6504 */
6505 while ((RAW == 0) && (ctxt->inputNr > 1))
6506 xmlPopInput(ctxt);
6507
Daniel Veillardfdc91562002-07-01 21:52:03 +00006508 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006509 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006510 break;
6511 }
6512 }
6513 if (xmlParserDebugEntities) {
6514 if ((ctxt->input != NULL) && (ctxt->input->filename))
6515 xmlGenericError(xmlGenericErrorContext,
6516 "%s(%d): ", ctxt->input->filename,
6517 ctxt->input->line);
6518 xmlGenericError(xmlGenericErrorContext,
6519 "Leaving INCLUDE Conditional Section\n");
6520 }
6521
Daniel Veillarda07050d2003-10-19 14:46:32 +00006522 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006523 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006524 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006525 int depth = 0;
6526
6527 SKIP(6);
6528 SKIP_BLANKS;
6529 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006530 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006531 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006532 if (ctxt->input->id != id) {
6533 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6534 "All markup of the conditional section is not in the same entity\n",
6535 NULL, NULL);
6536 }
Owen Taylor3473f882001-02-23 17:55:21 +00006537 NEXT;
6538 }
6539 if (xmlParserDebugEntities) {
6540 if ((ctxt->input != NULL) && (ctxt->input->filename))
6541 xmlGenericError(xmlGenericErrorContext,
6542 "%s(%d): ", ctxt->input->filename,
6543 ctxt->input->line);
6544 xmlGenericError(xmlGenericErrorContext,
6545 "Entering IGNORE Conditional Section\n");
6546 }
6547
6548 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006549 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006550 * But disable SAX event generating DTD building in the meantime
6551 */
6552 state = ctxt->disableSAX;
6553 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006554 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006555 ctxt->instate = XML_PARSER_IGNORE;
6556
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006557 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006558 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6559 depth++;
6560 SKIP(3);
6561 continue;
6562 }
6563 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6564 if (--depth >= 0) SKIP(3);
6565 continue;
6566 }
6567 NEXT;
6568 continue;
6569 }
6570
6571 ctxt->disableSAX = state;
6572 ctxt->instate = instate;
6573
6574 if (xmlParserDebugEntities) {
6575 if ((ctxt->input != NULL) && (ctxt->input->filename))
6576 xmlGenericError(xmlGenericErrorContext,
6577 "%s(%d): ", ctxt->input->filename,
6578 ctxt->input->line);
6579 xmlGenericError(xmlGenericErrorContext,
6580 "Leaving IGNORE Conditional Section\n");
6581 }
6582
6583 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006584 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006585 }
6586
6587 if (RAW == 0)
6588 SHRINK;
6589
6590 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006591 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006592 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006593 if (ctxt->input->id != id) {
6594 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6595 "All markup of the conditional section is not in the same entity\n",
6596 NULL, NULL);
6597 }
Owen Taylor3473f882001-02-23 17:55:21 +00006598 SKIP(3);
6599 }
6600}
6601
6602/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006603 * xmlParseMarkupDecl:
6604 * @ctxt: an XML parser context
6605 *
6606 * parse Markup declarations
6607 *
6608 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6609 * NotationDecl | PI | Comment
6610 *
6611 * [ VC: Proper Declaration/PE Nesting ]
6612 * Parameter-entity replacement text must be properly nested with
6613 * markup declarations. That is to say, if either the first character
6614 * or the last character of a markup declaration (markupdecl above) is
6615 * contained in the replacement text for a parameter-entity reference,
6616 * both must be contained in the same replacement text.
6617 *
6618 * [ WFC: PEs in Internal Subset ]
6619 * In the internal DTD subset, parameter-entity references can occur
6620 * only where markup declarations can occur, not within markup declarations.
6621 * (This does not apply to references that occur in external parameter
6622 * entities or to the external subset.)
6623 */
6624void
6625xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6626 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006627 if (CUR == '<') {
6628 if (NXT(1) == '!') {
6629 switch (NXT(2)) {
6630 case 'E':
6631 if (NXT(3) == 'L')
6632 xmlParseElementDecl(ctxt);
6633 else if (NXT(3) == 'N')
6634 xmlParseEntityDecl(ctxt);
6635 break;
6636 case 'A':
6637 xmlParseAttributeListDecl(ctxt);
6638 break;
6639 case 'N':
6640 xmlParseNotationDecl(ctxt);
6641 break;
6642 case '-':
6643 xmlParseComment(ctxt);
6644 break;
6645 default:
6646 /* there is an error but it will be detected later */
6647 break;
6648 }
6649 } else if (NXT(1) == '?') {
6650 xmlParsePI(ctxt);
6651 }
6652 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006653 /*
6654 * This is only for internal subset. On external entities,
6655 * the replacement is done before parsing stage
6656 */
6657 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6658 xmlParsePEReference(ctxt);
6659
6660 /*
6661 * Conditional sections are allowed from entities included
6662 * by PE References in the internal subset.
6663 */
6664 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6665 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6666 xmlParseConditionalSections(ctxt);
6667 }
6668 }
6669
6670 ctxt->instate = XML_PARSER_DTD;
6671}
6672
6673/**
6674 * xmlParseTextDecl:
6675 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006676 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006677 * parse an XML declaration header for external entities
6678 *
6679 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006680 */
6681
6682void
6683xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6684 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006685 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006686
6687 /*
6688 * We know that '<?xml' is here.
6689 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006690 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006691 SKIP(5);
6692 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006693 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006694 return;
6695 }
6696
William M. Brack76e95df2003-10-18 16:20:14 +00006697 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006698 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6699 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006700 }
6701 SKIP_BLANKS;
6702
6703 /*
6704 * We may have the VersionInfo here.
6705 */
6706 version = xmlParseVersionInfo(ctxt);
6707 if (version == NULL)
6708 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006709 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006710 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006711 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6712 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006713 }
6714 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006715 ctxt->input->version = version;
6716
6717 /*
6718 * We must have the encoding declaration
6719 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006720 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006721 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6722 /*
6723 * The XML REC instructs us to stop parsing right here
6724 */
6725 return;
6726 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006727 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6728 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6729 "Missing encoding in text declaration\n");
6730 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006731
6732 SKIP_BLANKS;
6733 if ((RAW == '?') && (NXT(1) == '>')) {
6734 SKIP(2);
6735 } else if (RAW == '>') {
6736 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006737 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006738 NEXT;
6739 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006740 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006741 MOVETO_ENDTAG(CUR_PTR);
6742 NEXT;
6743 }
6744}
6745
6746/**
Owen Taylor3473f882001-02-23 17:55:21 +00006747 * xmlParseExternalSubset:
6748 * @ctxt: an XML parser context
6749 * @ExternalID: the external identifier
6750 * @SystemID: the system identifier (or URL)
6751 *
6752 * parse Markup declarations from an external subset
6753 *
6754 * [30] extSubset ::= textDecl? extSubsetDecl
6755 *
6756 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6757 */
6758void
6759xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6760 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006761 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006762 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006763
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006764 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006765 (ctxt->input->end - ctxt->input->cur >= 4)) {
6766 xmlChar start[4];
6767 xmlCharEncoding enc;
6768
6769 start[0] = RAW;
6770 start[1] = NXT(1);
6771 start[2] = NXT(2);
6772 start[3] = NXT(3);
6773 enc = xmlDetectCharEncoding(start, 4);
6774 if (enc != XML_CHAR_ENCODING_NONE)
6775 xmlSwitchEncoding(ctxt, enc);
6776 }
6777
Daniel Veillarda07050d2003-10-19 14:46:32 +00006778 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006779 xmlParseTextDecl(ctxt);
6780 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6781 /*
6782 * The XML REC instructs us to stop parsing right here
6783 */
6784 ctxt->instate = XML_PARSER_EOF;
6785 return;
6786 }
6787 }
6788 if (ctxt->myDoc == NULL) {
6789 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006790 if (ctxt->myDoc == NULL) {
6791 xmlErrMemory(ctxt, "New Doc failed");
6792 return;
6793 }
6794 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006795 }
6796 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6797 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6798
6799 ctxt->instate = XML_PARSER_DTD;
6800 ctxt->external = 1;
6801 while (((RAW == '<') && (NXT(1) == '?')) ||
6802 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006803 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006804 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006805 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006806
6807 GROW;
6808 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6809 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006810 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006811 NEXT;
6812 } else if (RAW == '%') {
6813 xmlParsePEReference(ctxt);
6814 } else
6815 xmlParseMarkupDecl(ctxt);
6816
6817 /*
6818 * Pop-up of finished entities.
6819 */
6820 while ((RAW == 0) && (ctxt->inputNr > 1))
6821 xmlPopInput(ctxt);
6822
Daniel Veillardfdc91562002-07-01 21:52:03 +00006823 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006824 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006825 break;
6826 }
6827 }
6828
6829 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006830 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006831 }
6832
6833}
6834
6835/**
6836 * xmlParseReference:
6837 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006838 *
Owen Taylor3473f882001-02-23 17:55:21 +00006839 * parse and handle entity references in content, depending on the SAX
6840 * interface, this may end-up in a call to character() if this is a
6841 * CharRef, a predefined entity, if there is no reference() callback.
6842 * or if the parser was asked to switch to that mode.
6843 *
6844 * [67] Reference ::= EntityRef | CharRef
6845 */
6846void
6847xmlParseReference(xmlParserCtxtPtr ctxt) {
6848 xmlEntityPtr ent;
6849 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006850 int was_checked;
6851 xmlNodePtr list = NULL;
6852 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006853
Daniel Veillard0161e632008-08-28 15:36:32 +00006854
6855 if (RAW != '&')
6856 return;
6857
6858 /*
6859 * Simple case of a CharRef
6860 */
Owen Taylor3473f882001-02-23 17:55:21 +00006861 if (NXT(1) == '#') {
6862 int i = 0;
6863 xmlChar out[10];
6864 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006865 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006866
Daniel Veillarddc171602008-03-26 17:41:38 +00006867 if (value == 0)
6868 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006869 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6870 /*
6871 * So we are using non-UTF-8 buffers
6872 * Check that the char fit on 8bits, if not
6873 * generate a CharRef.
6874 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006875 if (value <= 0xFF) {
6876 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006877 out[1] = 0;
6878 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6879 (!ctxt->disableSAX))
6880 ctxt->sax->characters(ctxt->userData, out, 1);
6881 } else {
6882 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006883 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006884 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006885 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006886 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6887 (!ctxt->disableSAX))
6888 ctxt->sax->reference(ctxt->userData, out);
6889 }
6890 } else {
6891 /*
6892 * Just encode the value in UTF-8
6893 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006894 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006895 out[i] = 0;
6896 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6897 (!ctxt->disableSAX))
6898 ctxt->sax->characters(ctxt->userData, out, i);
6899 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006900 return;
6901 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006902
Daniel Veillard0161e632008-08-28 15:36:32 +00006903 /*
6904 * We are seeing an entity reference
6905 */
6906 ent = xmlParseEntityRef(ctxt);
6907 if (ent == NULL) return;
6908 if (!ctxt->wellFormed)
6909 return;
6910 was_checked = ent->checked;
6911
6912 /* special case of predefined entities */
6913 if ((ent->name == NULL) ||
6914 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6915 val = ent->content;
6916 if (val == NULL) return;
6917 /*
6918 * inline the entity.
6919 */
6920 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6921 (!ctxt->disableSAX))
6922 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6923 return;
6924 }
6925
6926 /*
6927 * The first reference to the entity trigger a parsing phase
6928 * where the ent->children is filled with the result from
6929 * the parsing.
6930 */
6931 if (ent->checked == 0) {
6932 unsigned long oldnbent = ctxt->nbentities;
6933
6934 /*
6935 * This is a bit hackish but this seems the best
6936 * way to make sure both SAX and DOM entity support
6937 * behaves okay.
6938 */
6939 void *user_data;
6940 if (ctxt->userData == ctxt)
6941 user_data = NULL;
6942 else
6943 user_data = ctxt->userData;
6944
6945 /*
6946 * Check that this entity is well formed
6947 * 4.3.2: An internal general parsed entity is well-formed
6948 * if its replacement text matches the production labeled
6949 * content.
6950 */
6951 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6952 ctxt->depth++;
6953 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6954 user_data, &list);
6955 ctxt->depth--;
6956
6957 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6958 ctxt->depth++;
6959 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6960 user_data, ctxt->depth, ent->URI,
6961 ent->ExternalID, &list);
6962 ctxt->depth--;
6963 } else {
6964 ret = XML_ERR_ENTITY_PE_INTERNAL;
6965 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6966 "invalid entity type found\n", NULL);
6967 }
6968
6969 /*
6970 * Store the number of entities needing parsing for this entity
6971 * content and do checkings
6972 */
6973 ent->checked = ctxt->nbentities - oldnbent;
6974 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006975 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006976 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006977 return;
6978 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006979 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6980 xmlFreeNodeList(list);
6981 return;
6982 }
Owen Taylor3473f882001-02-23 17:55:21 +00006983
Daniel Veillard0161e632008-08-28 15:36:32 +00006984 if ((ret == XML_ERR_OK) && (list != NULL)) {
6985 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6986 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6987 (ent->children == NULL)) {
6988 ent->children = list;
6989 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006990 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006991 * Prune it directly in the generated document
6992 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006993 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006994 if (((list->type == XML_TEXT_NODE) &&
6995 (list->next == NULL)) ||
6996 (ctxt->parseMode == XML_PARSE_READER)) {
6997 list->parent = (xmlNodePtr) ent;
6998 list = NULL;
6999 ent->owner = 1;
7000 } else {
7001 ent->owner = 0;
7002 while (list != NULL) {
7003 list->parent = (xmlNodePtr) ctxt->node;
7004 list->doc = ctxt->myDoc;
7005 if (list->next == NULL)
7006 ent->last = list;
7007 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007008 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007009 list = ent->children;
7010#ifdef LIBXML_LEGACY_ENABLED
7011 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7012 xmlAddEntityReference(ent, list, NULL);
7013#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007014 }
7015 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007016 ent->owner = 1;
7017 while (list != NULL) {
7018 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007019 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007020 if (list->next == NULL)
7021 ent->last = list;
7022 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007023 }
7024 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007025 } else {
7026 xmlFreeNodeList(list);
7027 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007028 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007029 } else if ((ret != XML_ERR_OK) &&
7030 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7031 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7032 "Entity '%s' failed to parse\n", ent->name);
7033 } else if (list != NULL) {
7034 xmlFreeNodeList(list);
7035 list = NULL;
7036 }
7037 if (ent->checked == 0)
7038 ent->checked = 1;
7039 } else if (ent->checked != 1) {
7040 ctxt->nbentities += ent->checked;
7041 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007042
Daniel Veillard0161e632008-08-28 15:36:32 +00007043 /*
7044 * Now that the entity content has been gathered
7045 * provide it to the application, this can take different forms based
7046 * on the parsing modes.
7047 */
7048 if (ent->children == NULL) {
7049 /*
7050 * Probably running in SAX mode and the callbacks don't
7051 * build the entity content. So unless we already went
7052 * though parsing for first checking go though the entity
7053 * content to generate callbacks associated to the entity
7054 */
7055 if (was_checked != 0) {
7056 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007057 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007058 * This is a bit hackish but this seems the best
7059 * way to make sure both SAX and DOM entity support
7060 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007061 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007062 if (ctxt->userData == ctxt)
7063 user_data = NULL;
7064 else
7065 user_data = ctxt->userData;
7066
7067 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7068 ctxt->depth++;
7069 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7070 ent->content, user_data, NULL);
7071 ctxt->depth--;
7072 } else if (ent->etype ==
7073 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7074 ctxt->depth++;
7075 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7076 ctxt->sax, user_data, ctxt->depth,
7077 ent->URI, ent->ExternalID, NULL);
7078 ctxt->depth--;
7079 } else {
7080 ret = XML_ERR_ENTITY_PE_INTERNAL;
7081 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7082 "invalid entity type found\n", NULL);
7083 }
7084 if (ret == XML_ERR_ENTITY_LOOP) {
7085 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7086 return;
7087 }
7088 }
7089 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7090 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7091 /*
7092 * Entity reference callback comes second, it's somewhat
7093 * superfluous but a compatibility to historical behaviour
7094 */
7095 ctxt->sax->reference(ctxt->userData, ent->name);
7096 }
7097 return;
7098 }
7099
7100 /*
7101 * If we didn't get any children for the entity being built
7102 */
7103 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7104 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7105 /*
7106 * Create a node.
7107 */
7108 ctxt->sax->reference(ctxt->userData, ent->name);
7109 return;
7110 }
7111
7112 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7113 /*
7114 * There is a problem on the handling of _private for entities
7115 * (bug 155816): Should we copy the content of the field from
7116 * the entity (possibly overwriting some value set by the user
7117 * when a copy is created), should we leave it alone, or should
7118 * we try to take care of different situations? The problem
7119 * is exacerbated by the usage of this field by the xmlReader.
7120 * To fix this bug, we look at _private on the created node
7121 * and, if it's NULL, we copy in whatever was in the entity.
7122 * If it's not NULL we leave it alone. This is somewhat of a
7123 * hack - maybe we should have further tests to determine
7124 * what to do.
7125 */
7126 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7127 /*
7128 * Seems we are generating the DOM content, do
7129 * a simple tree copy for all references except the first
7130 * In the first occurrence list contains the replacement.
7131 * progressive == 2 means we are operating on the Reader
7132 * and since nodes are discarded we must copy all the time.
7133 */
7134 if (((list == NULL) && (ent->owner == 0)) ||
7135 (ctxt->parseMode == XML_PARSE_READER)) {
7136 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7137
7138 /*
7139 * when operating on a reader, the entities definitions
7140 * are always owning the entities subtree.
7141 if (ctxt->parseMode == XML_PARSE_READER)
7142 ent->owner = 1;
7143 */
7144
7145 cur = ent->children;
7146 while (cur != NULL) {
7147 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7148 if (nw != NULL) {
7149 if (nw->_private == NULL)
7150 nw->_private = cur->_private;
7151 if (firstChild == NULL){
7152 firstChild = nw;
7153 }
7154 nw = xmlAddChild(ctxt->node, nw);
7155 }
7156 if (cur == ent->last) {
7157 /*
7158 * needed to detect some strange empty
7159 * node cases in the reader tests
7160 */
7161 if ((ctxt->parseMode == XML_PARSE_READER) &&
7162 (nw != NULL) &&
7163 (nw->type == XML_ELEMENT_NODE) &&
7164 (nw->children == NULL))
7165 nw->extra = 1;
7166
7167 break;
7168 }
7169 cur = cur->next;
7170 }
7171#ifdef LIBXML_LEGACY_ENABLED
7172 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7173 xmlAddEntityReference(ent, firstChild, nw);
7174#endif /* LIBXML_LEGACY_ENABLED */
7175 } else if (list == NULL) {
7176 xmlNodePtr nw = NULL, cur, next, last,
7177 firstChild = NULL;
7178 /*
7179 * Copy the entity child list and make it the new
7180 * entity child list. The goal is to make sure any
7181 * ID or REF referenced will be the one from the
7182 * document content and not the entity copy.
7183 */
7184 cur = ent->children;
7185 ent->children = NULL;
7186 last = ent->last;
7187 ent->last = NULL;
7188 while (cur != NULL) {
7189 next = cur->next;
7190 cur->next = NULL;
7191 cur->parent = NULL;
7192 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7193 if (nw != NULL) {
7194 if (nw->_private == NULL)
7195 nw->_private = cur->_private;
7196 if (firstChild == NULL){
7197 firstChild = cur;
7198 }
7199 xmlAddChild((xmlNodePtr) ent, nw);
7200 xmlAddChild(ctxt->node, cur);
7201 }
7202 if (cur == last)
7203 break;
7204 cur = next;
7205 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007206 if (ent->owner == 0)
7207 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007208#ifdef LIBXML_LEGACY_ENABLED
7209 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7210 xmlAddEntityReference(ent, firstChild, nw);
7211#endif /* LIBXML_LEGACY_ENABLED */
7212 } else {
7213 const xmlChar *nbktext;
7214
7215 /*
7216 * the name change is to avoid coalescing of the
7217 * node with a possible previous text one which
7218 * would make ent->children a dangling pointer
7219 */
7220 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7221 -1);
7222 if (ent->children->type == XML_TEXT_NODE)
7223 ent->children->name = nbktext;
7224 if ((ent->last != ent->children) &&
7225 (ent->last->type == XML_TEXT_NODE))
7226 ent->last->name = nbktext;
7227 xmlAddChildList(ctxt->node, ent->children);
7228 }
7229
7230 /*
7231 * This is to avoid a nasty side effect, see
7232 * characters() in SAX.c
7233 */
7234 ctxt->nodemem = 0;
7235 ctxt->nodelen = 0;
7236 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007237 }
7238 }
7239}
7240
7241/**
7242 * xmlParseEntityRef:
7243 * @ctxt: an XML parser context
7244 *
7245 * parse ENTITY references declarations
7246 *
7247 * [68] EntityRef ::= '&' Name ';'
7248 *
7249 * [ WFC: Entity Declared ]
7250 * In a document without any DTD, a document with only an internal DTD
7251 * subset which contains no parameter entity references, or a document
7252 * with "standalone='yes'", the Name given in the entity reference
7253 * must match that in an entity declaration, except that well-formed
7254 * documents need not declare any of the following entities: amp, lt,
7255 * gt, apos, quot. The declaration of a parameter entity must precede
7256 * any reference to it. Similarly, the declaration of a general entity
7257 * must precede any reference to it which appears in a default value in an
7258 * attribute-list declaration. Note that if entities are declared in the
7259 * external subset or in external parameter entities, a non-validating
7260 * processor is not obligated to read and process their declarations;
7261 * for such documents, the rule that an entity must be declared is a
7262 * well-formedness constraint only if standalone='yes'.
7263 *
7264 * [ WFC: Parsed Entity ]
7265 * An entity reference must not contain the name of an unparsed entity
7266 *
7267 * Returns the xmlEntityPtr if found, or NULL otherwise.
7268 */
7269xmlEntityPtr
7270xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007271 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007272 xmlEntityPtr ent = NULL;
7273
7274 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007275
Daniel Veillard0161e632008-08-28 15:36:32 +00007276 if (RAW != '&')
7277 return(NULL);
7278 NEXT;
7279 name = xmlParseName(ctxt);
7280 if (name == NULL) {
7281 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7282 "xmlParseEntityRef: no name\n");
7283 return(NULL);
7284 }
7285 if (RAW != ';') {
7286 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7287 return(NULL);
7288 }
7289 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007290
Daniel Veillard0161e632008-08-28 15:36:32 +00007291 /*
7292 * Predefined entites override any extra definition
7293 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007294 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7295 ent = xmlGetPredefinedEntity(name);
7296 if (ent != NULL)
7297 return(ent);
7298 }
Owen Taylor3473f882001-02-23 17:55:21 +00007299
Daniel Veillard0161e632008-08-28 15:36:32 +00007300 /*
7301 * Increate the number of entity references parsed
7302 */
7303 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007304
Daniel Veillard0161e632008-08-28 15:36:32 +00007305 /*
7306 * Ask first SAX for entity resolution, otherwise try the
7307 * entities which may have stored in the parser context.
7308 */
7309 if (ctxt->sax != NULL) {
7310 if (ctxt->sax->getEntity != NULL)
7311 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007312 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7313 (ctxt->options & XML_PARSE_OLDSAX))
7314 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007315 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7316 (ctxt->userData==ctxt)) {
7317 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007318 }
7319 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007320 /*
7321 * [ WFC: Entity Declared ]
7322 * In a document without any DTD, a document with only an
7323 * internal DTD subset which contains no parameter entity
7324 * references, or a document with "standalone='yes'", the
7325 * Name given in the entity reference must match that in an
7326 * entity declaration, except that well-formed documents
7327 * need not declare any of the following entities: amp, lt,
7328 * gt, apos, quot.
7329 * The declaration of a parameter entity must precede any
7330 * reference to it.
7331 * Similarly, the declaration of a general entity must
7332 * precede any reference to it which appears in a default
7333 * value in an attribute-list declaration. Note that if
7334 * entities are declared in the external subset or in
7335 * external parameter entities, a non-validating processor
7336 * is not obligated to read and process their declarations;
7337 * for such documents, the rule that an entity must be
7338 * declared is a well-formedness constraint only if
7339 * standalone='yes'.
7340 */
7341 if (ent == NULL) {
7342 if ((ctxt->standalone == 1) ||
7343 ((ctxt->hasExternalSubset == 0) &&
7344 (ctxt->hasPErefs == 0))) {
7345 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7346 "Entity '%s' not defined\n", name);
7347 } else {
7348 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7349 "Entity '%s' not defined\n", name);
7350 if ((ctxt->inSubset == 0) &&
7351 (ctxt->sax != NULL) &&
7352 (ctxt->sax->reference != NULL)) {
7353 ctxt->sax->reference(ctxt->userData, name);
7354 }
7355 }
7356 ctxt->valid = 0;
7357 }
7358
7359 /*
7360 * [ WFC: Parsed Entity ]
7361 * An entity reference must not contain the name of an
7362 * unparsed entity
7363 */
7364 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7365 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7366 "Entity reference to unparsed entity %s\n", name);
7367 }
7368
7369 /*
7370 * [ WFC: No External Entity References ]
7371 * Attribute values cannot contain direct or indirect
7372 * entity references to external entities.
7373 */
7374 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7375 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7376 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7377 "Attribute references external entity '%s'\n", name);
7378 }
7379 /*
7380 * [ WFC: No < in Attribute Values ]
7381 * The replacement text of any entity referred to directly or
7382 * indirectly in an attribute value (other than "&lt;") must
7383 * not contain a <.
7384 */
7385 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7386 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007387 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007388 (xmlStrchr(ent->content, '<'))) {
7389 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7390 "'<' in entity '%s' is not allowed in attributes values\n", name);
7391 }
7392
7393 /*
7394 * Internal check, no parameter entities here ...
7395 */
7396 else {
7397 switch (ent->etype) {
7398 case XML_INTERNAL_PARAMETER_ENTITY:
7399 case XML_EXTERNAL_PARAMETER_ENTITY:
7400 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7401 "Attempt to reference the parameter entity '%s'\n",
7402 name);
7403 break;
7404 default:
7405 break;
7406 }
7407 }
7408
7409 /*
7410 * [ WFC: No Recursion ]
7411 * A parsed entity must not contain a recursive reference
7412 * to itself, either directly or indirectly.
7413 * Done somewhere else
7414 */
Owen Taylor3473f882001-02-23 17:55:21 +00007415 return(ent);
7416}
7417
7418/**
7419 * xmlParseStringEntityRef:
7420 * @ctxt: an XML parser context
7421 * @str: a pointer to an index in the string
7422 *
7423 * parse ENTITY references declarations, but this version parses it from
7424 * a string value.
7425 *
7426 * [68] EntityRef ::= '&' Name ';'
7427 *
7428 * [ WFC: Entity Declared ]
7429 * In a document without any DTD, a document with only an internal DTD
7430 * subset which contains no parameter entity references, or a document
7431 * with "standalone='yes'", the Name given in the entity reference
7432 * must match that in an entity declaration, except that well-formed
7433 * documents need not declare any of the following entities: amp, lt,
7434 * gt, apos, quot. The declaration of a parameter entity must precede
7435 * any reference to it. Similarly, the declaration of a general entity
7436 * must precede any reference to it which appears in a default value in an
7437 * attribute-list declaration. Note that if entities are declared in the
7438 * external subset or in external parameter entities, a non-validating
7439 * processor is not obligated to read and process their declarations;
7440 * for such documents, the rule that an entity must be declared is a
7441 * well-formedness constraint only if standalone='yes'.
7442 *
7443 * [ WFC: Parsed Entity ]
7444 * An entity reference must not contain the name of an unparsed entity
7445 *
7446 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7447 * is updated to the current location in the string.
7448 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007449static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007450xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7451 xmlChar *name;
7452 const xmlChar *ptr;
7453 xmlChar cur;
7454 xmlEntityPtr ent = NULL;
7455
7456 if ((str == NULL) || (*str == NULL))
7457 return(NULL);
7458 ptr = *str;
7459 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007460 if (cur != '&')
7461 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007462
Daniel Veillard0161e632008-08-28 15:36:32 +00007463 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007464 name = xmlParseStringName(ctxt, &ptr);
7465 if (name == NULL) {
7466 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7467 "xmlParseStringEntityRef: no name\n");
7468 *str = ptr;
7469 return(NULL);
7470 }
7471 if (*ptr != ';') {
7472 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007473 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007474 *str = ptr;
7475 return(NULL);
7476 }
7477 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007478
Owen Taylor3473f882001-02-23 17:55:21 +00007479
Daniel Veillard0161e632008-08-28 15:36:32 +00007480 /*
7481 * Predefined entites override any extra definition
7482 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007483 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7484 ent = xmlGetPredefinedEntity(name);
7485 if (ent != NULL) {
7486 xmlFree(name);
7487 *str = ptr;
7488 return(ent);
7489 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007490 }
Owen Taylor3473f882001-02-23 17:55:21 +00007491
Daniel Veillard0161e632008-08-28 15:36:32 +00007492 /*
7493 * Increate the number of entity references parsed
7494 */
7495 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007496
Daniel Veillard0161e632008-08-28 15:36:32 +00007497 /*
7498 * Ask first SAX for entity resolution, otherwise try the
7499 * entities which may have stored in the parser context.
7500 */
7501 if (ctxt->sax != NULL) {
7502 if (ctxt->sax->getEntity != NULL)
7503 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007504 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7505 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007506 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7507 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007508 }
7509 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007510
7511 /*
7512 * [ WFC: Entity Declared ]
7513 * In a document without any DTD, a document with only an
7514 * internal DTD subset which contains no parameter entity
7515 * references, or a document with "standalone='yes'", the
7516 * Name given in the entity reference must match that in an
7517 * entity declaration, except that well-formed documents
7518 * need not declare any of the following entities: amp, lt,
7519 * gt, apos, quot.
7520 * The declaration of a parameter entity must precede any
7521 * reference to it.
7522 * Similarly, the declaration of a general entity must
7523 * precede any reference to it which appears in a default
7524 * value in an attribute-list declaration. Note that if
7525 * entities are declared in the external subset or in
7526 * external parameter entities, a non-validating processor
7527 * is not obligated to read and process their declarations;
7528 * for such documents, the rule that an entity must be
7529 * declared is a well-formedness constraint only if
7530 * standalone='yes'.
7531 */
7532 if (ent == NULL) {
7533 if ((ctxt->standalone == 1) ||
7534 ((ctxt->hasExternalSubset == 0) &&
7535 (ctxt->hasPErefs == 0))) {
7536 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7537 "Entity '%s' not defined\n", name);
7538 } else {
7539 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7540 "Entity '%s' not defined\n",
7541 name);
7542 }
7543 /* TODO ? check regressions ctxt->valid = 0; */
7544 }
7545
7546 /*
7547 * [ WFC: Parsed Entity ]
7548 * An entity reference must not contain the name of an
7549 * unparsed entity
7550 */
7551 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7552 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7553 "Entity reference to unparsed entity %s\n", name);
7554 }
7555
7556 /*
7557 * [ WFC: No External Entity References ]
7558 * Attribute values cannot contain direct or indirect
7559 * entity references to external entities.
7560 */
7561 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7562 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7563 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7564 "Attribute references external entity '%s'\n", name);
7565 }
7566 /*
7567 * [ WFC: No < in Attribute Values ]
7568 * The replacement text of any entity referred to directly or
7569 * indirectly in an attribute value (other than "&lt;") must
7570 * not contain a <.
7571 */
7572 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7573 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007574 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007575 (xmlStrchr(ent->content, '<'))) {
7576 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7577 "'<' in entity '%s' is not allowed in attributes values\n",
7578 name);
7579 }
7580
7581 /*
7582 * Internal check, no parameter entities here ...
7583 */
7584 else {
7585 switch (ent->etype) {
7586 case XML_INTERNAL_PARAMETER_ENTITY:
7587 case XML_EXTERNAL_PARAMETER_ENTITY:
7588 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7589 "Attempt to reference the parameter entity '%s'\n",
7590 name);
7591 break;
7592 default:
7593 break;
7594 }
7595 }
7596
7597 /*
7598 * [ WFC: No Recursion ]
7599 * A parsed entity must not contain a recursive reference
7600 * to itself, either directly or indirectly.
7601 * Done somewhere else
7602 */
7603
7604 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007605 *str = ptr;
7606 return(ent);
7607}
7608
7609/**
7610 * xmlParsePEReference:
7611 * @ctxt: an XML parser context
7612 *
7613 * parse PEReference declarations
7614 * The entity content is handled directly by pushing it's content as
7615 * a new input stream.
7616 *
7617 * [69] PEReference ::= '%' Name ';'
7618 *
7619 * [ WFC: No Recursion ]
7620 * A parsed entity must not contain a recursive
7621 * reference to itself, either directly or indirectly.
7622 *
7623 * [ WFC: Entity Declared ]
7624 * In a document without any DTD, a document with only an internal DTD
7625 * subset which contains no parameter entity references, or a document
7626 * with "standalone='yes'", ... ... The declaration of a parameter
7627 * entity must precede any reference to it...
7628 *
7629 * [ VC: Entity Declared ]
7630 * In a document with an external subset or external parameter entities
7631 * with "standalone='no'", ... ... The declaration of a parameter entity
7632 * must precede any reference to it...
7633 *
7634 * [ WFC: In DTD ]
7635 * Parameter-entity references may only appear in the DTD.
7636 * NOTE: misleading but this is handled.
7637 */
7638void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007639xmlParsePEReference(xmlParserCtxtPtr ctxt)
7640{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007641 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007642 xmlEntityPtr entity = NULL;
7643 xmlParserInputPtr input;
7644
Daniel Veillard0161e632008-08-28 15:36:32 +00007645 if (RAW != '%')
7646 return;
7647 NEXT;
7648 name = xmlParseName(ctxt);
7649 if (name == NULL) {
7650 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7651 "xmlParsePEReference: no name\n");
7652 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007653 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007654 if (RAW != ';') {
7655 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7656 return;
7657 }
7658
7659 NEXT;
7660
7661 /*
7662 * Increate the number of entity references parsed
7663 */
7664 ctxt->nbentities++;
7665
7666 /*
7667 * Request the entity from SAX
7668 */
7669 if ((ctxt->sax != NULL) &&
7670 (ctxt->sax->getParameterEntity != NULL))
7671 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7672 name);
7673 if (entity == NULL) {
7674 /*
7675 * [ WFC: Entity Declared ]
7676 * In a document without any DTD, a document with only an
7677 * internal DTD subset which contains no parameter entity
7678 * references, or a document with "standalone='yes'", ...
7679 * ... The declaration of a parameter entity must precede
7680 * any reference to it...
7681 */
7682 if ((ctxt->standalone == 1) ||
7683 ((ctxt->hasExternalSubset == 0) &&
7684 (ctxt->hasPErefs == 0))) {
7685 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7686 "PEReference: %%%s; not found\n",
7687 name);
7688 } else {
7689 /*
7690 * [ VC: Entity Declared ]
7691 * In a document with an external subset or external
7692 * parameter entities with "standalone='no'", ...
7693 * ... The declaration of a parameter entity must
7694 * precede any reference to it...
7695 */
7696 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697 "PEReference: %%%s; not found\n",
7698 name, NULL);
7699 ctxt->valid = 0;
7700 }
7701 } else {
7702 /*
7703 * Internal checking in case the entity quest barfed
7704 */
7705 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7706 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7707 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7708 "Internal: %%%s; is not a parameter entity\n",
7709 name, NULL);
7710 } else if (ctxt->input->free != deallocblankswrapper) {
7711 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7712 if (xmlPushInput(ctxt, input) < 0)
7713 return;
7714 } else {
7715 /*
7716 * TODO !!!
7717 * handle the extra spaces added before and after
7718 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7719 */
7720 input = xmlNewEntityInputStream(ctxt, entity);
7721 if (xmlPushInput(ctxt, input) < 0)
7722 return;
7723 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7724 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7725 (IS_BLANK_CH(NXT(5)))) {
7726 xmlParseTextDecl(ctxt);
7727 if (ctxt->errNo ==
7728 XML_ERR_UNSUPPORTED_ENCODING) {
7729 /*
7730 * The XML REC instructs us to stop parsing
7731 * right here
7732 */
7733 ctxt->instate = XML_PARSER_EOF;
7734 return;
7735 }
7736 }
7737 }
7738 }
7739 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007740}
7741
7742/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007743 * xmlLoadEntityContent:
7744 * @ctxt: an XML parser context
7745 * @entity: an unloaded system entity
7746 *
7747 * Load the original content of the given system entity from the
7748 * ExternalID/SystemID given. This is to be used for Included in Literal
7749 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7750 *
7751 * Returns 0 in case of success and -1 in case of failure
7752 */
7753static int
7754xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7755 xmlParserInputPtr input;
7756 xmlBufferPtr buf;
7757 int l, c;
7758 int count = 0;
7759
7760 if ((ctxt == NULL) || (entity == NULL) ||
7761 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7762 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7763 (entity->content != NULL)) {
7764 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7765 "xmlLoadEntityContent parameter error");
7766 return(-1);
7767 }
7768
7769 if (xmlParserDebugEntities)
7770 xmlGenericError(xmlGenericErrorContext,
7771 "Reading %s entity content input\n", entity->name);
7772
7773 buf = xmlBufferCreate();
7774 if (buf == NULL) {
7775 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7776 "xmlLoadEntityContent parameter error");
7777 return(-1);
7778 }
7779
7780 input = xmlNewEntityInputStream(ctxt, entity);
7781 if (input == NULL) {
7782 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7783 "xmlLoadEntityContent input error");
7784 xmlBufferFree(buf);
7785 return(-1);
7786 }
7787
7788 /*
7789 * Push the entity as the current input, read char by char
7790 * saving to the buffer until the end of the entity or an error
7791 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007792 if (xmlPushInput(ctxt, input) < 0) {
7793 xmlBufferFree(buf);
7794 return(-1);
7795 }
7796
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007797 GROW;
7798 c = CUR_CHAR(l);
7799 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7800 (IS_CHAR(c))) {
7801 xmlBufferAdd(buf, ctxt->input->cur, l);
7802 if (count++ > 100) {
7803 count = 0;
7804 GROW;
7805 }
7806 NEXTL(l);
7807 c = CUR_CHAR(l);
7808 }
7809
7810 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7811 xmlPopInput(ctxt);
7812 } else if (!IS_CHAR(c)) {
7813 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7814 "xmlLoadEntityContent: invalid char value %d\n",
7815 c);
7816 xmlBufferFree(buf);
7817 return(-1);
7818 }
7819 entity->content = buf->content;
7820 buf->content = NULL;
7821 xmlBufferFree(buf);
7822
7823 return(0);
7824}
7825
7826/**
Owen Taylor3473f882001-02-23 17:55:21 +00007827 * xmlParseStringPEReference:
7828 * @ctxt: an XML parser context
7829 * @str: a pointer to an index in the string
7830 *
7831 * parse PEReference declarations
7832 *
7833 * [69] PEReference ::= '%' Name ';'
7834 *
7835 * [ WFC: No Recursion ]
7836 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007837 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007838 *
7839 * [ WFC: Entity Declared ]
7840 * In a document without any DTD, a document with only an internal DTD
7841 * subset which contains no parameter entity references, or a document
7842 * with "standalone='yes'", ... ... The declaration of a parameter
7843 * entity must precede any reference to it...
7844 *
7845 * [ VC: Entity Declared ]
7846 * In a document with an external subset or external parameter entities
7847 * with "standalone='no'", ... ... The declaration of a parameter entity
7848 * must precede any reference to it...
7849 *
7850 * [ WFC: In DTD ]
7851 * Parameter-entity references may only appear in the DTD.
7852 * NOTE: misleading but this is handled.
7853 *
7854 * Returns the string of the entity content.
7855 * str is updated to the current value of the index
7856 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007857static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007858xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7859 const xmlChar *ptr;
7860 xmlChar cur;
7861 xmlChar *name;
7862 xmlEntityPtr entity = NULL;
7863
7864 if ((str == NULL) || (*str == NULL)) return(NULL);
7865 ptr = *str;
7866 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007867 if (cur != '%')
7868 return(NULL);
7869 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007870 name = xmlParseStringName(ctxt, &ptr);
7871 if (name == NULL) {
7872 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7873 "xmlParseStringPEReference: no name\n");
7874 *str = ptr;
7875 return(NULL);
7876 }
7877 cur = *ptr;
7878 if (cur != ';') {
7879 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7880 xmlFree(name);
7881 *str = ptr;
7882 return(NULL);
7883 }
7884 ptr++;
7885
7886 /*
7887 * Increate the number of entity references parsed
7888 */
7889 ctxt->nbentities++;
7890
7891 /*
7892 * Request the entity from SAX
7893 */
7894 if ((ctxt->sax != NULL) &&
7895 (ctxt->sax->getParameterEntity != NULL))
7896 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7897 name);
7898 if (entity == NULL) {
7899 /*
7900 * [ WFC: Entity Declared ]
7901 * In a document without any DTD, a document with only an
7902 * internal DTD subset which contains no parameter entity
7903 * references, or a document with "standalone='yes'", ...
7904 * ... The declaration of a parameter entity must precede
7905 * any reference to it...
7906 */
7907 if ((ctxt->standalone == 1) ||
7908 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7909 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7910 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007911 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007912 /*
7913 * [ VC: Entity Declared ]
7914 * In a document with an external subset or external
7915 * parameter entities with "standalone='no'", ...
7916 * ... The declaration of a parameter entity must
7917 * precede any reference to it...
7918 */
7919 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7920 "PEReference: %%%s; not found\n",
7921 name, NULL);
7922 ctxt->valid = 0;
7923 }
7924 } else {
7925 /*
7926 * Internal checking in case the entity quest barfed
7927 */
7928 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7929 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7930 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7931 "%%%s; is not a parameter entity\n",
7932 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007933 }
7934 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007935 ctxt->hasPErefs = 1;
7936 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007937 *str = ptr;
7938 return(entity);
7939}
7940
7941/**
7942 * xmlParseDocTypeDecl:
7943 * @ctxt: an XML parser context
7944 *
7945 * parse a DOCTYPE declaration
7946 *
7947 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7948 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7949 *
7950 * [ VC: Root Element Type ]
7951 * The Name in the document type declaration must match the element
7952 * type of the root element.
7953 */
7954
7955void
7956xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007957 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007958 xmlChar *ExternalID = NULL;
7959 xmlChar *URI = NULL;
7960
7961 /*
7962 * We know that '<!DOCTYPE' has been detected.
7963 */
7964 SKIP(9);
7965
7966 SKIP_BLANKS;
7967
7968 /*
7969 * Parse the DOCTYPE name.
7970 */
7971 name = xmlParseName(ctxt);
7972 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007973 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7974 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007975 }
7976 ctxt->intSubName = name;
7977
7978 SKIP_BLANKS;
7979
7980 /*
7981 * Check for SystemID and ExternalID
7982 */
7983 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7984
7985 if ((URI != NULL) || (ExternalID != NULL)) {
7986 ctxt->hasExternalSubset = 1;
7987 }
7988 ctxt->extSubURI = URI;
7989 ctxt->extSubSystem = ExternalID;
7990
7991 SKIP_BLANKS;
7992
7993 /*
7994 * Create and update the internal subset.
7995 */
7996 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7997 (!ctxt->disableSAX))
7998 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7999
8000 /*
8001 * Is there any internal subset declarations ?
8002 * they are handled separately in xmlParseInternalSubset()
8003 */
8004 if (RAW == '[')
8005 return;
8006
8007 /*
8008 * We should be at the end of the DOCTYPE declaration.
8009 */
8010 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008011 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008012 }
8013 NEXT;
8014}
8015
8016/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008017 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008018 * @ctxt: an XML parser context
8019 *
8020 * parse the internal subset declaration
8021 *
8022 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8023 */
8024
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008025static void
Owen Taylor3473f882001-02-23 17:55:21 +00008026xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8027 /*
8028 * Is there any DTD definition ?
8029 */
8030 if (RAW == '[') {
8031 ctxt->instate = XML_PARSER_DTD;
8032 NEXT;
8033 /*
8034 * Parse the succession of Markup declarations and
8035 * PEReferences.
8036 * Subsequence (markupdecl | PEReference | S)*
8037 */
8038 while (RAW != ']') {
8039 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008040 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008041
8042 SKIP_BLANKS;
8043 xmlParseMarkupDecl(ctxt);
8044 xmlParsePEReference(ctxt);
8045
8046 /*
8047 * Pop-up of finished entities.
8048 */
8049 while ((RAW == 0) && (ctxt->inputNr > 1))
8050 xmlPopInput(ctxt);
8051
8052 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008053 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008054 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008055 break;
8056 }
8057 }
8058 if (RAW == ']') {
8059 NEXT;
8060 SKIP_BLANKS;
8061 }
8062 }
8063
8064 /*
8065 * We should be at the end of the DOCTYPE declaration.
8066 */
8067 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008068 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008069 }
8070 NEXT;
8071}
8072
Daniel Veillard81273902003-09-30 00:43:48 +00008073#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008074/**
8075 * xmlParseAttribute:
8076 * @ctxt: an XML parser context
8077 * @value: a xmlChar ** used to store the value of the attribute
8078 *
8079 * parse an attribute
8080 *
8081 * [41] Attribute ::= Name Eq AttValue
8082 *
8083 * [ WFC: No External Entity References ]
8084 * Attribute values cannot contain direct or indirect entity references
8085 * to external entities.
8086 *
8087 * [ WFC: No < in Attribute Values ]
8088 * The replacement text of any entity referred to directly or indirectly in
8089 * an attribute value (other than "&lt;") must not contain a <.
8090 *
8091 * [ VC: Attribute Value Type ]
8092 * The attribute must have been declared; the value must be of the type
8093 * declared for it.
8094 *
8095 * [25] Eq ::= S? '=' S?
8096 *
8097 * With namespace:
8098 *
8099 * [NS 11] Attribute ::= QName Eq AttValue
8100 *
8101 * Also the case QName == xmlns:??? is handled independently as a namespace
8102 * definition.
8103 *
8104 * Returns the attribute name, and the value in *value.
8105 */
8106
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008107const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008108xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008109 const xmlChar *name;
8110 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008111
8112 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008113 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008114 name = xmlParseName(ctxt);
8115 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008116 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008117 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008118 return(NULL);
8119 }
8120
8121 /*
8122 * read the value
8123 */
8124 SKIP_BLANKS;
8125 if (RAW == '=') {
8126 NEXT;
8127 SKIP_BLANKS;
8128 val = xmlParseAttValue(ctxt);
8129 ctxt->instate = XML_PARSER_CONTENT;
8130 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008131 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008132 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008133 return(NULL);
8134 }
8135
8136 /*
8137 * Check that xml:lang conforms to the specification
8138 * No more registered as an error, just generate a warning now
8139 * since this was deprecated in XML second edition
8140 */
8141 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8142 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008143 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8144 "Malformed value for xml:lang : %s\n",
8145 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008146 }
8147 }
8148
8149 /*
8150 * Check that xml:space conforms to the specification
8151 */
8152 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8153 if (xmlStrEqual(val, BAD_CAST "default"))
8154 *(ctxt->space) = 0;
8155 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8156 *(ctxt->space) = 1;
8157 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008158 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008159"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008160 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008161 }
8162 }
8163
8164 *value = val;
8165 return(name);
8166}
8167
8168/**
8169 * xmlParseStartTag:
8170 * @ctxt: an XML parser context
8171 *
8172 * parse a start of tag either for rule element or
8173 * EmptyElement. In both case we don't parse the tag closing chars.
8174 *
8175 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8176 *
8177 * [ WFC: Unique Att Spec ]
8178 * No attribute name may appear more than once in the same start-tag or
8179 * empty-element tag.
8180 *
8181 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8182 *
8183 * [ WFC: Unique Att Spec ]
8184 * No attribute name may appear more than once in the same start-tag or
8185 * empty-element tag.
8186 *
8187 * With namespace:
8188 *
8189 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8190 *
8191 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8192 *
8193 * Returns the element name parsed
8194 */
8195
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008196const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008197xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008198 const xmlChar *name;
8199 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008200 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008201 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008202 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008203 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008204 int i;
8205
8206 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008207 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008208
8209 name = xmlParseName(ctxt);
8210 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008211 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008212 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008213 return(NULL);
8214 }
8215
8216 /*
8217 * Now parse the attributes, it ends up with the ending
8218 *
8219 * (S Attribute)* S?
8220 */
8221 SKIP_BLANKS;
8222 GROW;
8223
Daniel Veillard21a0f912001-02-25 19:54:14 +00008224 while ((RAW != '>') &&
8225 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008226 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008227 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008228 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008229
8230 attname = xmlParseAttribute(ctxt, &attvalue);
8231 if ((attname != NULL) && (attvalue != NULL)) {
8232 /*
8233 * [ WFC: Unique Att Spec ]
8234 * No attribute name may appear more than once in the same
8235 * start-tag or empty-element tag.
8236 */
8237 for (i = 0; i < nbatts;i += 2) {
8238 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008239 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008240 xmlFree(attvalue);
8241 goto failed;
8242 }
8243 }
Owen Taylor3473f882001-02-23 17:55:21 +00008244 /*
8245 * Add the pair to atts
8246 */
8247 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008248 maxatts = 22; /* allow for 10 attrs by default */
8249 atts = (const xmlChar **)
8250 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008251 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008252 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008253 if (attvalue != NULL)
8254 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008255 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008256 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008257 ctxt->atts = atts;
8258 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008259 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008260 const xmlChar **n;
8261
Owen Taylor3473f882001-02-23 17:55:21 +00008262 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008263 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008264 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008265 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008266 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008267 if (attvalue != NULL)
8268 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008269 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008270 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008271 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008272 ctxt->atts = atts;
8273 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008274 }
8275 atts[nbatts++] = attname;
8276 atts[nbatts++] = attvalue;
8277 atts[nbatts] = NULL;
8278 atts[nbatts + 1] = NULL;
8279 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008280 if (attvalue != NULL)
8281 xmlFree(attvalue);
8282 }
8283
8284failed:
8285
Daniel Veillard3772de32002-12-17 10:31:45 +00008286 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008287 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8288 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008289 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008290 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8291 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008292 }
8293 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008294 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8295 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008296 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8297 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008298 break;
8299 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008300 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008301 GROW;
8302 }
8303
8304 /*
8305 * SAX: Start of Element !
8306 */
8307 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008308 (!ctxt->disableSAX)) {
8309 if (nbatts > 0)
8310 ctxt->sax->startElement(ctxt->userData, name, atts);
8311 else
8312 ctxt->sax->startElement(ctxt->userData, name, NULL);
8313 }
Owen Taylor3473f882001-02-23 17:55:21 +00008314
8315 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008316 /* Free only the content strings */
8317 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008318 if (atts[i] != NULL)
8319 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008320 }
8321 return(name);
8322}
8323
8324/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008325 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008326 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008327 * @line: line of the start tag
8328 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008329 *
8330 * parse an end of tag
8331 *
8332 * [42] ETag ::= '</' Name S? '>'
8333 *
8334 * With namespace
8335 *
8336 * [NS 9] ETag ::= '</' QName S? '>'
8337 */
8338
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008339static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008340xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008341 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008342
8343 GROW;
8344 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008345 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008346 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008347 return;
8348 }
8349 SKIP(2);
8350
Daniel Veillard46de64e2002-05-29 08:21:33 +00008351 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008352
8353 /*
8354 * We should definitely be at the ending "S? '>'" part
8355 */
8356 GROW;
8357 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008358 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008359 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008360 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008361 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008362
8363 /*
8364 * [ WFC: Element Type Match ]
8365 * The Name in an element's end-tag must match the element type in the
8366 * start-tag.
8367 *
8368 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008369 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008370 if (name == NULL) name = BAD_CAST "unparseable";
8371 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008372 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008373 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008374 }
8375
8376 /*
8377 * SAX: End of Tag
8378 */
8379 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8380 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008381 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008382
Daniel Veillarde57ec792003-09-10 10:50:59 +00008383 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008384 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008385 return;
8386}
8387
8388/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008389 * xmlParseEndTag:
8390 * @ctxt: an XML parser context
8391 *
8392 * parse an end of tag
8393 *
8394 * [42] ETag ::= '</' Name S? '>'
8395 *
8396 * With namespace
8397 *
8398 * [NS 9] ETag ::= '</' QName S? '>'
8399 */
8400
8401void
8402xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008403 xmlParseEndTag1(ctxt, 0);
8404}
Daniel Veillard81273902003-09-30 00:43:48 +00008405#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008406
8407/************************************************************************
8408 * *
8409 * SAX 2 specific operations *
8410 * *
8411 ************************************************************************/
8412
Daniel Veillard0fb18932003-09-07 09:14:37 +00008413/*
8414 * xmlGetNamespace:
8415 * @ctxt: an XML parser context
8416 * @prefix: the prefix to lookup
8417 *
8418 * Lookup the namespace name for the @prefix (which ca be NULL)
8419 * The prefix must come from the @ctxt->dict dictionnary
8420 *
8421 * Returns the namespace name or NULL if not bound
8422 */
8423static const xmlChar *
8424xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8425 int i;
8426
Daniel Veillarde57ec792003-09-10 10:50:59 +00008427 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008428 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008429 if (ctxt->nsTab[i] == prefix) {
8430 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8431 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008432 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008433 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008434 return(NULL);
8435}
8436
8437/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008438 * xmlParseQName:
8439 * @ctxt: an XML parser context
8440 * @prefix: pointer to store the prefix part
8441 *
8442 * parse an XML Namespace QName
8443 *
8444 * [6] QName ::= (Prefix ':')? LocalPart
8445 * [7] Prefix ::= NCName
8446 * [8] LocalPart ::= NCName
8447 *
8448 * Returns the Name parsed or NULL
8449 */
8450
8451static const xmlChar *
8452xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8453 const xmlChar *l, *p;
8454
8455 GROW;
8456
8457 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008458 if (l == NULL) {
8459 if (CUR == ':') {
8460 l = xmlParseName(ctxt);
8461 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008462 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8463 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008464 *prefix = NULL;
8465 return(l);
8466 }
8467 }
8468 return(NULL);
8469 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008470 if (CUR == ':') {
8471 NEXT;
8472 p = l;
8473 l = xmlParseNCName(ctxt);
8474 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008475 xmlChar *tmp;
8476
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008477 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8478 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008479 l = xmlParseNmtoken(ctxt);
8480 if (l == NULL)
8481 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8482 else {
8483 tmp = xmlBuildQName(l, p, NULL, 0);
8484 xmlFree((char *)l);
8485 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008486 p = xmlDictLookup(ctxt->dict, tmp, -1);
8487 if (tmp != NULL) xmlFree(tmp);
8488 *prefix = NULL;
8489 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008490 }
8491 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008492 xmlChar *tmp;
8493
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008494 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8495 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008496 NEXT;
8497 tmp = (xmlChar *) xmlParseName(ctxt);
8498 if (tmp != NULL) {
8499 tmp = xmlBuildQName(tmp, l, NULL, 0);
8500 l = xmlDictLookup(ctxt->dict, tmp, -1);
8501 if (tmp != NULL) xmlFree(tmp);
8502 *prefix = p;
8503 return(l);
8504 }
8505 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8506 l = xmlDictLookup(ctxt->dict, tmp, -1);
8507 if (tmp != NULL) xmlFree(tmp);
8508 *prefix = p;
8509 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008510 }
8511 *prefix = p;
8512 } else
8513 *prefix = NULL;
8514 return(l);
8515}
8516
8517/**
8518 * xmlParseQNameAndCompare:
8519 * @ctxt: an XML parser context
8520 * @name: the localname
8521 * @prefix: the prefix, if any.
8522 *
8523 * parse an XML name and compares for match
8524 * (specialized for endtag parsing)
8525 *
8526 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8527 * and the name for mismatch
8528 */
8529
8530static const xmlChar *
8531xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8532 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008533 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008534 const xmlChar *in;
8535 const xmlChar *ret;
8536 const xmlChar *prefix2;
8537
8538 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8539
8540 GROW;
8541 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008542
Daniel Veillard0fb18932003-09-07 09:14:37 +00008543 cmp = prefix;
8544 while (*in != 0 && *in == *cmp) {
8545 ++in;
8546 ++cmp;
8547 }
8548 if ((*cmp == 0) && (*in == ':')) {
8549 in++;
8550 cmp = name;
8551 while (*in != 0 && *in == *cmp) {
8552 ++in;
8553 ++cmp;
8554 }
William M. Brack76e95df2003-10-18 16:20:14 +00008555 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008556 /* success */
8557 ctxt->input->cur = in;
8558 return((const xmlChar*) 1);
8559 }
8560 }
8561 /*
8562 * all strings coms from the dictionary, equality can be done directly
8563 */
8564 ret = xmlParseQName (ctxt, &prefix2);
8565 if ((ret == name) && (prefix == prefix2))
8566 return((const xmlChar*) 1);
8567 return ret;
8568}
8569
8570/**
8571 * xmlParseAttValueInternal:
8572 * @ctxt: an XML parser context
8573 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008574 * @alloc: whether the attribute was reallocated as a new string
8575 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008576 *
8577 * parse a value for an attribute.
8578 * NOTE: if no normalization is needed, the routine will return pointers
8579 * directly from the data buffer.
8580 *
8581 * 3.3.3 Attribute-Value Normalization:
8582 * Before the value of an attribute is passed to the application or
8583 * checked for validity, the XML processor must normalize it as follows:
8584 * - a character reference is processed by appending the referenced
8585 * character to the attribute value
8586 * - an entity reference is processed by recursively processing the
8587 * replacement text of the entity
8588 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8589 * appending #x20 to the normalized value, except that only a single
8590 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8591 * parsed entity or the literal entity value of an internal parsed entity
8592 * - other characters are processed by appending them to the normalized value
8593 * If the declared value is not CDATA, then the XML processor must further
8594 * process the normalized attribute value by discarding any leading and
8595 * trailing space (#x20) characters, and by replacing sequences of space
8596 * (#x20) characters by a single space (#x20) character.
8597 * All attributes for which no declaration has been read should be treated
8598 * by a non-validating parser as if declared CDATA.
8599 *
8600 * Returns the AttValue parsed or NULL. The value has to be freed by the
8601 * caller if it was copied, this can be detected by val[*len] == 0.
8602 */
8603
8604static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008605xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8606 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008607{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008608 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008609 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008610 xmlChar *ret = NULL;
8611
8612 GROW;
8613 in = (xmlChar *) CUR_PTR;
8614 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008615 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008616 return (NULL);
8617 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008618 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008619
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008620 /*
8621 * try to handle in this routine the most common case where no
8622 * allocation of a new string is required and where content is
8623 * pure ASCII.
8624 */
8625 limit = *in++;
8626 end = ctxt->input->end;
8627 start = in;
8628 if (in >= end) {
8629 const xmlChar *oldbase = ctxt->input->base;
8630 GROW;
8631 if (oldbase != ctxt->input->base) {
8632 long delta = ctxt->input->base - oldbase;
8633 start = start + delta;
8634 in = in + delta;
8635 }
8636 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008637 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008638 if (normalize) {
8639 /*
8640 * Skip any leading spaces
8641 */
8642 while ((in < end) && (*in != limit) &&
8643 ((*in == 0x20) || (*in == 0x9) ||
8644 (*in == 0xA) || (*in == 0xD))) {
8645 in++;
8646 start = in;
8647 if (in >= end) {
8648 const xmlChar *oldbase = ctxt->input->base;
8649 GROW;
8650 if (oldbase != ctxt->input->base) {
8651 long delta = ctxt->input->base - oldbase;
8652 start = start + delta;
8653 in = in + delta;
8654 }
8655 end = ctxt->input->end;
8656 }
8657 }
8658 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8659 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8660 if ((*in++ == 0x20) && (*in == 0x20)) break;
8661 if (in >= end) {
8662 const xmlChar *oldbase = ctxt->input->base;
8663 GROW;
8664 if (oldbase != ctxt->input->base) {
8665 long delta = ctxt->input->base - oldbase;
8666 start = start + delta;
8667 in = in + delta;
8668 }
8669 end = ctxt->input->end;
8670 }
8671 }
8672 last = in;
8673 /*
8674 * skip the trailing blanks
8675 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008676 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008677 while ((in < end) && (*in != limit) &&
8678 ((*in == 0x20) || (*in == 0x9) ||
8679 (*in == 0xA) || (*in == 0xD))) {
8680 in++;
8681 if (in >= end) {
8682 const xmlChar *oldbase = ctxt->input->base;
8683 GROW;
8684 if (oldbase != ctxt->input->base) {
8685 long delta = ctxt->input->base - oldbase;
8686 start = start + delta;
8687 in = in + delta;
8688 last = last + delta;
8689 }
8690 end = ctxt->input->end;
8691 }
8692 }
8693 if (*in != limit) goto need_complex;
8694 } else {
8695 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8696 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8697 in++;
8698 if (in >= end) {
8699 const xmlChar *oldbase = ctxt->input->base;
8700 GROW;
8701 if (oldbase != ctxt->input->base) {
8702 long delta = ctxt->input->base - oldbase;
8703 start = start + delta;
8704 in = in + delta;
8705 }
8706 end = ctxt->input->end;
8707 }
8708 }
8709 last = in;
8710 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008711 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008712 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008713 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008714 *len = last - start;
8715 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008716 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008717 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008718 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008719 }
8720 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008721 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008722 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008723need_complex:
8724 if (alloc) *alloc = 1;
8725 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008726}
8727
8728/**
8729 * xmlParseAttribute2:
8730 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008731 * @pref: the element prefix
8732 * @elem: the element name
8733 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008734 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008735 * @len: an int * to save the length of the attribute
8736 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008737 *
8738 * parse an attribute in the new SAX2 framework.
8739 *
8740 * Returns the attribute name, and the value in *value, .
8741 */
8742
8743static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008744xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008745 const xmlChar * pref, const xmlChar * elem,
8746 const xmlChar ** prefix, xmlChar ** value,
8747 int *len, int *alloc)
8748{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008749 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008750 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008751 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008752
8753 *value = NULL;
8754 GROW;
8755 name = xmlParseQName(ctxt, prefix);
8756 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008757 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8758 "error parsing attribute name\n");
8759 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008760 }
8761
8762 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008763 * get the type if needed
8764 */
8765 if (ctxt->attsSpecial != NULL) {
8766 int type;
8767
8768 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008769 pref, elem, *prefix, name);
8770 if (type != 0)
8771 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008772 }
8773
8774 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008775 * read the value
8776 */
8777 SKIP_BLANKS;
8778 if (RAW == '=') {
8779 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008780 SKIP_BLANKS;
8781 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8782 if (normalize) {
8783 /*
8784 * Sometimes a second normalisation pass for spaces is needed
8785 * but that only happens if charrefs or entities refernces
8786 * have been used in the attribute value, i.e. the attribute
8787 * value have been extracted in an allocated string already.
8788 */
8789 if (*alloc) {
8790 const xmlChar *val2;
8791
8792 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008793 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008794 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008795 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008796 }
8797 }
8798 }
8799 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008800 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008801 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8802 "Specification mandate value for attribute %s\n",
8803 name);
8804 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008805 }
8806
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008807 if (*prefix == ctxt->str_xml) {
8808 /*
8809 * Check that xml:lang conforms to the specification
8810 * No more registered as an error, just generate a warning now
8811 * since this was deprecated in XML second edition
8812 */
8813 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8814 internal_val = xmlStrndup(val, *len);
8815 if (!xmlCheckLanguageID(internal_val)) {
8816 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8817 "Malformed value for xml:lang : %s\n",
8818 internal_val, NULL);
8819 }
8820 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008821
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008822 /*
8823 * Check that xml:space conforms to the specification
8824 */
8825 if (xmlStrEqual(name, BAD_CAST "space")) {
8826 internal_val = xmlStrndup(val, *len);
8827 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8828 *(ctxt->space) = 0;
8829 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8830 *(ctxt->space) = 1;
8831 else {
8832 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8833 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8834 internal_val, NULL);
8835 }
8836 }
8837 if (internal_val) {
8838 xmlFree(internal_val);
8839 }
8840 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008841
8842 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008843 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008844}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008845/**
8846 * xmlParseStartTag2:
8847 * @ctxt: an XML parser context
8848 *
8849 * parse a start of tag either for rule element or
8850 * EmptyElement. In both case we don't parse the tag closing chars.
8851 * This routine is called when running SAX2 parsing
8852 *
8853 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8854 *
8855 * [ WFC: Unique Att Spec ]
8856 * No attribute name may appear more than once in the same start-tag or
8857 * empty-element tag.
8858 *
8859 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8860 *
8861 * [ WFC: Unique Att Spec ]
8862 * No attribute name may appear more than once in the same start-tag or
8863 * empty-element tag.
8864 *
8865 * With namespace:
8866 *
8867 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8868 *
8869 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8870 *
8871 * Returns the element name parsed
8872 */
8873
8874static const xmlChar *
8875xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008876 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008877 const xmlChar *localname;
8878 const xmlChar *prefix;
8879 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008880 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008881 const xmlChar *nsname;
8882 xmlChar *attvalue;
8883 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008884 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008885 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008886 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008887 const xmlChar *base;
8888 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008889 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008890
8891 if (RAW != '<') return(NULL);
8892 NEXT1;
8893
8894 /*
8895 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8896 * point since the attribute values may be stored as pointers to
8897 * the buffer and calling SHRINK would destroy them !
8898 * The Shrinking is only possible once the full set of attribute
8899 * callbacks have been done.
8900 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008901reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008902 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008903 base = ctxt->input->base;
8904 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008905 oldline = ctxt->input->line;
8906 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008907 nbatts = 0;
8908 nratts = 0;
8909 nbdef = 0;
8910 nbNs = 0;
8911 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008912 /* Forget any namespaces added during an earlier parse of this element. */
8913 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008914
8915 localname = xmlParseQName(ctxt, &prefix);
8916 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008917 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8918 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008919 return(NULL);
8920 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008921 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008922
8923 /*
8924 * Now parse the attributes, it ends up with the ending
8925 *
8926 * (S Attribute)* S?
8927 */
8928 SKIP_BLANKS;
8929 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008930 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008931
8932 while ((RAW != '>') &&
8933 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008934 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008935 const xmlChar *q = CUR_PTR;
8936 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008937 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008938
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008939 attname = xmlParseAttribute2(ctxt, prefix, localname,
8940 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008941 if (ctxt->input->base != base) {
8942 if ((attvalue != NULL) && (alloc != 0))
8943 xmlFree(attvalue);
8944 attvalue = NULL;
8945 goto base_changed;
8946 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008947 if ((attname != NULL) && (attvalue != NULL)) {
8948 if (len < 0) len = xmlStrlen(attvalue);
8949 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008950 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8951 xmlURIPtr uri;
8952
8953 if (*URL != 0) {
8954 uri = xmlParseURI((const char *) URL);
8955 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008956 xmlNsErr(ctxt, XML_WAR_NS_URI,
8957 "xmlns: '%s' is not a valid URI\n",
8958 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008959 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008960 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008961 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8962 "xmlns: URI %s is not absolute\n",
8963 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008964 }
8965 xmlFreeURI(uri);
8966 }
Daniel Veillard37334572008-07-31 08:20:02 +00008967 if (URL == ctxt->str_xml_ns) {
8968 if (attname != ctxt->str_xml) {
8969 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8970 "xml namespace URI cannot be the default namespace\n",
8971 NULL, NULL, NULL);
8972 }
8973 goto skip_default_ns;
8974 }
8975 if ((len == 29) &&
8976 (xmlStrEqual(URL,
8977 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8978 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8979 "reuse of the xmlns namespace name is forbidden\n",
8980 NULL, NULL, NULL);
8981 goto skip_default_ns;
8982 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008983 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008984 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008985 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008986 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008987 for (j = 1;j <= nbNs;j++)
8988 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8989 break;
8990 if (j <= nbNs)
8991 xmlErrAttributeDup(ctxt, NULL, attname);
8992 else
8993 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008994skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008995 if (alloc != 0) xmlFree(attvalue);
8996 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008997 continue;
8998 }
8999 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009000 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9001 xmlURIPtr uri;
9002
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009003 if (attname == ctxt->str_xml) {
9004 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009005 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9006 "xml namespace prefix mapped to wrong URI\n",
9007 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009008 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009009 /*
9010 * Do not keep a namespace definition node
9011 */
Daniel Veillard37334572008-07-31 08:20:02 +00009012 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009013 }
Daniel Veillard37334572008-07-31 08:20:02 +00009014 if (URL == ctxt->str_xml_ns) {
9015 if (attname != ctxt->str_xml) {
9016 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9017 "xml namespace URI mapped to wrong prefix\n",
9018 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009019 }
Daniel Veillard37334572008-07-31 08:20:02 +00009020 goto skip_ns;
9021 }
9022 if (attname == ctxt->str_xmlns) {
9023 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9024 "redefinition of the xmlns prefix is forbidden\n",
9025 NULL, NULL, NULL);
9026 goto skip_ns;
9027 }
9028 if ((len == 29) &&
9029 (xmlStrEqual(URL,
9030 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9031 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9032 "reuse of the xmlns namespace name is forbidden\n",
9033 NULL, NULL, NULL);
9034 goto skip_ns;
9035 }
9036 if ((URL == NULL) || (URL[0] == 0)) {
9037 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9038 "xmlns:%s: Empty XML namespace is not allowed\n",
9039 attname, NULL, NULL);
9040 goto skip_ns;
9041 } else {
9042 uri = xmlParseURI((const char *) URL);
9043 if (uri == NULL) {
9044 xmlNsErr(ctxt, XML_WAR_NS_URI,
9045 "xmlns:%s: '%s' is not a valid URI\n",
9046 attname, URL, NULL);
9047 } else {
9048 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9049 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9050 "xmlns:%s: URI %s is not absolute\n",
9051 attname, URL, NULL);
9052 }
9053 xmlFreeURI(uri);
9054 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009055 }
9056
Daniel Veillard0fb18932003-09-07 09:14:37 +00009057 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009058 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009059 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009060 for (j = 1;j <= nbNs;j++)
9061 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9062 break;
9063 if (j <= nbNs)
9064 xmlErrAttributeDup(ctxt, aprefix, attname);
9065 else
9066 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009067skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009068 if (alloc != 0) xmlFree(attvalue);
9069 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009070 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009071 continue;
9072 }
9073
9074 /*
9075 * Add the pair to atts
9076 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009077 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9078 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009079 if (attvalue[len] == 0)
9080 xmlFree(attvalue);
9081 goto failed;
9082 }
9083 maxatts = ctxt->maxatts;
9084 atts = ctxt->atts;
9085 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009086 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009087 atts[nbatts++] = attname;
9088 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009089 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009090 atts[nbatts++] = attvalue;
9091 attvalue += len;
9092 atts[nbatts++] = attvalue;
9093 /*
9094 * tag if some deallocation is needed
9095 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009096 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009097 } else {
9098 if ((attvalue != NULL) && (attvalue[len] == 0))
9099 xmlFree(attvalue);
9100 }
9101
Daniel Veillard37334572008-07-31 08:20:02 +00009102failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009103
9104 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00009105 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009106 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9107 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009108 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009109 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9110 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009111 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009112 }
9113 SKIP_BLANKS;
9114 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9115 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009116 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009117 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009118 break;
9119 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009120 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009121 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009122 }
9123
Daniel Veillard0fb18932003-09-07 09:14:37 +00009124 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009125 * The attributes defaulting
9126 */
9127 if (ctxt->attsDefault != NULL) {
9128 xmlDefAttrsPtr defaults;
9129
9130 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9131 if (defaults != NULL) {
9132 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009133 attname = defaults->values[5 * i];
9134 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009135
9136 /*
9137 * special work for namespaces defaulted defs
9138 */
9139 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9140 /*
9141 * check that it's not a defined namespace
9142 */
9143 for (j = 1;j <= nbNs;j++)
9144 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9145 break;
9146 if (j <= nbNs) continue;
9147
9148 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009149 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009150 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009151 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009152 nbNs++;
9153 }
9154 } else if (aprefix == ctxt->str_xmlns) {
9155 /*
9156 * check that it's not a defined namespace
9157 */
9158 for (j = 1;j <= nbNs;j++)
9159 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9160 break;
9161 if (j <= nbNs) continue;
9162
9163 nsname = xmlGetNamespace(ctxt, attname);
9164 if (nsname != defaults->values[2]) {
9165 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009166 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009167 nbNs++;
9168 }
9169 } else {
9170 /*
9171 * check that it's not a defined attribute
9172 */
9173 for (j = 0;j < nbatts;j+=5) {
9174 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9175 break;
9176 }
9177 if (j < nbatts) continue;
9178
9179 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9180 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009181 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009182 }
9183 maxatts = ctxt->maxatts;
9184 atts = ctxt->atts;
9185 }
9186 atts[nbatts++] = attname;
9187 atts[nbatts++] = aprefix;
9188 if (aprefix == NULL)
9189 atts[nbatts++] = NULL;
9190 else
9191 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009192 atts[nbatts++] = defaults->values[5 * i + 2];
9193 atts[nbatts++] = defaults->values[5 * i + 3];
9194 if ((ctxt->standalone == 1) &&
9195 (defaults->values[5 * i + 4] != NULL)) {
9196 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9197 "standalone: attribute %s on %s defaulted from external subset\n",
9198 attname, localname);
9199 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009200 nbdef++;
9201 }
9202 }
9203 }
9204 }
9205
Daniel Veillarde70c8772003-11-25 07:21:18 +00009206 /*
9207 * The attributes checkings
9208 */
9209 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009210 /*
9211 * The default namespace does not apply to attribute names.
9212 */
9213 if (atts[i + 1] != NULL) {
9214 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9215 if (nsname == NULL) {
9216 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9217 "Namespace prefix %s for %s on %s is not defined\n",
9218 atts[i + 1], atts[i], localname);
9219 }
9220 atts[i + 2] = nsname;
9221 } else
9222 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009223 /*
9224 * [ WFC: Unique Att Spec ]
9225 * No attribute name may appear more than once in the same
9226 * start-tag or empty-element tag.
9227 * As extended by the Namespace in XML REC.
9228 */
9229 for (j = 0; j < i;j += 5) {
9230 if (atts[i] == atts[j]) {
9231 if (atts[i+1] == atts[j+1]) {
9232 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9233 break;
9234 }
9235 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9236 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9237 "Namespaced Attribute %s in '%s' redefined\n",
9238 atts[i], nsname, NULL);
9239 break;
9240 }
9241 }
9242 }
9243 }
9244
Daniel Veillarde57ec792003-09-10 10:50:59 +00009245 nsname = xmlGetNamespace(ctxt, prefix);
9246 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009247 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9248 "Namespace prefix %s on %s is not defined\n",
9249 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009250 }
9251 *pref = prefix;
9252 *URI = nsname;
9253
9254 /*
9255 * SAX: Start of Element !
9256 */
9257 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9258 (!ctxt->disableSAX)) {
9259 if (nbNs > 0)
9260 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9261 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9262 nbatts / 5, nbdef, atts);
9263 else
9264 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9265 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9266 }
9267
9268 /*
9269 * Free up attribute allocated strings if needed
9270 */
9271 if (attval != 0) {
9272 for (i = 3,j = 0; j < nratts;i += 5,j++)
9273 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9274 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009275 }
9276
9277 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009278
9279base_changed:
9280 /*
9281 * the attribute strings are valid iif the base didn't changed
9282 */
9283 if (attval != 0) {
9284 for (i = 3,j = 0; j < nratts;i += 5,j++)
9285 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9286 xmlFree((xmlChar *) atts[i]);
9287 }
9288 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009289 ctxt->input->line = oldline;
9290 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009291 if (ctxt->wellFormed == 1) {
9292 goto reparse;
9293 }
9294 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009295}
9296
9297/**
9298 * xmlParseEndTag2:
9299 * @ctxt: an XML parser context
9300 * @line: line of the start tag
9301 * @nsNr: number of namespaces on the start tag
9302 *
9303 * parse an end of tag
9304 *
9305 * [42] ETag ::= '</' Name S? '>'
9306 *
9307 * With namespace
9308 *
9309 * [NS 9] ETag ::= '</' QName S? '>'
9310 */
9311
9312static void
9313xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009314 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009315 const xmlChar *name;
9316
9317 GROW;
9318 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009319 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009320 return;
9321 }
9322 SKIP(2);
9323
William M. Brack13dfa872004-09-18 04:52:08 +00009324 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009325 if (ctxt->input->cur[tlen] == '>') {
9326 ctxt->input->cur += tlen + 1;
9327 goto done;
9328 }
9329 ctxt->input->cur += tlen;
9330 name = (xmlChar*)1;
9331 } else {
9332 if (prefix == NULL)
9333 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9334 else
9335 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9336 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009337
9338 /*
9339 * We should definitely be at the ending "S? '>'" part
9340 */
9341 GROW;
9342 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009343 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009344 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009345 } else
9346 NEXT1;
9347
9348 /*
9349 * [ WFC: Element Type Match ]
9350 * The Name in an element's end-tag must match the element type in the
9351 * start-tag.
9352 *
9353 */
9354 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009355 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009356 if ((line == 0) && (ctxt->node != NULL))
9357 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009358 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009359 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009360 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009361 }
9362
9363 /*
9364 * SAX: End of Tag
9365 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009366done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009367 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9368 (!ctxt->disableSAX))
9369 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9370
Daniel Veillard0fb18932003-09-07 09:14:37 +00009371 spacePop(ctxt);
9372 if (nsNr != 0)
9373 nsPop(ctxt, nsNr);
9374 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009375}
9376
9377/**
Owen Taylor3473f882001-02-23 17:55:21 +00009378 * xmlParseCDSect:
9379 * @ctxt: an XML parser context
9380 *
9381 * Parse escaped pure raw content.
9382 *
9383 * [18] CDSect ::= CDStart CData CDEnd
9384 *
9385 * [19] CDStart ::= '<![CDATA['
9386 *
9387 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9388 *
9389 * [21] CDEnd ::= ']]>'
9390 */
9391void
9392xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9393 xmlChar *buf = NULL;
9394 int len = 0;
9395 int size = XML_PARSER_BUFFER_SIZE;
9396 int r, rl;
9397 int s, sl;
9398 int cur, l;
9399 int count = 0;
9400
Daniel Veillard8f597c32003-10-06 08:19:27 +00009401 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009402 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009403 SKIP(9);
9404 } else
9405 return;
9406
9407 ctxt->instate = XML_PARSER_CDATA_SECTION;
9408 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009409 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009410 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009411 ctxt->instate = XML_PARSER_CONTENT;
9412 return;
9413 }
9414 NEXTL(rl);
9415 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009416 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009417 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009418 ctxt->instate = XML_PARSER_CONTENT;
9419 return;
9420 }
9421 NEXTL(sl);
9422 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009423 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009424 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009425 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009426 return;
9427 }
William M. Brack871611b2003-10-18 04:53:14 +00009428 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009429 ((r != ']') || (s != ']') || (cur != '>'))) {
9430 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009431 xmlChar *tmp;
9432
Owen Taylor3473f882001-02-23 17:55:21 +00009433 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009434 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9435 if (tmp == NULL) {
9436 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009437 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009438 return;
9439 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009440 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009441 }
9442 COPY_BUF(rl,buf,len,r);
9443 r = s;
9444 rl = sl;
9445 s = cur;
9446 sl = l;
9447 count++;
9448 if (count > 50) {
9449 GROW;
9450 count = 0;
9451 }
9452 NEXTL(l);
9453 cur = CUR_CHAR(l);
9454 }
9455 buf[len] = 0;
9456 ctxt->instate = XML_PARSER_CONTENT;
9457 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009458 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009459 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009460 xmlFree(buf);
9461 return;
9462 }
9463 NEXTL(l);
9464
9465 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009466 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009467 */
9468 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9469 if (ctxt->sax->cdataBlock != NULL)
9470 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009471 else if (ctxt->sax->characters != NULL)
9472 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009473 }
9474 xmlFree(buf);
9475}
9476
9477/**
9478 * xmlParseContent:
9479 * @ctxt: an XML parser context
9480 *
9481 * Parse a content:
9482 *
9483 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9484 */
9485
9486void
9487xmlParseContent(xmlParserCtxtPtr ctxt) {
9488 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009489 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009490 ((RAW != '<') || (NXT(1) != '/')) &&
9491 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009492 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009493 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009494 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009495
9496 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009497 * First case : a Processing Instruction.
9498 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009499 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009500 xmlParsePI(ctxt);
9501 }
9502
9503 /*
9504 * Second case : a CDSection
9505 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009506 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009507 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009508 xmlParseCDSect(ctxt);
9509 }
9510
9511 /*
9512 * Third case : a comment
9513 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009514 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009515 (NXT(2) == '-') && (NXT(3) == '-')) {
9516 xmlParseComment(ctxt);
9517 ctxt->instate = XML_PARSER_CONTENT;
9518 }
9519
9520 /*
9521 * Fourth case : a sub-element.
9522 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009523 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009524 xmlParseElement(ctxt);
9525 }
9526
9527 /*
9528 * Fifth case : a reference. If if has not been resolved,
9529 * parsing returns it's Name, create the node
9530 */
9531
Daniel Veillard21a0f912001-02-25 19:54:14 +00009532 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009533 xmlParseReference(ctxt);
9534 }
9535
9536 /*
9537 * Last case, text. Note that References are handled directly.
9538 */
9539 else {
9540 xmlParseCharData(ctxt, 0);
9541 }
9542
9543 GROW;
9544 /*
9545 * Pop-up of finished entities.
9546 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009547 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009548 xmlPopInput(ctxt);
9549 SHRINK;
9550
Daniel Veillardfdc91562002-07-01 21:52:03 +00009551 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009552 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9553 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009554 ctxt->instate = XML_PARSER_EOF;
9555 break;
9556 }
9557 }
9558}
9559
9560/**
9561 * xmlParseElement:
9562 * @ctxt: an XML parser context
9563 *
9564 * parse an XML element, this is highly recursive
9565 *
9566 * [39] element ::= EmptyElemTag | STag content ETag
9567 *
9568 * [ WFC: Element Type Match ]
9569 * The Name in an element's end-tag must match the element type in the
9570 * start-tag.
9571 *
Owen Taylor3473f882001-02-23 17:55:21 +00009572 */
9573
9574void
9575xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009576 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009577 const xmlChar *prefix = NULL;
9578 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009579 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009580 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009581 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009582 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009583
Daniel Veillard8915c152008-08-26 13:05:34 +00009584 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9585 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9586 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9587 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9588 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009589 ctxt->instate = XML_PARSER_EOF;
9590 return;
9591 }
9592
Owen Taylor3473f882001-02-23 17:55:21 +00009593 /* Capture start position */
9594 if (ctxt->record_info) {
9595 node_info.begin_pos = ctxt->input->consumed +
9596 (CUR_PTR - ctxt->input->base);
9597 node_info.begin_line = ctxt->input->line;
9598 }
9599
9600 if (ctxt->spaceNr == 0)
9601 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009602 else if (*ctxt->space == -2)
9603 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009604 else
9605 spacePush(ctxt, *ctxt->space);
9606
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009607 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009608#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009609 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009610#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009611 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009612#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009613 else
9614 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009615#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009616 if (ctxt->instate == XML_PARSER_EOF)
9617 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009618 if (name == NULL) {
9619 spacePop(ctxt);
9620 return;
9621 }
9622 namePush(ctxt, name);
9623 ret = ctxt->node;
9624
Daniel Veillard4432df22003-09-28 18:58:27 +00009625#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009626 /*
9627 * [ VC: Root Element Type ]
9628 * The Name in the document type declaration must match the element
9629 * type of the root element.
9630 */
9631 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9632 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9633 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009634#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009635
9636 /*
9637 * Check for an Empty Element.
9638 */
9639 if ((RAW == '/') && (NXT(1) == '>')) {
9640 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009641 if (ctxt->sax2) {
9642 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9643 (!ctxt->disableSAX))
9644 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009645#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009646 } else {
9647 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9648 (!ctxt->disableSAX))
9649 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009650#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009651 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009652 namePop(ctxt);
9653 spacePop(ctxt);
9654 if (nsNr != ctxt->nsNr)
9655 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009656 if ( ret != NULL && ctxt->record_info ) {
9657 node_info.end_pos = ctxt->input->consumed +
9658 (CUR_PTR - ctxt->input->base);
9659 node_info.end_line = ctxt->input->line;
9660 node_info.node = ret;
9661 xmlParserAddNodeInfo(ctxt, &node_info);
9662 }
9663 return;
9664 }
9665 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009666 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009667 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009668 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9669 "Couldn't find end of Start Tag %s line %d\n",
9670 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009671
9672 /*
9673 * end of parsing of this node.
9674 */
9675 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009676 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009677 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009678 if (nsNr != ctxt->nsNr)
9679 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009680
9681 /*
9682 * Capture end position and add node
9683 */
9684 if ( ret != NULL && ctxt->record_info ) {
9685 node_info.end_pos = ctxt->input->consumed +
9686 (CUR_PTR - ctxt->input->base);
9687 node_info.end_line = ctxt->input->line;
9688 node_info.node = ret;
9689 xmlParserAddNodeInfo(ctxt, &node_info);
9690 }
9691 return;
9692 }
9693
9694 /*
9695 * Parse the content of the element:
9696 */
9697 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009698 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009699 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009700 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009701 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009702
9703 /*
9704 * end of parsing of this node.
9705 */
9706 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009707 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009708 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009709 if (nsNr != ctxt->nsNr)
9710 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009711 return;
9712 }
9713
9714 /*
9715 * parse the end of tag: '</' should be here.
9716 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009717 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009718 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009719 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009720 }
9721#ifdef LIBXML_SAX1_ENABLED
9722 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009723 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009724#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009725
9726 /*
9727 * Capture end position and add node
9728 */
9729 if ( ret != NULL && ctxt->record_info ) {
9730 node_info.end_pos = ctxt->input->consumed +
9731 (CUR_PTR - ctxt->input->base);
9732 node_info.end_line = ctxt->input->line;
9733 node_info.node = ret;
9734 xmlParserAddNodeInfo(ctxt, &node_info);
9735 }
9736}
9737
9738/**
9739 * xmlParseVersionNum:
9740 * @ctxt: an XML parser context
9741 *
9742 * parse the XML version value.
9743 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009744 * [26] VersionNum ::= '1.' [0-9]+
9745 *
9746 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009747 *
9748 * Returns the string giving the XML version number, or NULL
9749 */
9750xmlChar *
9751xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9752 xmlChar *buf = NULL;
9753 int len = 0;
9754 int size = 10;
9755 xmlChar cur;
9756
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009757 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009758 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009759 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009760 return(NULL);
9761 }
9762 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009763 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009764 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009765 return(NULL);
9766 }
9767 buf[len++] = cur;
9768 NEXT;
9769 cur=CUR;
9770 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009771 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009772 return(NULL);
9773 }
9774 buf[len++] = cur;
9775 NEXT;
9776 cur=CUR;
9777 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009778 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009779 xmlChar *tmp;
9780
Owen Taylor3473f882001-02-23 17:55:21 +00009781 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009782 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9783 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009784 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009785 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009786 return(NULL);
9787 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009788 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009789 }
9790 buf[len++] = cur;
9791 NEXT;
9792 cur=CUR;
9793 }
9794 buf[len] = 0;
9795 return(buf);
9796}
9797
9798/**
9799 * xmlParseVersionInfo:
9800 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009801 *
Owen Taylor3473f882001-02-23 17:55:21 +00009802 * parse the XML version.
9803 *
9804 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009805 *
Owen Taylor3473f882001-02-23 17:55:21 +00009806 * [25] Eq ::= S? '=' S?
9807 *
9808 * Returns the version string, e.g. "1.0"
9809 */
9810
9811xmlChar *
9812xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9813 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009814
Daniel Veillarda07050d2003-10-19 14:46:32 +00009815 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009816 SKIP(7);
9817 SKIP_BLANKS;
9818 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009819 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009820 return(NULL);
9821 }
9822 NEXT;
9823 SKIP_BLANKS;
9824 if (RAW == '"') {
9825 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009826 version = xmlParseVersionNum(ctxt);
9827 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009828 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009829 } else
9830 NEXT;
9831 } else if (RAW == '\''){
9832 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009833 version = xmlParseVersionNum(ctxt);
9834 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009835 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009836 } else
9837 NEXT;
9838 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009839 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009840 }
9841 }
9842 return(version);
9843}
9844
9845/**
9846 * xmlParseEncName:
9847 * @ctxt: an XML parser context
9848 *
9849 * parse the XML encoding name
9850 *
9851 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9852 *
9853 * Returns the encoding name value or NULL
9854 */
9855xmlChar *
9856xmlParseEncName(xmlParserCtxtPtr ctxt) {
9857 xmlChar *buf = NULL;
9858 int len = 0;
9859 int size = 10;
9860 xmlChar cur;
9861
9862 cur = CUR;
9863 if (((cur >= 'a') && (cur <= 'z')) ||
9864 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009865 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009866 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009867 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009868 return(NULL);
9869 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009870
Owen Taylor3473f882001-02-23 17:55:21 +00009871 buf[len++] = cur;
9872 NEXT;
9873 cur = CUR;
9874 while (((cur >= 'a') && (cur <= 'z')) ||
9875 ((cur >= 'A') && (cur <= 'Z')) ||
9876 ((cur >= '0') && (cur <= '9')) ||
9877 (cur == '.') || (cur == '_') ||
9878 (cur == '-')) {
9879 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009880 xmlChar *tmp;
9881
Owen Taylor3473f882001-02-23 17:55:21 +00009882 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009883 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9884 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009885 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009886 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009887 return(NULL);
9888 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009889 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009890 }
9891 buf[len++] = cur;
9892 NEXT;
9893 cur = CUR;
9894 if (cur == 0) {
9895 SHRINK;
9896 GROW;
9897 cur = CUR;
9898 }
9899 }
9900 buf[len] = 0;
9901 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009902 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009903 }
9904 return(buf);
9905}
9906
9907/**
9908 * xmlParseEncodingDecl:
9909 * @ctxt: an XML parser context
9910 *
9911 * parse the XML encoding declaration
9912 *
9913 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9914 *
9915 * this setups the conversion filters.
9916 *
9917 * Returns the encoding value or NULL
9918 */
9919
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009920const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009921xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9922 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009923
9924 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009925 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009926 SKIP(8);
9927 SKIP_BLANKS;
9928 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009929 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009930 return(NULL);
9931 }
9932 NEXT;
9933 SKIP_BLANKS;
9934 if (RAW == '"') {
9935 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009936 encoding = xmlParseEncName(ctxt);
9937 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009938 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009939 } else
9940 NEXT;
9941 } else if (RAW == '\''){
9942 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009943 encoding = xmlParseEncName(ctxt);
9944 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009945 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009946 } else
9947 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009948 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009949 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009950 }
Daniel Veillardc62efc82011-05-16 16:03:50 +08009951
9952 /*
9953 * Non standard parsing, allowing the user to ignore encoding
9954 */
9955 if (ctxt->options & XML_PARSE_IGNORE_ENC)
9956 return(encoding);
9957
Daniel Veillard6b621b82003-08-11 15:03:34 +00009958 /*
9959 * UTF-16 encoding stwich has already taken place at this stage,
9960 * more over the little-endian/big-endian selection is already done
9961 */
9962 if ((encoding != NULL) &&
9963 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9964 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009965 /*
9966 * If no encoding was passed to the parser, that we are
9967 * using UTF-16 and no decoder is present i.e. the
9968 * document is apparently UTF-8 compatible, then raise an
9969 * encoding mismatch fatal error
9970 */
9971 if ((ctxt->encoding == NULL) &&
9972 (ctxt->input->buf != NULL) &&
9973 (ctxt->input->buf->encoder == NULL)) {
9974 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9975 "Document labelled UTF-16 but has UTF-8 content\n");
9976 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009977 if (ctxt->encoding != NULL)
9978 xmlFree((xmlChar *) ctxt->encoding);
9979 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009980 }
9981 /*
9982 * UTF-8 encoding is handled natively
9983 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009984 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009985 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9986 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009987 if (ctxt->encoding != NULL)
9988 xmlFree((xmlChar *) ctxt->encoding);
9989 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009990 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009991 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009992 xmlCharEncodingHandlerPtr handler;
9993
9994 if (ctxt->input->encoding != NULL)
9995 xmlFree((xmlChar *) ctxt->input->encoding);
9996 ctxt->input->encoding = encoding;
9997
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009998 handler = xmlFindCharEncodingHandler((const char *) encoding);
9999 if (handler != NULL) {
10000 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +000010001 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010002 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010003 "Unsupported encoding %s\n", encoding);
10004 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010005 }
10006 }
10007 }
10008 return(encoding);
10009}
10010
10011/**
10012 * xmlParseSDDecl:
10013 * @ctxt: an XML parser context
10014 *
10015 * parse the XML standalone declaration
10016 *
10017 * [32] SDDecl ::= S 'standalone' Eq
10018 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10019 *
10020 * [ VC: Standalone Document Declaration ]
10021 * TODO The standalone document declaration must have the value "no"
10022 * if any external markup declarations contain declarations of:
10023 * - attributes with default values, if elements to which these
10024 * attributes apply appear in the document without specifications
10025 * of values for these attributes, or
10026 * - entities (other than amp, lt, gt, apos, quot), if references
10027 * to those entities appear in the document, or
10028 * - attributes with values subject to normalization, where the
10029 * attribute appears in the document with a value which will change
10030 * as a result of normalization, or
10031 * - element types with element content, if white space occurs directly
10032 * within any instance of those types.
10033 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010034 * Returns:
10035 * 1 if standalone="yes"
10036 * 0 if standalone="no"
10037 * -2 if standalone attribute is missing or invalid
10038 * (A standalone value of -2 means that the XML declaration was found,
10039 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010040 */
10041
10042int
10043xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010044 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010045
10046 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010047 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010048 SKIP(10);
10049 SKIP_BLANKS;
10050 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010051 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010052 return(standalone);
10053 }
10054 NEXT;
10055 SKIP_BLANKS;
10056 if (RAW == '\''){
10057 NEXT;
10058 if ((RAW == 'n') && (NXT(1) == 'o')) {
10059 standalone = 0;
10060 SKIP(2);
10061 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10062 (NXT(2) == 's')) {
10063 standalone = 1;
10064 SKIP(3);
10065 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010066 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010067 }
10068 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010069 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010070 } else
10071 NEXT;
10072 } else if (RAW == '"'){
10073 NEXT;
10074 if ((RAW == 'n') && (NXT(1) == 'o')) {
10075 standalone = 0;
10076 SKIP(2);
10077 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10078 (NXT(2) == 's')) {
10079 standalone = 1;
10080 SKIP(3);
10081 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010082 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010083 }
10084 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010085 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010086 } else
10087 NEXT;
10088 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010089 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010090 }
10091 }
10092 return(standalone);
10093}
10094
10095/**
10096 * xmlParseXMLDecl:
10097 * @ctxt: an XML parser context
10098 *
10099 * parse an XML declaration header
10100 *
10101 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10102 */
10103
10104void
10105xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10106 xmlChar *version;
10107
10108 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010109 * This value for standalone indicates that the document has an
10110 * XML declaration but it does not have a standalone attribute.
10111 * It will be overwritten later if a standalone attribute is found.
10112 */
10113 ctxt->input->standalone = -2;
10114
10115 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010116 * We know that '<?xml' is here.
10117 */
10118 SKIP(5);
10119
William M. Brack76e95df2003-10-18 16:20:14 +000010120 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010121 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10122 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010123 }
10124 SKIP_BLANKS;
10125
10126 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010127 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010128 */
10129 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010130 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010131 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010132 } else {
10133 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10134 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010135 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010136 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010137 if (ctxt->options & XML_PARSE_OLD10) {
10138 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10139 "Unsupported version '%s'\n",
10140 version);
10141 } else {
10142 if ((version[0] == '1') && ((version[1] == '.'))) {
10143 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10144 "Unsupported version '%s'\n",
10145 version, NULL);
10146 } else {
10147 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10148 "Unsupported version '%s'\n",
10149 version);
10150 }
10151 }
Daniel Veillard19840942001-11-29 16:11:38 +000010152 }
10153 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010154 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010155 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010156 }
Owen Taylor3473f882001-02-23 17:55:21 +000010157
10158 /*
10159 * We may have the encoding declaration
10160 */
William M. Brack76e95df2003-10-18 16:20:14 +000010161 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010162 if ((RAW == '?') && (NXT(1) == '>')) {
10163 SKIP(2);
10164 return;
10165 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010166 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010167 }
10168 xmlParseEncodingDecl(ctxt);
10169 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10170 /*
10171 * The XML REC instructs us to stop parsing right here
10172 */
10173 return;
10174 }
10175
10176 /*
10177 * We may have the standalone status.
10178 */
William M. Brack76e95df2003-10-18 16:20:14 +000010179 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010180 if ((RAW == '?') && (NXT(1) == '>')) {
10181 SKIP(2);
10182 return;
10183 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010184 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010185 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010186
10187 /*
10188 * We can grow the input buffer freely at that point
10189 */
10190 GROW;
10191
Owen Taylor3473f882001-02-23 17:55:21 +000010192 SKIP_BLANKS;
10193 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10194
10195 SKIP_BLANKS;
10196 if ((RAW == '?') && (NXT(1) == '>')) {
10197 SKIP(2);
10198 } else if (RAW == '>') {
10199 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010200 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010201 NEXT;
10202 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010203 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010204 MOVETO_ENDTAG(CUR_PTR);
10205 NEXT;
10206 }
10207}
10208
10209/**
10210 * xmlParseMisc:
10211 * @ctxt: an XML parser context
10212 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010213 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010214 *
10215 * [27] Misc ::= Comment | PI | S
10216 */
10217
10218void
10219xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010220 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010221 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010222 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010223 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010224 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010225 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010226 NEXT;
10227 } else
10228 xmlParseComment(ctxt);
10229 }
10230}
10231
10232/**
10233 * xmlParseDocument:
10234 * @ctxt: an XML parser context
10235 *
10236 * parse an XML document (and build a tree if using the standard SAX
10237 * interface).
10238 *
10239 * [1] document ::= prolog element Misc*
10240 *
10241 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10242 *
10243 * Returns 0, -1 in case of error. the parser context is augmented
10244 * as a result of the parsing.
10245 */
10246
10247int
10248xmlParseDocument(xmlParserCtxtPtr ctxt) {
10249 xmlChar start[4];
10250 xmlCharEncoding enc;
10251
10252 xmlInitParser();
10253
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010254 if ((ctxt == NULL) || (ctxt->input == NULL))
10255 return(-1);
10256
Owen Taylor3473f882001-02-23 17:55:21 +000010257 GROW;
10258
10259 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010260 * SAX: detecting the level.
10261 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010262 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010263
10264 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010265 * SAX: beginning of the document processing.
10266 */
10267 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10268 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10269
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010270 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010271 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010272 /*
10273 * Get the 4 first bytes and decode the charset
10274 * if enc != XML_CHAR_ENCODING_NONE
10275 * plug some encoding conversion routines.
10276 */
10277 start[0] = RAW;
10278 start[1] = NXT(1);
10279 start[2] = NXT(2);
10280 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010281 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010282 if (enc != XML_CHAR_ENCODING_NONE) {
10283 xmlSwitchEncoding(ctxt, enc);
10284 }
Owen Taylor3473f882001-02-23 17:55:21 +000010285 }
10286
10287
10288 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010289 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010290 }
10291
10292 /*
10293 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010294 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010295 * than just the first line, unless the amount of data is really
10296 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010297 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010298 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10299 GROW;
10300 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010301 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010302
10303 /*
10304 * Note that we will switch encoding on the fly.
10305 */
10306 xmlParseXMLDecl(ctxt);
10307 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10308 /*
10309 * The XML REC instructs us to stop parsing right here
10310 */
10311 return(-1);
10312 }
10313 ctxt->standalone = ctxt->input->standalone;
10314 SKIP_BLANKS;
10315 } else {
10316 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10317 }
10318 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10319 ctxt->sax->startDocument(ctxt->userData);
10320
10321 /*
10322 * The Misc part of the Prolog
10323 */
10324 GROW;
10325 xmlParseMisc(ctxt);
10326
10327 /*
10328 * Then possibly doc type declaration(s) and more Misc
10329 * (doctypedecl Misc*)?
10330 */
10331 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010332 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010333
10334 ctxt->inSubset = 1;
10335 xmlParseDocTypeDecl(ctxt);
10336 if (RAW == '[') {
10337 ctxt->instate = XML_PARSER_DTD;
10338 xmlParseInternalSubset(ctxt);
10339 }
10340
10341 /*
10342 * Create and update the external subset.
10343 */
10344 ctxt->inSubset = 2;
10345 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10346 (!ctxt->disableSAX))
10347 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10348 ctxt->extSubSystem, ctxt->extSubURI);
10349 ctxt->inSubset = 0;
10350
Daniel Veillardac4118d2008-01-11 05:27:32 +000010351 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010352
10353 ctxt->instate = XML_PARSER_PROLOG;
10354 xmlParseMisc(ctxt);
10355 }
10356
10357 /*
10358 * Time to start parsing the tree itself
10359 */
10360 GROW;
10361 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010362 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10363 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010364 } else {
10365 ctxt->instate = XML_PARSER_CONTENT;
10366 xmlParseElement(ctxt);
10367 ctxt->instate = XML_PARSER_EPILOG;
10368
10369
10370 /*
10371 * The Misc part at the end
10372 */
10373 xmlParseMisc(ctxt);
10374
Daniel Veillard561b7f82002-03-20 21:55:57 +000010375 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010376 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010377 }
10378 ctxt->instate = XML_PARSER_EOF;
10379 }
10380
10381 /*
10382 * SAX: end of the document processing.
10383 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010384 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010385 ctxt->sax->endDocument(ctxt->userData);
10386
Daniel Veillard5997aca2002-03-18 18:36:20 +000010387 /*
10388 * Remove locally kept entity definitions if the tree was not built
10389 */
10390 if ((ctxt->myDoc != NULL) &&
10391 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10392 xmlFreeDoc(ctxt->myDoc);
10393 ctxt->myDoc = NULL;
10394 }
10395
Daniel Veillardae0765b2008-07-31 19:54:59 +000010396 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10397 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10398 if (ctxt->valid)
10399 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10400 if (ctxt->nsWellFormed)
10401 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10402 if (ctxt->options & XML_PARSE_OLD10)
10403 ctxt->myDoc->properties |= XML_DOC_OLD10;
10404 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010405 if (! ctxt->wellFormed) {
10406 ctxt->valid = 0;
10407 return(-1);
10408 }
Owen Taylor3473f882001-02-23 17:55:21 +000010409 return(0);
10410}
10411
10412/**
10413 * xmlParseExtParsedEnt:
10414 * @ctxt: an XML parser context
10415 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010416 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010417 * An external general parsed entity is well-formed if it matches the
10418 * production labeled extParsedEnt.
10419 *
10420 * [78] extParsedEnt ::= TextDecl? content
10421 *
10422 * Returns 0, -1 in case of error. the parser context is augmented
10423 * as a result of the parsing.
10424 */
10425
10426int
10427xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10428 xmlChar start[4];
10429 xmlCharEncoding enc;
10430
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010431 if ((ctxt == NULL) || (ctxt->input == NULL))
10432 return(-1);
10433
Owen Taylor3473f882001-02-23 17:55:21 +000010434 xmlDefaultSAXHandlerInit();
10435
Daniel Veillard309f81d2003-09-23 09:02:53 +000010436 xmlDetectSAX2(ctxt);
10437
Owen Taylor3473f882001-02-23 17:55:21 +000010438 GROW;
10439
10440 /*
10441 * SAX: beginning of the document processing.
10442 */
10443 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10444 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10445
10446 /*
10447 * Get the 4 first bytes and decode the charset
10448 * if enc != XML_CHAR_ENCODING_NONE
10449 * plug some encoding conversion routines.
10450 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010451 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10452 start[0] = RAW;
10453 start[1] = NXT(1);
10454 start[2] = NXT(2);
10455 start[3] = NXT(3);
10456 enc = xmlDetectCharEncoding(start, 4);
10457 if (enc != XML_CHAR_ENCODING_NONE) {
10458 xmlSwitchEncoding(ctxt, enc);
10459 }
Owen Taylor3473f882001-02-23 17:55:21 +000010460 }
10461
10462
10463 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010464 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010465 }
10466
10467 /*
10468 * Check for the XMLDecl in the Prolog.
10469 */
10470 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010471 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010472
10473 /*
10474 * Note that we will switch encoding on the fly.
10475 */
10476 xmlParseXMLDecl(ctxt);
10477 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10478 /*
10479 * The XML REC instructs us to stop parsing right here
10480 */
10481 return(-1);
10482 }
10483 SKIP_BLANKS;
10484 } else {
10485 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10486 }
10487 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10488 ctxt->sax->startDocument(ctxt->userData);
10489
10490 /*
10491 * Doing validity checking on chunk doesn't make sense
10492 */
10493 ctxt->instate = XML_PARSER_CONTENT;
10494 ctxt->validate = 0;
10495 ctxt->loadsubset = 0;
10496 ctxt->depth = 0;
10497
10498 xmlParseContent(ctxt);
10499
10500 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010501 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010502 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010503 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010504 }
10505
10506 /*
10507 * SAX: end of the document processing.
10508 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010509 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010510 ctxt->sax->endDocument(ctxt->userData);
10511
10512 if (! ctxt->wellFormed) return(-1);
10513 return(0);
10514}
10515
Daniel Veillard73b013f2003-09-30 12:36:01 +000010516#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010517/************************************************************************
10518 * *
10519 * Progressive parsing interfaces *
10520 * *
10521 ************************************************************************/
10522
10523/**
10524 * xmlParseLookupSequence:
10525 * @ctxt: an XML parser context
10526 * @first: the first char to lookup
10527 * @next: the next char to lookup or zero
10528 * @third: the next char to lookup or zero
10529 *
10530 * Try to find if a sequence (first, next, third) or just (first next) or
10531 * (first) is available in the input stream.
10532 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10533 * to avoid rescanning sequences of bytes, it DOES change the state of the
10534 * parser, do not use liberally.
10535 *
10536 * Returns the index to the current parsing point if the full sequence
10537 * is available, -1 otherwise.
10538 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010539static int
Owen Taylor3473f882001-02-23 17:55:21 +000010540xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10541 xmlChar next, xmlChar third) {
10542 int base, len;
10543 xmlParserInputPtr in;
10544 const xmlChar *buf;
10545
10546 in = ctxt->input;
10547 if (in == NULL) return(-1);
10548 base = in->cur - in->base;
10549 if (base < 0) return(-1);
10550 if (ctxt->checkIndex > base)
10551 base = ctxt->checkIndex;
10552 if (in->buf == NULL) {
10553 buf = in->base;
10554 len = in->length;
10555 } else {
10556 buf = in->buf->buffer->content;
10557 len = in->buf->buffer->use;
10558 }
10559 /* take into account the sequence length */
10560 if (third) len -= 2;
10561 else if (next) len --;
10562 for (;base < len;base++) {
10563 if (buf[base] == first) {
10564 if (third != 0) {
10565 if ((buf[base + 1] != next) ||
10566 (buf[base + 2] != third)) continue;
10567 } else if (next != 0) {
10568 if (buf[base + 1] != next) continue;
10569 }
10570 ctxt->checkIndex = 0;
10571#ifdef DEBUG_PUSH
10572 if (next == 0)
10573 xmlGenericError(xmlGenericErrorContext,
10574 "PP: lookup '%c' found at %d\n",
10575 first, base);
10576 else if (third == 0)
10577 xmlGenericError(xmlGenericErrorContext,
10578 "PP: lookup '%c%c' found at %d\n",
10579 first, next, base);
10580 else
10581 xmlGenericError(xmlGenericErrorContext,
10582 "PP: lookup '%c%c%c' found at %d\n",
10583 first, next, third, base);
10584#endif
10585 return(base - (in->cur - in->base));
10586 }
10587 }
10588 ctxt->checkIndex = base;
10589#ifdef DEBUG_PUSH
10590 if (next == 0)
10591 xmlGenericError(xmlGenericErrorContext,
10592 "PP: lookup '%c' failed\n", first);
10593 else if (third == 0)
10594 xmlGenericError(xmlGenericErrorContext,
10595 "PP: lookup '%c%c' failed\n", first, next);
10596 else
10597 xmlGenericError(xmlGenericErrorContext,
10598 "PP: lookup '%c%c%c' failed\n", first, next, third);
10599#endif
10600 return(-1);
10601}
10602
10603/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010604 * xmlParseGetLasts:
10605 * @ctxt: an XML parser context
10606 * @lastlt: pointer to store the last '<' from the input
10607 * @lastgt: pointer to store the last '>' from the input
10608 *
10609 * Lookup the last < and > in the current chunk
10610 */
10611static void
10612xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10613 const xmlChar **lastgt) {
10614 const xmlChar *tmp;
10615
10616 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10617 xmlGenericError(xmlGenericErrorContext,
10618 "Internal error: xmlParseGetLasts\n");
10619 return;
10620 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010621 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010622 tmp = ctxt->input->end;
10623 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010624 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010625 if (tmp < ctxt->input->base) {
10626 *lastlt = NULL;
10627 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010628 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010629 *lastlt = tmp;
10630 tmp++;
10631 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10632 if (*tmp == '\'') {
10633 tmp++;
10634 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10635 if (tmp < ctxt->input->end) tmp++;
10636 } else if (*tmp == '"') {
10637 tmp++;
10638 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10639 if (tmp < ctxt->input->end) tmp++;
10640 } else
10641 tmp++;
10642 }
10643 if (tmp < ctxt->input->end)
10644 *lastgt = tmp;
10645 else {
10646 tmp = *lastlt;
10647 tmp--;
10648 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10649 if (tmp >= ctxt->input->base)
10650 *lastgt = tmp;
10651 else
10652 *lastgt = NULL;
10653 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010654 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010655 } else {
10656 *lastlt = NULL;
10657 *lastgt = NULL;
10658 }
10659}
10660/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010661 * xmlCheckCdataPush:
10662 * @cur: pointer to the bock of characters
10663 * @len: length of the block in bytes
10664 *
10665 * Check that the block of characters is okay as SCdata content [20]
10666 *
10667 * Returns the number of bytes to pass if okay, a negative index where an
10668 * UTF-8 error occured otherwise
10669 */
10670static int
10671xmlCheckCdataPush(const xmlChar *utf, int len) {
10672 int ix;
10673 unsigned char c;
10674 int codepoint;
10675
10676 if ((utf == NULL) || (len <= 0))
10677 return(0);
10678
10679 for (ix = 0; ix < len;) { /* string is 0-terminated */
10680 c = utf[ix];
10681 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10682 if (c >= 0x20)
10683 ix++;
10684 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10685 ix++;
10686 else
10687 return(-ix);
10688 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10689 if (ix + 2 > len) return(ix);
10690 if ((utf[ix+1] & 0xc0 ) != 0x80)
10691 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010692 codepoint = (utf[ix] & 0x1f) << 6;
10693 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010694 if (!xmlIsCharQ(codepoint))
10695 return(-ix);
10696 ix += 2;
10697 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10698 if (ix + 3 > len) return(ix);
10699 if (((utf[ix+1] & 0xc0) != 0x80) ||
10700 ((utf[ix+2] & 0xc0) != 0x80))
10701 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010702 codepoint = (utf[ix] & 0xf) << 12;
10703 codepoint |= (utf[ix+1] & 0x3f) << 6;
10704 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010705 if (!xmlIsCharQ(codepoint))
10706 return(-ix);
10707 ix += 3;
10708 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10709 if (ix + 4 > len) return(ix);
10710 if (((utf[ix+1] & 0xc0) != 0x80) ||
10711 ((utf[ix+2] & 0xc0) != 0x80) ||
10712 ((utf[ix+3] & 0xc0) != 0x80))
10713 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010714 codepoint = (utf[ix] & 0x7) << 18;
10715 codepoint |= (utf[ix+1] & 0x3f) << 12;
10716 codepoint |= (utf[ix+2] & 0x3f) << 6;
10717 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010718 if (!xmlIsCharQ(codepoint))
10719 return(-ix);
10720 ix += 4;
10721 } else /* unknown encoding */
10722 return(-ix);
10723 }
10724 return(ix);
10725}
10726
10727/**
Owen Taylor3473f882001-02-23 17:55:21 +000010728 * xmlParseTryOrFinish:
10729 * @ctxt: an XML parser context
10730 * @terminate: last chunk indicator
10731 *
10732 * Try to progress on parsing
10733 *
10734 * Returns zero if no parsing was possible
10735 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010736static int
Owen Taylor3473f882001-02-23 17:55:21 +000010737xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10738 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010739 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010740 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010741 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010742
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010743 if (ctxt->input == NULL)
10744 return(0);
10745
Owen Taylor3473f882001-02-23 17:55:21 +000010746#ifdef DEBUG_PUSH
10747 switch (ctxt->instate) {
10748 case XML_PARSER_EOF:
10749 xmlGenericError(xmlGenericErrorContext,
10750 "PP: try EOF\n"); break;
10751 case XML_PARSER_START:
10752 xmlGenericError(xmlGenericErrorContext,
10753 "PP: try START\n"); break;
10754 case XML_PARSER_MISC:
10755 xmlGenericError(xmlGenericErrorContext,
10756 "PP: try MISC\n");break;
10757 case XML_PARSER_COMMENT:
10758 xmlGenericError(xmlGenericErrorContext,
10759 "PP: try COMMENT\n");break;
10760 case XML_PARSER_PROLOG:
10761 xmlGenericError(xmlGenericErrorContext,
10762 "PP: try PROLOG\n");break;
10763 case XML_PARSER_START_TAG:
10764 xmlGenericError(xmlGenericErrorContext,
10765 "PP: try START_TAG\n");break;
10766 case XML_PARSER_CONTENT:
10767 xmlGenericError(xmlGenericErrorContext,
10768 "PP: try CONTENT\n");break;
10769 case XML_PARSER_CDATA_SECTION:
10770 xmlGenericError(xmlGenericErrorContext,
10771 "PP: try CDATA_SECTION\n");break;
10772 case XML_PARSER_END_TAG:
10773 xmlGenericError(xmlGenericErrorContext,
10774 "PP: try END_TAG\n");break;
10775 case XML_PARSER_ENTITY_DECL:
10776 xmlGenericError(xmlGenericErrorContext,
10777 "PP: try ENTITY_DECL\n");break;
10778 case XML_PARSER_ENTITY_VALUE:
10779 xmlGenericError(xmlGenericErrorContext,
10780 "PP: try ENTITY_VALUE\n");break;
10781 case XML_PARSER_ATTRIBUTE_VALUE:
10782 xmlGenericError(xmlGenericErrorContext,
10783 "PP: try ATTRIBUTE_VALUE\n");break;
10784 case XML_PARSER_DTD:
10785 xmlGenericError(xmlGenericErrorContext,
10786 "PP: try DTD\n");break;
10787 case XML_PARSER_EPILOG:
10788 xmlGenericError(xmlGenericErrorContext,
10789 "PP: try EPILOG\n");break;
10790 case XML_PARSER_PI:
10791 xmlGenericError(xmlGenericErrorContext,
10792 "PP: try PI\n");break;
10793 case XML_PARSER_IGNORE:
10794 xmlGenericError(xmlGenericErrorContext,
10795 "PP: try IGNORE\n");break;
10796 }
10797#endif
10798
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010799 if ((ctxt->input != NULL) &&
10800 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010801 xmlSHRINK(ctxt);
10802 ctxt->checkIndex = 0;
10803 }
10804 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010805
Daniel Veillarda880b122003-04-21 21:36:41 +000010806 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010807 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010808 return(0);
10809
10810
Owen Taylor3473f882001-02-23 17:55:21 +000010811 /*
10812 * Pop-up of finished entities.
10813 */
10814 while ((RAW == 0) && (ctxt->inputNr > 1))
10815 xmlPopInput(ctxt);
10816
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010817 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010818 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010819 avail = ctxt->input->length -
10820 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010821 else {
10822 /*
10823 * If we are operating on converted input, try to flush
10824 * remainng chars to avoid them stalling in the non-converted
10825 * buffer.
10826 */
10827 if ((ctxt->input->buf->raw != NULL) &&
10828 (ctxt->input->buf->raw->use > 0)) {
10829 int base = ctxt->input->base -
10830 ctxt->input->buf->buffer->content;
10831 int current = ctxt->input->cur - ctxt->input->base;
10832
10833 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10834 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10835 ctxt->input->cur = ctxt->input->base + current;
10836 ctxt->input->end =
10837 &ctxt->input->buf->buffer->content[
10838 ctxt->input->buf->buffer->use];
10839 }
10840 avail = ctxt->input->buf->buffer->use -
10841 (ctxt->input->cur - ctxt->input->base);
10842 }
Owen Taylor3473f882001-02-23 17:55:21 +000010843 if (avail < 1)
10844 goto done;
10845 switch (ctxt->instate) {
10846 case XML_PARSER_EOF:
10847 /*
10848 * Document parsing is done !
10849 */
10850 goto done;
10851 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010852 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10853 xmlChar start[4];
10854 xmlCharEncoding enc;
10855
10856 /*
10857 * Very first chars read from the document flow.
10858 */
10859 if (avail < 4)
10860 goto done;
10861
10862 /*
10863 * Get the 4 first bytes and decode the charset
10864 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010865 * plug some encoding conversion routines,
10866 * else xmlSwitchEncoding will set to (default)
10867 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010868 */
10869 start[0] = RAW;
10870 start[1] = NXT(1);
10871 start[2] = NXT(2);
10872 start[3] = NXT(3);
10873 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010874 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010875 break;
10876 }
Owen Taylor3473f882001-02-23 17:55:21 +000010877
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010878 if (avail < 2)
10879 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010880 cur = ctxt->input->cur[0];
10881 next = ctxt->input->cur[1];
10882 if (cur == 0) {
10883 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10884 ctxt->sax->setDocumentLocator(ctxt->userData,
10885 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010886 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010887 ctxt->instate = XML_PARSER_EOF;
10888#ifdef DEBUG_PUSH
10889 xmlGenericError(xmlGenericErrorContext,
10890 "PP: entering EOF\n");
10891#endif
10892 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10893 ctxt->sax->endDocument(ctxt->userData);
10894 goto done;
10895 }
10896 if ((cur == '<') && (next == '?')) {
10897 /* PI or XML decl */
10898 if (avail < 5) return(ret);
10899 if ((!terminate) &&
10900 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10901 return(ret);
10902 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10903 ctxt->sax->setDocumentLocator(ctxt->userData,
10904 &xmlDefaultSAXLocator);
10905 if ((ctxt->input->cur[2] == 'x') &&
10906 (ctxt->input->cur[3] == 'm') &&
10907 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010908 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010909 ret += 5;
10910#ifdef DEBUG_PUSH
10911 xmlGenericError(xmlGenericErrorContext,
10912 "PP: Parsing XML Decl\n");
10913#endif
10914 xmlParseXMLDecl(ctxt);
10915 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10916 /*
10917 * The XML REC instructs us to stop parsing right
10918 * here
10919 */
10920 ctxt->instate = XML_PARSER_EOF;
10921 return(0);
10922 }
10923 ctxt->standalone = ctxt->input->standalone;
10924 if ((ctxt->encoding == NULL) &&
10925 (ctxt->input->encoding != NULL))
10926 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10927 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10928 (!ctxt->disableSAX))
10929 ctxt->sax->startDocument(ctxt->userData);
10930 ctxt->instate = XML_PARSER_MISC;
10931#ifdef DEBUG_PUSH
10932 xmlGenericError(xmlGenericErrorContext,
10933 "PP: entering MISC\n");
10934#endif
10935 } else {
10936 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10937 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10938 (!ctxt->disableSAX))
10939 ctxt->sax->startDocument(ctxt->userData);
10940 ctxt->instate = XML_PARSER_MISC;
10941#ifdef DEBUG_PUSH
10942 xmlGenericError(xmlGenericErrorContext,
10943 "PP: entering MISC\n");
10944#endif
10945 }
10946 } else {
10947 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10948 ctxt->sax->setDocumentLocator(ctxt->userData,
10949 &xmlDefaultSAXLocator);
10950 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010951 if (ctxt->version == NULL) {
10952 xmlErrMemory(ctxt, NULL);
10953 break;
10954 }
Owen Taylor3473f882001-02-23 17:55:21 +000010955 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10956 (!ctxt->disableSAX))
10957 ctxt->sax->startDocument(ctxt->userData);
10958 ctxt->instate = XML_PARSER_MISC;
10959#ifdef DEBUG_PUSH
10960 xmlGenericError(xmlGenericErrorContext,
10961 "PP: entering MISC\n");
10962#endif
10963 }
10964 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010965 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010966 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010967 const xmlChar *prefix = NULL;
10968 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010969 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010970
10971 if ((avail < 2) && (ctxt->inputNr == 1))
10972 goto done;
10973 cur = ctxt->input->cur[0];
10974 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010975 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010976 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010977 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10978 ctxt->sax->endDocument(ctxt->userData);
10979 goto done;
10980 }
10981 if (!terminate) {
10982 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010983 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010984 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010985 goto done;
10986 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10987 goto done;
10988 }
10989 }
10990 if (ctxt->spaceNr == 0)
10991 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010992 else if (*ctxt->space == -2)
10993 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010994 else
10995 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010996#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010997 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010998#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010999 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011000#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011001 else
11002 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011003#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011004 if (ctxt->instate == XML_PARSER_EOF)
11005 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011006 if (name == NULL) {
11007 spacePop(ctxt);
11008 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011009 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11010 ctxt->sax->endDocument(ctxt->userData);
11011 goto done;
11012 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011013#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011014 /*
11015 * [ VC: Root Element Type ]
11016 * The Name in the document type declaration must match
11017 * the element type of the root element.
11018 */
11019 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11020 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11021 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011022#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011023
11024 /*
11025 * Check for an Empty Element.
11026 */
11027 if ((RAW == '/') && (NXT(1) == '>')) {
11028 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011029
11030 if (ctxt->sax2) {
11031 if ((ctxt->sax != NULL) &&
11032 (ctxt->sax->endElementNs != NULL) &&
11033 (!ctxt->disableSAX))
11034 ctxt->sax->endElementNs(ctxt->userData, name,
11035 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011036 if (ctxt->nsNr - nsNr > 0)
11037 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011038#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011039 } else {
11040 if ((ctxt->sax != NULL) &&
11041 (ctxt->sax->endElement != NULL) &&
11042 (!ctxt->disableSAX))
11043 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011044#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011045 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011046 spacePop(ctxt);
11047 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011048 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011049 } else {
11050 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011051 }
11052 break;
11053 }
11054 if (RAW == '>') {
11055 NEXT;
11056 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011057 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011058 "Couldn't find end of Start Tag %s\n",
11059 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011060 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011061 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011062 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011063 if (ctxt->sax2)
11064 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011065#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011066 else
11067 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011068#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011069
Daniel Veillarda880b122003-04-21 21:36:41 +000011070 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011071 break;
11072 }
11073 case XML_PARSER_CONTENT: {
11074 const xmlChar *test;
11075 unsigned int cons;
11076 if ((avail < 2) && (ctxt->inputNr == 1))
11077 goto done;
11078 cur = ctxt->input->cur[0];
11079 next = ctxt->input->cur[1];
11080
11081 test = CUR_PTR;
11082 cons = ctxt->input->consumed;
11083 if ((cur == '<') && (next == '/')) {
11084 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011085 break;
11086 } else if ((cur == '<') && (next == '?')) {
11087 if ((!terminate) &&
11088 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11089 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011090 xmlParsePI(ctxt);
11091 } else if ((cur == '<') && (next != '!')) {
11092 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011093 break;
11094 } else if ((cur == '<') && (next == '!') &&
11095 (ctxt->input->cur[2] == '-') &&
11096 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011097 int term;
11098
11099 if (avail < 4)
11100 goto done;
11101 ctxt->input->cur += 4;
11102 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11103 ctxt->input->cur -= 4;
11104 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000011105 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011106 xmlParseComment(ctxt);
11107 ctxt->instate = XML_PARSER_CONTENT;
11108 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11109 (ctxt->input->cur[2] == '[') &&
11110 (ctxt->input->cur[3] == 'C') &&
11111 (ctxt->input->cur[4] == 'D') &&
11112 (ctxt->input->cur[5] == 'A') &&
11113 (ctxt->input->cur[6] == 'T') &&
11114 (ctxt->input->cur[7] == 'A') &&
11115 (ctxt->input->cur[8] == '[')) {
11116 SKIP(9);
11117 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011118 break;
11119 } else if ((cur == '<') && (next == '!') &&
11120 (avail < 9)) {
11121 goto done;
11122 } else if (cur == '&') {
11123 if ((!terminate) &&
11124 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11125 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011126 xmlParseReference(ctxt);
11127 } else {
11128 /* TODO Avoid the extra copy, handle directly !!! */
11129 /*
11130 * Goal of the following test is:
11131 * - minimize calls to the SAX 'character' callback
11132 * when they are mergeable
11133 * - handle an problem for isBlank when we only parse
11134 * a sequence of blank chars and the next one is
11135 * not available to check against '<' presence.
11136 * - tries to homogenize the differences in SAX
11137 * callbacks between the push and pull versions
11138 * of the parser.
11139 */
11140 if ((ctxt->inputNr == 1) &&
11141 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11142 if (!terminate) {
11143 if (ctxt->progressive) {
11144 if ((lastlt == NULL) ||
11145 (ctxt->input->cur > lastlt))
11146 goto done;
11147 } else if (xmlParseLookupSequence(ctxt,
11148 '<', 0, 0) < 0) {
11149 goto done;
11150 }
11151 }
11152 }
11153 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011154 xmlParseCharData(ctxt, 0);
11155 }
11156 /*
11157 * Pop-up of finished entities.
11158 */
11159 while ((RAW == 0) && (ctxt->inputNr > 1))
11160 xmlPopInput(ctxt);
11161 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011162 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11163 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011164 ctxt->instate = XML_PARSER_EOF;
11165 break;
11166 }
11167 break;
11168 }
11169 case XML_PARSER_END_TAG:
11170 if (avail < 2)
11171 goto done;
11172 if (!terminate) {
11173 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011174 /* > can be found unescaped in attribute values */
11175 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011176 goto done;
11177 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11178 goto done;
11179 }
11180 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011181 if (ctxt->sax2) {
11182 xmlParseEndTag2(ctxt,
11183 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11184 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011185 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011186 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011187 }
11188#ifdef LIBXML_SAX1_ENABLED
11189 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011190 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011191#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011192 if (ctxt->instate == XML_PARSER_EOF) {
11193 /* Nothing */
11194 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011195 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011196 } else {
11197 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011198 }
11199 break;
11200 case XML_PARSER_CDATA_SECTION: {
11201 /*
11202 * The Push mode need to have the SAX callback for
11203 * cdataBlock merge back contiguous callbacks.
11204 */
11205 int base;
11206
11207 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11208 if (base < 0) {
11209 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011210 int tmp;
11211
11212 tmp = xmlCheckCdataPush(ctxt->input->cur,
11213 XML_PARSER_BIG_BUFFER_SIZE);
11214 if (tmp < 0) {
11215 tmp = -tmp;
11216 ctxt->input->cur += tmp;
11217 goto encoding_error;
11218 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011219 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11220 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011221 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011222 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011223 else if (ctxt->sax->characters != NULL)
11224 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011225 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011226 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011227 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011228 ctxt->checkIndex = 0;
11229 }
11230 goto done;
11231 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011232 int tmp;
11233
11234 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11235 if ((tmp < 0) || (tmp != base)) {
11236 tmp = -tmp;
11237 ctxt->input->cur += tmp;
11238 goto encoding_error;
11239 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011240 if ((ctxt->sax != NULL) && (base == 0) &&
11241 (ctxt->sax->cdataBlock != NULL) &&
11242 (!ctxt->disableSAX)) {
11243 /*
11244 * Special case to provide identical behaviour
11245 * between pull and push parsers on enpty CDATA
11246 * sections
11247 */
11248 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11249 (!strncmp((const char *)&ctxt->input->cur[-9],
11250 "<![CDATA[", 9)))
11251 ctxt->sax->cdataBlock(ctxt->userData,
11252 BAD_CAST "", 0);
11253 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011254 (!ctxt->disableSAX)) {
11255 if (ctxt->sax->cdataBlock != NULL)
11256 ctxt->sax->cdataBlock(ctxt->userData,
11257 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011258 else if (ctxt->sax->characters != NULL)
11259 ctxt->sax->characters(ctxt->userData,
11260 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011261 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011262 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011263 ctxt->checkIndex = 0;
11264 ctxt->instate = XML_PARSER_CONTENT;
11265#ifdef DEBUG_PUSH
11266 xmlGenericError(xmlGenericErrorContext,
11267 "PP: entering CONTENT\n");
11268#endif
11269 }
11270 break;
11271 }
Owen Taylor3473f882001-02-23 17:55:21 +000011272 case XML_PARSER_MISC:
11273 SKIP_BLANKS;
11274 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011275 avail = ctxt->input->length -
11276 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011277 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011278 avail = ctxt->input->buf->buffer->use -
11279 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011280 if (avail < 2)
11281 goto done;
11282 cur = ctxt->input->cur[0];
11283 next = ctxt->input->cur[1];
11284 if ((cur == '<') && (next == '?')) {
11285 if ((!terminate) &&
11286 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11287 goto done;
11288#ifdef DEBUG_PUSH
11289 xmlGenericError(xmlGenericErrorContext,
11290 "PP: Parsing PI\n");
11291#endif
11292 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011293 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011294 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011295 (ctxt->input->cur[2] == '-') &&
11296 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011297 if ((!terminate) &&
11298 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11299 goto done;
11300#ifdef DEBUG_PUSH
11301 xmlGenericError(xmlGenericErrorContext,
11302 "PP: Parsing Comment\n");
11303#endif
11304 xmlParseComment(ctxt);
11305 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011306 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011307 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011308 (ctxt->input->cur[2] == 'D') &&
11309 (ctxt->input->cur[3] == 'O') &&
11310 (ctxt->input->cur[4] == 'C') &&
11311 (ctxt->input->cur[5] == 'T') &&
11312 (ctxt->input->cur[6] == 'Y') &&
11313 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011314 (ctxt->input->cur[8] == 'E')) {
11315 if ((!terminate) &&
11316 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11317 goto done;
11318#ifdef DEBUG_PUSH
11319 xmlGenericError(xmlGenericErrorContext,
11320 "PP: Parsing internal subset\n");
11321#endif
11322 ctxt->inSubset = 1;
11323 xmlParseDocTypeDecl(ctxt);
11324 if (RAW == '[') {
11325 ctxt->instate = XML_PARSER_DTD;
11326#ifdef DEBUG_PUSH
11327 xmlGenericError(xmlGenericErrorContext,
11328 "PP: entering DTD\n");
11329#endif
11330 } else {
11331 /*
11332 * Create and update the external subset.
11333 */
11334 ctxt->inSubset = 2;
11335 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11336 (ctxt->sax->externalSubset != NULL))
11337 ctxt->sax->externalSubset(ctxt->userData,
11338 ctxt->intSubName, ctxt->extSubSystem,
11339 ctxt->extSubURI);
11340 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011341 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011342 ctxt->instate = XML_PARSER_PROLOG;
11343#ifdef DEBUG_PUSH
11344 xmlGenericError(xmlGenericErrorContext,
11345 "PP: entering PROLOG\n");
11346#endif
11347 }
11348 } else if ((cur == '<') && (next == '!') &&
11349 (avail < 9)) {
11350 goto done;
11351 } else {
11352 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011353 ctxt->progressive = 1;
11354 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011355#ifdef DEBUG_PUSH
11356 xmlGenericError(xmlGenericErrorContext,
11357 "PP: entering START_TAG\n");
11358#endif
11359 }
11360 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011361 case XML_PARSER_PROLOG:
11362 SKIP_BLANKS;
11363 if (ctxt->input->buf == NULL)
11364 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11365 else
11366 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11367 if (avail < 2)
11368 goto done;
11369 cur = ctxt->input->cur[0];
11370 next = ctxt->input->cur[1];
11371 if ((cur == '<') && (next == '?')) {
11372 if ((!terminate) &&
11373 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11374 goto done;
11375#ifdef DEBUG_PUSH
11376 xmlGenericError(xmlGenericErrorContext,
11377 "PP: Parsing PI\n");
11378#endif
11379 xmlParsePI(ctxt);
11380 } else if ((cur == '<') && (next == '!') &&
11381 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11382 if ((!terminate) &&
11383 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11384 goto done;
11385#ifdef DEBUG_PUSH
11386 xmlGenericError(xmlGenericErrorContext,
11387 "PP: Parsing Comment\n");
11388#endif
11389 xmlParseComment(ctxt);
11390 ctxt->instate = XML_PARSER_PROLOG;
11391 } else if ((cur == '<') && (next == '!') &&
11392 (avail < 4)) {
11393 goto done;
11394 } else {
11395 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011396 if (ctxt->progressive == 0)
11397 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011398 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011399#ifdef DEBUG_PUSH
11400 xmlGenericError(xmlGenericErrorContext,
11401 "PP: entering START_TAG\n");
11402#endif
11403 }
11404 break;
11405 case XML_PARSER_EPILOG:
11406 SKIP_BLANKS;
11407 if (ctxt->input->buf == NULL)
11408 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11409 else
11410 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11411 if (avail < 2)
11412 goto done;
11413 cur = ctxt->input->cur[0];
11414 next = ctxt->input->cur[1];
11415 if ((cur == '<') && (next == '?')) {
11416 if ((!terminate) &&
11417 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11418 goto done;
11419#ifdef DEBUG_PUSH
11420 xmlGenericError(xmlGenericErrorContext,
11421 "PP: Parsing PI\n");
11422#endif
11423 xmlParsePI(ctxt);
11424 ctxt->instate = XML_PARSER_EPILOG;
11425 } else if ((cur == '<') && (next == '!') &&
11426 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11427 if ((!terminate) &&
11428 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11429 goto done;
11430#ifdef DEBUG_PUSH
11431 xmlGenericError(xmlGenericErrorContext,
11432 "PP: Parsing Comment\n");
11433#endif
11434 xmlParseComment(ctxt);
11435 ctxt->instate = XML_PARSER_EPILOG;
11436 } else if ((cur == '<') && (next == '!') &&
11437 (avail < 4)) {
11438 goto done;
11439 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011440 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011441 ctxt->instate = XML_PARSER_EOF;
11442#ifdef DEBUG_PUSH
11443 xmlGenericError(xmlGenericErrorContext,
11444 "PP: entering EOF\n");
11445#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011446 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011447 ctxt->sax->endDocument(ctxt->userData);
11448 goto done;
11449 }
11450 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011451 case XML_PARSER_DTD: {
11452 /*
11453 * Sorry but progressive parsing of the internal subset
11454 * is not expected to be supported. We first check that
11455 * the full content of the internal subset is available and
11456 * the parsing is launched only at that point.
11457 * Internal subset ends up with "']' S? '>'" in an unescaped
11458 * section and not in a ']]>' sequence which are conditional
11459 * sections (whoever argued to keep that crap in XML deserve
11460 * a place in hell !).
11461 */
11462 int base, i;
11463 xmlChar *buf;
11464 xmlChar quote = 0;
11465
11466 base = ctxt->input->cur - ctxt->input->base;
11467 if (base < 0) return(0);
11468 if (ctxt->checkIndex > base)
11469 base = ctxt->checkIndex;
11470 buf = ctxt->input->buf->buffer->content;
11471 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11472 base++) {
11473 if (quote != 0) {
11474 if (buf[base] == quote)
11475 quote = 0;
11476 continue;
11477 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011478 if ((quote == 0) && (buf[base] == '<')) {
11479 int found = 0;
11480 /* special handling of comments */
11481 if (((unsigned int) base + 4 <
11482 ctxt->input->buf->buffer->use) &&
11483 (buf[base + 1] == '!') &&
11484 (buf[base + 2] == '-') &&
11485 (buf[base + 3] == '-')) {
11486 for (;(unsigned int) base + 3 <
11487 ctxt->input->buf->buffer->use; base++) {
11488 if ((buf[base] == '-') &&
11489 (buf[base + 1] == '-') &&
11490 (buf[base + 2] == '>')) {
11491 found = 1;
11492 base += 2;
11493 break;
11494 }
11495 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011496 if (!found) {
11497#if 0
11498 fprintf(stderr, "unfinished comment\n");
11499#endif
11500 break; /* for */
11501 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011502 continue;
11503 }
11504 }
Owen Taylor3473f882001-02-23 17:55:21 +000011505 if (buf[base] == '"') {
11506 quote = '"';
11507 continue;
11508 }
11509 if (buf[base] == '\'') {
11510 quote = '\'';
11511 continue;
11512 }
11513 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011514#if 0
11515 fprintf(stderr, "%c%c%c%c: ", buf[base],
11516 buf[base + 1], buf[base + 2], buf[base + 3]);
11517#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011518 if ((unsigned int) base +1 >=
11519 ctxt->input->buf->buffer->use)
11520 break;
11521 if (buf[base + 1] == ']') {
11522 /* conditional crap, skip both ']' ! */
11523 base++;
11524 continue;
11525 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011526 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011527 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11528 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011529 if (buf[base + i] == '>') {
11530#if 0
11531 fprintf(stderr, "found\n");
11532#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011533 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011534 }
11535 if (!IS_BLANK_CH(buf[base + i])) {
11536#if 0
11537 fprintf(stderr, "not found\n");
11538#endif
11539 goto not_end_of_int_subset;
11540 }
Owen Taylor3473f882001-02-23 17:55:21 +000011541 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011542#if 0
11543 fprintf(stderr, "end of stream\n");
11544#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011545 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011546
Owen Taylor3473f882001-02-23 17:55:21 +000011547 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011548not_end_of_int_subset:
11549 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011550 }
11551 /*
11552 * We didn't found the end of the Internal subset
11553 */
Owen Taylor3473f882001-02-23 17:55:21 +000011554#ifdef DEBUG_PUSH
11555 if (next == 0)
11556 xmlGenericError(xmlGenericErrorContext,
11557 "PP: lookup of int subset end filed\n");
11558#endif
11559 goto done;
11560
11561found_end_int_subset:
11562 xmlParseInternalSubset(ctxt);
11563 ctxt->inSubset = 2;
11564 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11565 (ctxt->sax->externalSubset != NULL))
11566 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11567 ctxt->extSubSystem, ctxt->extSubURI);
11568 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011569 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011570 ctxt->instate = XML_PARSER_PROLOG;
11571 ctxt->checkIndex = 0;
11572#ifdef DEBUG_PUSH
11573 xmlGenericError(xmlGenericErrorContext,
11574 "PP: entering PROLOG\n");
11575#endif
11576 break;
11577 }
11578 case XML_PARSER_COMMENT:
11579 xmlGenericError(xmlGenericErrorContext,
11580 "PP: internal error, state == COMMENT\n");
11581 ctxt->instate = XML_PARSER_CONTENT;
11582#ifdef DEBUG_PUSH
11583 xmlGenericError(xmlGenericErrorContext,
11584 "PP: entering CONTENT\n");
11585#endif
11586 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011587 case XML_PARSER_IGNORE:
11588 xmlGenericError(xmlGenericErrorContext,
11589 "PP: internal error, state == IGNORE");
11590 ctxt->instate = XML_PARSER_DTD;
11591#ifdef DEBUG_PUSH
11592 xmlGenericError(xmlGenericErrorContext,
11593 "PP: entering DTD\n");
11594#endif
11595 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011596 case XML_PARSER_PI:
11597 xmlGenericError(xmlGenericErrorContext,
11598 "PP: internal error, state == PI\n");
11599 ctxt->instate = XML_PARSER_CONTENT;
11600#ifdef DEBUG_PUSH
11601 xmlGenericError(xmlGenericErrorContext,
11602 "PP: entering CONTENT\n");
11603#endif
11604 break;
11605 case XML_PARSER_ENTITY_DECL:
11606 xmlGenericError(xmlGenericErrorContext,
11607 "PP: internal error, state == ENTITY_DECL\n");
11608 ctxt->instate = XML_PARSER_DTD;
11609#ifdef DEBUG_PUSH
11610 xmlGenericError(xmlGenericErrorContext,
11611 "PP: entering DTD\n");
11612#endif
11613 break;
11614 case XML_PARSER_ENTITY_VALUE:
11615 xmlGenericError(xmlGenericErrorContext,
11616 "PP: internal error, state == ENTITY_VALUE\n");
11617 ctxt->instate = XML_PARSER_CONTENT;
11618#ifdef DEBUG_PUSH
11619 xmlGenericError(xmlGenericErrorContext,
11620 "PP: entering DTD\n");
11621#endif
11622 break;
11623 case XML_PARSER_ATTRIBUTE_VALUE:
11624 xmlGenericError(xmlGenericErrorContext,
11625 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11626 ctxt->instate = XML_PARSER_START_TAG;
11627#ifdef DEBUG_PUSH
11628 xmlGenericError(xmlGenericErrorContext,
11629 "PP: entering START_TAG\n");
11630#endif
11631 break;
11632 case XML_PARSER_SYSTEM_LITERAL:
11633 xmlGenericError(xmlGenericErrorContext,
11634 "PP: internal error, state == SYSTEM_LITERAL\n");
11635 ctxt->instate = XML_PARSER_START_TAG;
11636#ifdef DEBUG_PUSH
11637 xmlGenericError(xmlGenericErrorContext,
11638 "PP: entering START_TAG\n");
11639#endif
11640 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011641 case XML_PARSER_PUBLIC_LITERAL:
11642 xmlGenericError(xmlGenericErrorContext,
11643 "PP: internal error, state == PUBLIC_LITERAL\n");
11644 ctxt->instate = XML_PARSER_START_TAG;
11645#ifdef DEBUG_PUSH
11646 xmlGenericError(xmlGenericErrorContext,
11647 "PP: entering START_TAG\n");
11648#endif
11649 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011650 }
11651 }
11652done:
11653#ifdef DEBUG_PUSH
11654 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11655#endif
11656 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011657encoding_error:
11658 {
11659 char buffer[150];
11660
11661 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11662 ctxt->input->cur[0], ctxt->input->cur[1],
11663 ctxt->input->cur[2], ctxt->input->cur[3]);
11664 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11665 "Input is not proper UTF-8, indicate encoding !\n%s",
11666 BAD_CAST buffer, NULL);
11667 }
11668 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011669}
11670
11671/**
Owen Taylor3473f882001-02-23 17:55:21 +000011672 * xmlParseChunk:
11673 * @ctxt: an XML parser context
11674 * @chunk: an char array
11675 * @size: the size in byte of the chunk
11676 * @terminate: last chunk indicator
11677 *
11678 * Parse a Chunk of memory
11679 *
11680 * Returns zero if no error, the xmlParserErrors otherwise.
11681 */
11682int
11683xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11684 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011685 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011686 int remain = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000011687
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011688 if (ctxt == NULL)
11689 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011690 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011691 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011692 if (ctxt->instate == XML_PARSER_START)
11693 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011694 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11695 (chunk[size - 1] == '\r')) {
11696 end_in_lf = 1;
11697 size--;
11698 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011699
11700xmldecl_done:
11701
Owen Taylor3473f882001-02-23 17:55:21 +000011702 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11703 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11704 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11705 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011706 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011707
11708 /*
11709 * Specific handling if we autodetected an encoding, we should not
11710 * push more than the first line ... which depend on the encoding
11711 * And only push the rest once the final encoding was detected
11712 */
11713 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11714 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010011715 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011716
11717 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11718 BAD_CAST "UTF-16")) ||
11719 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11720 BAD_CAST "UTF16")))
11721 len = 90;
11722 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11723 BAD_CAST "UCS-4")) ||
11724 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11725 BAD_CAST "UCS4")))
11726 len = 180;
11727
11728 if (ctxt->input->buf->rawconsumed < len)
11729 len -= ctxt->input->buf->rawconsumed;
11730
Raul Hudeaba9716a2010-03-15 10:13:29 +010011731 /*
11732 * Change size for reading the initial declaration only
11733 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11734 * will blindly copy extra bytes from memory.
11735 */
Daniel Veillard60587d62010-11-04 15:16:27 +010011736 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010011737 remain = size - len;
11738 size = len;
11739 } else {
11740 remain = 0;
11741 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011742 }
William M. Bracka3215c72004-07-31 16:24:01 +000011743 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11744 if (res < 0) {
11745 ctxt->errNo = XML_PARSER_EOF;
11746 ctxt->disableSAX = 1;
11747 return (XML_PARSER_EOF);
11748 }
Owen Taylor3473f882001-02-23 17:55:21 +000011749 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11750 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011751 ctxt->input->end =
11752 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011753#ifdef DEBUG_PUSH
11754 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11755#endif
11756
Owen Taylor3473f882001-02-23 17:55:21 +000011757 } else if (ctxt->instate != XML_PARSER_EOF) {
11758 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11759 xmlParserInputBufferPtr in = ctxt->input->buf;
11760 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11761 (in->raw != NULL)) {
11762 int nbchars;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011763
Owen Taylor3473f882001-02-23 17:55:21 +000011764 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11765 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011766 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011767 xmlGenericError(xmlGenericErrorContext,
11768 "xmlParseChunk: encoder error\n");
11769 return(XML_ERR_INVALID_ENCODING);
11770 }
11771 }
11772 }
11773 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011774 if (remain != 0)
11775 xmlParseTryOrFinish(ctxt, 0);
11776 else
11777 xmlParseTryOrFinish(ctxt, terminate);
11778 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11779 return(ctxt->errNo);
11780
11781 if (remain != 0) {
11782 chunk += size;
11783 size = remain;
11784 remain = 0;
11785 goto xmldecl_done;
11786 }
Daniel Veillarda617e242006-01-09 14:38:44 +000011787 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11788 (ctxt->input->buf != NULL)) {
11789 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11790 }
Owen Taylor3473f882001-02-23 17:55:21 +000011791 if (terminate) {
11792 /*
11793 * Check for termination
11794 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011795 int avail = 0;
11796
11797 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011798 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011799 avail = ctxt->input->length -
11800 (ctxt->input->cur - ctxt->input->base);
11801 else
11802 avail = ctxt->input->buf->buffer->use -
11803 (ctxt->input->cur - ctxt->input->base);
11804 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011805
Owen Taylor3473f882001-02-23 17:55:21 +000011806 if ((ctxt->instate != XML_PARSER_EOF) &&
11807 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011808 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011809 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011810 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011811 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011812 }
Owen Taylor3473f882001-02-23 17:55:21 +000011813 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011814 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011815 ctxt->sax->endDocument(ctxt->userData);
11816 }
11817 ctxt->instate = XML_PARSER_EOF;
11818 }
11819 return((xmlParserErrors) ctxt->errNo);
11820}
11821
11822/************************************************************************
11823 * *
11824 * I/O front end functions to the parser *
11825 * *
11826 ************************************************************************/
11827
11828/**
Owen Taylor3473f882001-02-23 17:55:21 +000011829 * xmlCreatePushParserCtxt:
11830 * @sax: a SAX handler
11831 * @user_data: The user data returned on SAX callbacks
11832 * @chunk: a pointer to an array of chars
11833 * @size: number of chars in the array
11834 * @filename: an optional file name or URI
11835 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011836 * Create a parser context for using the XML parser in push mode.
 * If @chunk is non-NULL and @size is non-zero, the data is used to detect
 * the encoding. The remaining characters will be parsed so they
 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011840 * To allow content encoding detection, @size should be >= 4
11841 * The value of @filename is used for fetching external entities
11842 * and error/warning reports.
11843 *
11844 * Returns the new parser context or NULL
11845 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011846
Owen Taylor3473f882001-02-23 17:55:21 +000011847xmlParserCtxtPtr
11848xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11849 const char *chunk, int size, const char *filename) {
11850 xmlParserCtxtPtr ctxt;
11851 xmlParserInputPtr inputStream;
11852 xmlParserInputBufferPtr buf;
11853 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11854
11855 /*
11856 * plug some encoding conversion routines
11857 */
11858 if ((chunk != NULL) && (size >= 4))
11859 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11860
11861 buf = xmlAllocParserInputBuffer(enc);
11862 if (buf == NULL) return(NULL);
11863
11864 ctxt = xmlNewParserCtxt();
11865 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011866 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011867 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011868 return(NULL);
11869 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011870 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011871 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11872 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011873 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011874 xmlFreeParserInputBuffer(buf);
11875 xmlFreeParserCtxt(ctxt);
11876 return(NULL);
11877 }
Owen Taylor3473f882001-02-23 17:55:21 +000011878 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011879#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011880 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011881#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011882 xmlFree(ctxt->sax);
11883 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11884 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011885 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011886 xmlFreeParserInputBuffer(buf);
11887 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011888 return(NULL);
11889 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011890 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11891 if (sax->initialized == XML_SAX2_MAGIC)
11892 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11893 else
11894 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011895 if (user_data != NULL)
11896 ctxt->userData = user_data;
11897 }
11898 if (filename == NULL) {
11899 ctxt->directory = NULL;
11900 } else {
11901 ctxt->directory = xmlParserGetDirectory(filename);
11902 }
11903
11904 inputStream = xmlNewInputStream(ctxt);
11905 if (inputStream == NULL) {
11906 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011907 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011908 return(NULL);
11909 }
11910
11911 if (filename == NULL)
11912 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011913 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011914 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011915 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011916 if (inputStream->filename == NULL) {
11917 xmlFreeParserCtxt(ctxt);
11918 xmlFreeParserInputBuffer(buf);
11919 return(NULL);
11920 }
11921 }
Owen Taylor3473f882001-02-23 17:55:21 +000011922 inputStream->buf = buf;
11923 inputStream->base = inputStream->buf->buffer->content;
11924 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011925 inputStream->end =
11926 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011927
11928 inputPush(ctxt, inputStream);
11929
William M. Brack3a1cd212005-02-11 14:35:54 +000011930 /*
11931 * If the caller didn't provide an initial 'chunk' for determining
11932 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11933 * that it can be automatically determined later
11934 */
11935 if ((size == 0) || (chunk == NULL)) {
11936 ctxt->charset = XML_CHAR_ENCODING_NONE;
11937 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011938 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11939 int cur = ctxt->input->cur - ctxt->input->base;
11940
Owen Taylor3473f882001-02-23 17:55:21 +000011941 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011942
11943 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11944 ctxt->input->cur = ctxt->input->base + cur;
11945 ctxt->input->end =
11946 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011947#ifdef DEBUG_PUSH
11948 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11949#endif
11950 }
11951
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011952 if (enc != XML_CHAR_ENCODING_NONE) {
11953 xmlSwitchEncoding(ctxt, enc);
11954 }
11955
Owen Taylor3473f882001-02-23 17:55:21 +000011956 return(ctxt);
11957}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011958#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011959
11960/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011961 * xmlStopParser:
11962 * @ctxt: an XML parser context
11963 *
11964 * Blocks further parser processing
11965 */
11966void
11967xmlStopParser(xmlParserCtxtPtr ctxt) {
11968 if (ctxt == NULL)
11969 return;
11970 ctxt->instate = XML_PARSER_EOF;
11971 ctxt->disableSAX = 1;
11972 if (ctxt->input != NULL) {
11973 ctxt->input->cur = BAD_CAST"";
11974 ctxt->input->base = ctxt->input->cur;
11975 }
11976}
11977
11978/**
Owen Taylor3473f882001-02-23 17:55:21 +000011979 * xmlCreateIOParserCtxt:
11980 * @sax: a SAX handler
11981 * @user_data: The user data returned on SAX callbacks
11982 * @ioread: an I/O read function
11983 * @ioclose: an I/O close function
11984 * @ioctx: an I/O handler
11985 * @enc: the charset encoding if known
11986 *
11987 * Create a parser context for using the XML parser with an existing
11988 * I/O stream
11989 *
11990 * Returns the new parser context or NULL
11991 */
11992xmlParserCtxtPtr
11993xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11994 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11995 void *ioctx, xmlCharEncoding enc) {
11996 xmlParserCtxtPtr ctxt;
11997 xmlParserInputPtr inputStream;
11998 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080011999
Daniel Veillard42595322004-11-08 10:52:06 +000012000 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012001
12002 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012003 if (buf == NULL) {
12004 if (ioclose != NULL)
12005 ioclose(ioctx);
12006 return (NULL);
12007 }
Owen Taylor3473f882001-02-23 17:55:21 +000012008
12009 ctxt = xmlNewParserCtxt();
12010 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012011 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012012 return(NULL);
12013 }
12014 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012015#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012016 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012017#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012018 xmlFree(ctxt->sax);
12019 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12020 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012021 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012022 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012023 return(NULL);
12024 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012025 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12026 if (sax->initialized == XML_SAX2_MAGIC)
12027 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12028 else
12029 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012030 if (user_data != NULL)
12031 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012032 }
Owen Taylor3473f882001-02-23 17:55:21 +000012033
12034 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12035 if (inputStream == NULL) {
12036 xmlFreeParserCtxt(ctxt);
12037 return(NULL);
12038 }
12039 inputPush(ctxt, inputStream);
12040
12041 return(ctxt);
12042}
12043
Daniel Veillard4432df22003-09-28 18:58:27 +000012044#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012045/************************************************************************
12046 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012047 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012048 * *
12049 ************************************************************************/
12050
12051/**
12052 * xmlIOParseDTD:
12053 * @sax: the SAX handler block or NULL
12054 * @input: an Input Buffer
12055 * @enc: the charset encoding if known
12056 *
12057 * Load and parse a DTD
12058 *
12059 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012060 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012061 */
12062
12063xmlDtdPtr
12064xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12065 xmlCharEncoding enc) {
12066 xmlDtdPtr ret = NULL;
12067 xmlParserCtxtPtr ctxt;
12068 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012069 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012070
12071 if (input == NULL)
12072 return(NULL);
12073
12074 ctxt = xmlNewParserCtxt();
12075 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012076 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012077 return(NULL);
12078 }
12079
12080 /*
12081 * Set-up the SAX context
12082 */
12083 if (sax != NULL) {
12084 if (ctxt->sax != NULL)
12085 xmlFree(ctxt->sax);
12086 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012087 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012088 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012089 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012090
12091 /*
12092 * generate a parser input from the I/O handler
12093 */
12094
Daniel Veillard43caefb2003-12-07 19:32:22 +000012095 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012096 if (pinput == NULL) {
12097 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012098 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012099 xmlFreeParserCtxt(ctxt);
12100 return(NULL);
12101 }
12102
12103 /*
12104 * plug some encoding conversion routines here.
12105 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012106 if (xmlPushInput(ctxt, pinput) < 0) {
12107 if (sax != NULL) ctxt->sax = NULL;
12108 xmlFreeParserCtxt(ctxt);
12109 return(NULL);
12110 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012111 if (enc != XML_CHAR_ENCODING_NONE) {
12112 xmlSwitchEncoding(ctxt, enc);
12113 }
Owen Taylor3473f882001-02-23 17:55:21 +000012114
12115 pinput->filename = NULL;
12116 pinput->line = 1;
12117 pinput->col = 1;
12118 pinput->base = ctxt->input->cur;
12119 pinput->cur = ctxt->input->cur;
12120 pinput->free = NULL;
12121
12122 /*
12123 * let's parse that entity knowing it's an external subset.
12124 */
12125 ctxt->inSubset = 2;
12126 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012127 if (ctxt->myDoc == NULL) {
12128 xmlErrMemory(ctxt, "New Doc failed");
12129 return(NULL);
12130 }
12131 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012132 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12133 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012134
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012135 if ((enc == XML_CHAR_ENCODING_NONE) &&
12136 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000012137 /*
12138 * Get the 4 first bytes and decode the charset
12139 * if enc != XML_CHAR_ENCODING_NONE
12140 * plug some encoding conversion routines.
12141 */
12142 start[0] = RAW;
12143 start[1] = NXT(1);
12144 start[2] = NXT(2);
12145 start[3] = NXT(3);
12146 enc = xmlDetectCharEncoding(start, 4);
12147 if (enc != XML_CHAR_ENCODING_NONE) {
12148 xmlSwitchEncoding(ctxt, enc);
12149 }
12150 }
12151
Owen Taylor3473f882001-02-23 17:55:21 +000012152 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12153
12154 if (ctxt->myDoc != NULL) {
12155 if (ctxt->wellFormed) {
12156 ret = ctxt->myDoc->extSubset;
12157 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012158 if (ret != NULL) {
12159 xmlNodePtr tmp;
12160
12161 ret->doc = NULL;
12162 tmp = ret->children;
12163 while (tmp != NULL) {
12164 tmp->doc = NULL;
12165 tmp = tmp->next;
12166 }
12167 }
Owen Taylor3473f882001-02-23 17:55:21 +000012168 } else {
12169 ret = NULL;
12170 }
12171 xmlFreeDoc(ctxt->myDoc);
12172 ctxt->myDoc = NULL;
12173 }
12174 if (sax != NULL) ctxt->sax = NULL;
12175 xmlFreeParserCtxt(ctxt);
12176
12177 return(ret);
12178}
12179
12180/**
12181 * xmlSAXParseDTD:
12182 * @sax: the SAX handler block
12183 * @ExternalID: a NAME* containing the External ID of the DTD
12184 * @SystemID: a NAME* containing the URL to the DTD
12185 *
12186 * Load and parse an external subset.
12187 *
12188 * Returns the resulting xmlDtdPtr or NULL in case of error.
12189 */
12190
12191xmlDtdPtr
12192xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12193 const xmlChar *SystemID) {
12194 xmlDtdPtr ret = NULL;
12195 xmlParserCtxtPtr ctxt;
12196 xmlParserInputPtr input = NULL;
12197 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012198 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012199
12200 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12201
12202 ctxt = xmlNewParserCtxt();
12203 if (ctxt == NULL) {
12204 return(NULL);
12205 }
12206
12207 /*
12208 * Set-up the SAX context
12209 */
12210 if (sax != NULL) {
12211 if (ctxt->sax != NULL)
12212 xmlFree(ctxt->sax);
12213 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012214 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012215 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012216
12217 /*
12218 * Canonicalise the system ID
12219 */
12220 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012221 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012222 xmlFreeParserCtxt(ctxt);
12223 return(NULL);
12224 }
Owen Taylor3473f882001-02-23 17:55:21 +000012225
12226 /*
12227 * Ask the Entity resolver to load the damn thing
12228 */
12229
12230 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012231 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12232 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012233 if (input == NULL) {
12234 if (sax != NULL) ctxt->sax = NULL;
12235 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012236 if (systemIdCanonic != NULL)
12237 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012238 return(NULL);
12239 }
12240
12241 /*
12242 * plug some encoding conversion routines here.
12243 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012244 if (xmlPushInput(ctxt, input) < 0) {
12245 if (sax != NULL) ctxt->sax = NULL;
12246 xmlFreeParserCtxt(ctxt);
12247 if (systemIdCanonic != NULL)
12248 xmlFree(systemIdCanonic);
12249 return(NULL);
12250 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012251 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12252 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12253 xmlSwitchEncoding(ctxt, enc);
12254 }
Owen Taylor3473f882001-02-23 17:55:21 +000012255
12256 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012257 input->filename = (char *) systemIdCanonic;
12258 else
12259 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012260 input->line = 1;
12261 input->col = 1;
12262 input->base = ctxt->input->cur;
12263 input->cur = ctxt->input->cur;
12264 input->free = NULL;
12265
12266 /*
12267 * let's parse that entity knowing it's an external subset.
12268 */
12269 ctxt->inSubset = 2;
12270 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012271 if (ctxt->myDoc == NULL) {
12272 xmlErrMemory(ctxt, "New Doc failed");
12273 if (sax != NULL) ctxt->sax = NULL;
12274 xmlFreeParserCtxt(ctxt);
12275 return(NULL);
12276 }
12277 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012278 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12279 ExternalID, SystemID);
12280 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12281
12282 if (ctxt->myDoc != NULL) {
12283 if (ctxt->wellFormed) {
12284 ret = ctxt->myDoc->extSubset;
12285 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012286 if (ret != NULL) {
12287 xmlNodePtr tmp;
12288
12289 ret->doc = NULL;
12290 tmp = ret->children;
12291 while (tmp != NULL) {
12292 tmp->doc = NULL;
12293 tmp = tmp->next;
12294 }
12295 }
Owen Taylor3473f882001-02-23 17:55:21 +000012296 } else {
12297 ret = NULL;
12298 }
12299 xmlFreeDoc(ctxt->myDoc);
12300 ctxt->myDoc = NULL;
12301 }
12302 if (sax != NULL) ctxt->sax = NULL;
12303 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012304
Owen Taylor3473f882001-02-23 17:55:21 +000012305 return(ret);
12306}
12307
Daniel Veillard4432df22003-09-28 18:58:27 +000012308
Owen Taylor3473f882001-02-23 17:55:21 +000012309/**
12310 * xmlParseDTD:
12311 * @ExternalID: a NAME* containing the External ID of the DTD
12312 * @SystemID: a NAME* containing the URL to the DTD
12313 *
12314 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012315 *
Owen Taylor3473f882001-02-23 17:55:21 +000012316 * Returns the resulting xmlDtdPtr or NULL in case of error.
12317 */
12318
12319xmlDtdPtr
12320xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12321 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12322}
Daniel Veillard4432df22003-09-28 18:58:27 +000012323#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012324
12325/************************************************************************
12326 * *
12327 * Front ends when parsing an Entity *
12328 * *
12329 ************************************************************************/
12330
12331/**
Owen Taylor3473f882001-02-23 17:55:21 +000012332 * xmlParseCtxtExternalEntity:
12333 * @ctx: the existing parsing context
12334 * @URL: the URL for the entity to load
12335 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012336 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012337 *
12338 * Parse an external general entity within an existing parsing context
12339 * An external general parsed entity is well-formed if it matches the
12340 * production labeled extParsedEnt.
12341 *
12342 * [78] extParsedEnt ::= TextDecl? content
12343 *
12344 * Returns 0 if the entity is well formed, -1 in case of args problem and
12345 * the parser error code otherwise
12346 */
12347
12348int
12349xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012350 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012351 xmlParserCtxtPtr ctxt;
12352 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012353 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012354 xmlSAXHandlerPtr oldsax = NULL;
12355 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012356 xmlChar start[4];
12357 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012358
Daniel Veillardce682bc2004-11-05 17:22:25 +000012359 if (ctx == NULL) return(-1);
12360
Daniel Veillard0161e632008-08-28 15:36:32 +000012361 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12362 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012363 return(XML_ERR_ENTITY_LOOP);
12364 }
12365
Daniel Veillardcda96922001-08-21 10:56:31 +000012366 if (lst != NULL)
12367 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012368 if ((URL == NULL) && (ID == NULL))
12369 return(-1);
12370 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12371 return(-1);
12372
Rob Richards798743a2009-06-19 13:54:25 -040012373 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012374 if (ctxt == NULL) {
12375 return(-1);
12376 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012377
Owen Taylor3473f882001-02-23 17:55:21 +000012378 oldsax = ctxt->sax;
12379 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012380 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012381 newDoc = xmlNewDoc(BAD_CAST "1.0");
12382 if (newDoc == NULL) {
12383 xmlFreeParserCtxt(ctxt);
12384 return(-1);
12385 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012386 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012387 if (ctx->myDoc->dict) {
12388 newDoc->dict = ctx->myDoc->dict;
12389 xmlDictReference(newDoc->dict);
12390 }
Owen Taylor3473f882001-02-23 17:55:21 +000012391 if (ctx->myDoc != NULL) {
12392 newDoc->intSubset = ctx->myDoc->intSubset;
12393 newDoc->extSubset = ctx->myDoc->extSubset;
12394 }
12395 if (ctx->myDoc->URL != NULL) {
12396 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12397 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012398 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12399 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012400 ctxt->sax = oldsax;
12401 xmlFreeParserCtxt(ctxt);
12402 newDoc->intSubset = NULL;
12403 newDoc->extSubset = NULL;
12404 xmlFreeDoc(newDoc);
12405 return(-1);
12406 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012407 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012408 nodePush(ctxt, newDoc->children);
12409 if (ctx->myDoc == NULL) {
12410 ctxt->myDoc = newDoc;
12411 } else {
12412 ctxt->myDoc = ctx->myDoc;
12413 newDoc->children->doc = ctx->myDoc;
12414 }
12415
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012416 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012417 * Get the 4 first bytes and decode the charset
12418 * if enc != XML_CHAR_ENCODING_NONE
12419 * plug some encoding conversion routines.
12420 */
12421 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012422 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12423 start[0] = RAW;
12424 start[1] = NXT(1);
12425 start[2] = NXT(2);
12426 start[3] = NXT(3);
12427 enc = xmlDetectCharEncoding(start, 4);
12428 if (enc != XML_CHAR_ENCODING_NONE) {
12429 xmlSwitchEncoding(ctxt, enc);
12430 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012431 }
12432
Owen Taylor3473f882001-02-23 17:55:21 +000012433 /*
12434 * Parse a possible text declaration first
12435 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012436 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012437 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012438 /*
12439 * An XML-1.0 document can't reference an entity not XML-1.0
12440 */
12441 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12442 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12443 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12444 "Version mismatch between document and entity\n");
12445 }
Owen Taylor3473f882001-02-23 17:55:21 +000012446 }
12447
12448 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012449 * If the user provided its own SAX callbacks then reuse the
12450 * useData callback field, otherwise the expected setup in a
12451 * DOM builder is to have userData == ctxt
12452 */
12453 if (ctx->userData == ctx)
12454 ctxt->userData = ctxt;
12455 else
12456 ctxt->userData = ctx->userData;
12457
12458 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012459 * Doing validity checking on chunk doesn't make sense
12460 */
12461 ctxt->instate = XML_PARSER_CONTENT;
12462 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012463 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012464 ctxt->loadsubset = ctx->loadsubset;
12465 ctxt->depth = ctx->depth + 1;
12466 ctxt->replaceEntities = ctx->replaceEntities;
12467 if (ctxt->validate) {
12468 ctxt->vctxt.error = ctx->vctxt.error;
12469 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012470 } else {
12471 ctxt->vctxt.error = NULL;
12472 ctxt->vctxt.warning = NULL;
12473 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012474 ctxt->vctxt.nodeTab = NULL;
12475 ctxt->vctxt.nodeNr = 0;
12476 ctxt->vctxt.nodeMax = 0;
12477 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012478 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12479 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012480 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12481 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12482 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012483 ctxt->dictNames = ctx->dictNames;
12484 ctxt->attsDefault = ctx->attsDefault;
12485 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012486 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012487
12488 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012489
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012490 ctx->validate = ctxt->validate;
12491 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012492 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012493 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012494 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012495 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012496 }
12497 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012498 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012499 }
12500
12501 if (!ctxt->wellFormed) {
12502 if (ctxt->errNo == 0)
12503 ret = 1;
12504 else
12505 ret = ctxt->errNo;
12506 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012507 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012508 xmlNodePtr cur;
12509
12510 /*
12511 * Return the newly created nodeset after unlinking it from
12512 * they pseudo parent.
12513 */
12514 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012515 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012516 while (cur != NULL) {
12517 cur->parent = NULL;
12518 cur = cur->next;
12519 }
12520 newDoc->children->children = NULL;
12521 }
12522 ret = 0;
12523 }
12524 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012525 ctxt->dict = NULL;
12526 ctxt->attsDefault = NULL;
12527 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012528 xmlFreeParserCtxt(ctxt);
12529 newDoc->intSubset = NULL;
12530 newDoc->extSubset = NULL;
12531 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012532
Owen Taylor3473f882001-02-23 17:55:21 +000012533 return(ret);
12534}
12535
12536/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012537 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012538 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012539 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012540 * @sax: the SAX handler bloc (possibly NULL)
12541 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12542 * @depth: Used for loop detection, use 0
12543 * @URL: the URL for the entity to load
12544 * @ID: the System ID for the entity to load
12545 * @list: the return value for the set of parsed nodes
12546 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012547 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012548 *
12549 * Returns 0 if the entity is well formed, -1 in case of args problem and
12550 * the parser error code otherwise
12551 */
12552
Daniel Veillard7d515752003-09-26 19:12:37 +000012553static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012554xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12555 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012556 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012557 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012558 xmlParserCtxtPtr ctxt;
12559 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012560 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012561 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012562 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012563 xmlChar start[4];
12564 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012565
Daniel Veillard0161e632008-08-28 15:36:32 +000012566 if (((depth > 40) &&
12567 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12568 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012569 return(XML_ERR_ENTITY_LOOP);
12570 }
12571
Owen Taylor3473f882001-02-23 17:55:21 +000012572 if (list != NULL)
12573 *list = NULL;
12574 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012575 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012576 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012577 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012578
12579
Rob Richards9c0aa472009-03-26 18:10:19 +000012580 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012581 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012582 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012583 if (oldctxt != NULL) {
12584 ctxt->_private = oldctxt->_private;
12585 ctxt->loadsubset = oldctxt->loadsubset;
12586 ctxt->validate = oldctxt->validate;
12587 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012588 ctxt->record_info = oldctxt->record_info;
12589 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12590 ctxt->node_seq.length = oldctxt->node_seq.length;
12591 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012592 } else {
12593 /*
12594 * Doing validity checking on chunk without context
12595 * doesn't make sense
12596 */
12597 ctxt->_private = NULL;
12598 ctxt->validate = 0;
12599 ctxt->external = 2;
12600 ctxt->loadsubset = 0;
12601 }
Owen Taylor3473f882001-02-23 17:55:21 +000012602 if (sax != NULL) {
12603 oldsax = ctxt->sax;
12604 ctxt->sax = sax;
12605 if (user_data != NULL)
12606 ctxt->userData = user_data;
12607 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012608 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012609 newDoc = xmlNewDoc(BAD_CAST "1.0");
12610 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012611 ctxt->node_seq.maximum = 0;
12612 ctxt->node_seq.length = 0;
12613 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012614 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012615 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012616 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012617 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012618 newDoc->intSubset = doc->intSubset;
12619 newDoc->extSubset = doc->extSubset;
12620 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012621 xmlDictReference(newDoc->dict);
12622
Owen Taylor3473f882001-02-23 17:55:21 +000012623 if (doc->URL != NULL) {
12624 newDoc->URL = xmlStrdup(doc->URL);
12625 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012626 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12627 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012628 if (sax != NULL)
12629 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012630 ctxt->node_seq.maximum = 0;
12631 ctxt->node_seq.length = 0;
12632 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012633 xmlFreeParserCtxt(ctxt);
12634 newDoc->intSubset = NULL;
12635 newDoc->extSubset = NULL;
12636 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012637 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012638 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012639 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012640 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012641 ctxt->myDoc = doc;
12642 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012643
Daniel Veillard0161e632008-08-28 15:36:32 +000012644 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012645 * Get the 4 first bytes and decode the charset
12646 * if enc != XML_CHAR_ENCODING_NONE
12647 * plug some encoding conversion routines.
12648 */
12649 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012650 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12651 start[0] = RAW;
12652 start[1] = NXT(1);
12653 start[2] = NXT(2);
12654 start[3] = NXT(3);
12655 enc = xmlDetectCharEncoding(start, 4);
12656 if (enc != XML_CHAR_ENCODING_NONE) {
12657 xmlSwitchEncoding(ctxt, enc);
12658 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012659 }
12660
Owen Taylor3473f882001-02-23 17:55:21 +000012661 /*
12662 * Parse a possible text declaration first
12663 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012664 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012665 xmlParseTextDecl(ctxt);
12666 }
12667
Owen Taylor3473f882001-02-23 17:55:21 +000012668 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012669 ctxt->depth = depth;
12670
12671 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012672
Daniel Veillard561b7f82002-03-20 21:55:57 +000012673 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012674 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012675 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012676 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012677 }
12678 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012679 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012680 }
12681
12682 if (!ctxt->wellFormed) {
12683 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012684 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012685 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012686 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012687 } else {
12688 if (list != NULL) {
12689 xmlNodePtr cur;
12690
12691 /*
12692 * Return the newly created nodeset after unlinking it from
12693 * they pseudo parent.
12694 */
12695 cur = newDoc->children->children;
12696 *list = cur;
12697 while (cur != NULL) {
12698 cur->parent = NULL;
12699 cur = cur->next;
12700 }
12701 newDoc->children->children = NULL;
12702 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012703 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012704 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012705
12706 /*
12707 * Record in the parent context the number of entities replacement
12708 * done when parsing that reference.
12709 */
Daniel Veillard76d36452009-09-07 11:19:33 +020012710 if (oldctxt != NULL)
12711 oldctxt->nbentities += ctxt->nbentities;
12712
Daniel Veillard0161e632008-08-28 15:36:32 +000012713 /*
12714 * Also record the size of the entity parsed
12715 */
12716 if (ctxt->input != NULL) {
12717 oldctxt->sizeentities += ctxt->input->consumed;
12718 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12719 }
12720 /*
12721 * And record the last error if any
12722 */
12723 if (ctxt->lastError.code != XML_ERR_OK)
12724 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12725
Owen Taylor3473f882001-02-23 17:55:21 +000012726 if (sax != NULL)
12727 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012728 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12729 oldctxt->node_seq.length = ctxt->node_seq.length;
12730 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012731 ctxt->node_seq.maximum = 0;
12732 ctxt->node_seq.length = 0;
12733 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012734 xmlFreeParserCtxt(ctxt);
12735 newDoc->intSubset = NULL;
12736 newDoc->extSubset = NULL;
12737 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012738
Owen Taylor3473f882001-02-23 17:55:21 +000012739 return(ret);
12740}
12741
Daniel Veillard81273902003-09-30 00:43:48 +000012742#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012743/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012744 * xmlParseExternalEntity:
12745 * @doc: the document the chunk pertains to
12746 * @sax: the SAX handler bloc (possibly NULL)
12747 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12748 * @depth: Used for loop detection, use 0
12749 * @URL: the URL for the entity to load
12750 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012751 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012752 *
12753 * Parse an external general entity
12754 * An external general parsed entity is well-formed if it matches the
12755 * production labeled extParsedEnt.
12756 *
12757 * [78] extParsedEnt ::= TextDecl? content
12758 *
12759 * Returns 0 if the entity is well formed, -1 in case of args problem and
12760 * the parser error code otherwise
12761 */
12762
12763int
12764xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012765 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012766 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012767 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012768}
12769
12770/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012771 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012772 * @doc: the document the chunk pertains to
12773 * @sax: the SAX handler bloc (possibly NULL)
12774 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12775 * @depth: Used for loop detection, use 0
12776 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012777 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012778 *
12779 * Parse a well-balanced chunk of an XML document
12780 * called by the parser
12781 * The allowed sequence for the Well Balanced Chunk is the one defined by
12782 * the content production in the XML grammar:
12783 *
12784 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12785 *
12786 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12787 * the parser error code otherwise
12788 */
12789
12790int
12791xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012792 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012793 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12794 depth, string, lst, 0 );
12795}
Daniel Veillard81273902003-09-30 00:43:48 +000012796#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012797
12798/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012799 * xmlParseBalancedChunkMemoryInternal:
12800 * @oldctxt: the existing parsing context
12801 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12802 * @user_data: the user data field for the parser context
12803 * @lst: the return value for the set of parsed nodes
12804 *
12805 *
12806 * Parse a well-balanced chunk of an XML document
12807 * called by the parser
12808 * The allowed sequence for the Well Balanced Chunk is the one defined by
12809 * the content production in the XML grammar:
12810 *
12811 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12812 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012813 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12814 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012815 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012816 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012817 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012818 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012819static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012820xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12821 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12822 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012823 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012824 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012825 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012826 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012827 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012828 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012829 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020012830#ifdef SAX2
12831 int i;
12832#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000012833
Daniel Veillard0161e632008-08-28 15:36:32 +000012834 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12835 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012836 return(XML_ERR_ENTITY_LOOP);
12837 }
12838
12839
12840 if (lst != NULL)
12841 *lst = NULL;
12842 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012843 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012844
12845 size = xmlStrlen(string);
12846
12847 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012848 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012849 if (user_data != NULL)
12850 ctxt->userData = user_data;
12851 else
12852 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012853 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12854 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012855 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12856 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12857 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012858
Daniel Veillard74eaec12009-08-26 15:57:20 +020012859#ifdef SAX2
12860 /* propagate namespaces down the entity */
12861 for (i = 0;i < oldctxt->nsNr;i += 2) {
12862 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12863 }
12864#endif
12865
Daniel Veillard328f48c2002-11-15 15:24:34 +000012866 oldsax = ctxt->sax;
12867 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012868 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012869 ctxt->replaceEntities = oldctxt->replaceEntities;
12870 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012871
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012872 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012873 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012874 newDoc = xmlNewDoc(BAD_CAST "1.0");
12875 if (newDoc == NULL) {
12876 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012877 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012878 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012879 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012880 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012881 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012882 newDoc->dict = ctxt->dict;
12883 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012884 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012885 } else {
12886 ctxt->myDoc = oldctxt->myDoc;
12887 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012888 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012889 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012890 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12891 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012892 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012893 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012894 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012895 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012896 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012897 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012898 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012899 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012900 ctxt->myDoc->children = NULL;
12901 ctxt->myDoc->last = NULL;
12902 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012903 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012904 ctxt->instate = XML_PARSER_CONTENT;
12905 ctxt->depth = oldctxt->depth + 1;
12906
Daniel Veillard328f48c2002-11-15 15:24:34 +000012907 ctxt->validate = 0;
12908 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012909 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12910 /*
12911 * ID/IDREF registration will be done in xmlValidateElement below
12912 */
12913 ctxt->loadsubset |= XML_SKIP_IDS;
12914 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012915 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012916 ctxt->attsDefault = oldctxt->attsDefault;
12917 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012918
Daniel Veillard68e9e742002-11-16 15:35:11 +000012919 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012920 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012921 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012922 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012923 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012924 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012925 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012926 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012927 }
12928
12929 if (!ctxt->wellFormed) {
12930 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012931 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012932 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012933 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012934 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012935 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012936 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012937
William M. Brack7b9154b2003-09-27 19:23:50 +000012938 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012939 xmlNodePtr cur;
12940
12941 /*
12942 * Return the newly created nodeset after unlinking it from
12943 * they pseudo parent.
12944 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012945 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012946 *lst = cur;
12947 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012948#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012949 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12950 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12951 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012952 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12953 oldctxt->myDoc, cur);
12954 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012955#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012956 cur->parent = NULL;
12957 cur = cur->next;
12958 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012959 ctxt->myDoc->children->children = NULL;
12960 }
12961 if (ctxt->myDoc != NULL) {
12962 xmlFreeNode(ctxt->myDoc->children);
12963 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012964 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012965 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012966
12967 /*
12968 * Record in the parent context the number of entities replacement
12969 * done when parsing that reference.
12970 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020012971 if (oldctxt != NULL)
12972 oldctxt->nbentities += ctxt->nbentities;
12973
Daniel Veillard0161e632008-08-28 15:36:32 +000012974 /*
12975 * Also record the last error if any
12976 */
12977 if (ctxt->lastError.code != XML_ERR_OK)
12978 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12979
Daniel Veillard328f48c2002-11-15 15:24:34 +000012980 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012981 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012982 ctxt->attsDefault = NULL;
12983 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012984 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012985 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012986 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012987 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012988
Daniel Veillard328f48c2002-11-15 15:24:34 +000012989 return(ret);
12990}
12991
Daniel Veillard29b17482004-08-16 00:39:03 +000012992/**
12993 * xmlParseInNodeContext:
12994 * @node: the context node
12995 * @data: the input string
12996 * @datalen: the input string length in bytes
12997 * @options: a combination of xmlParserOption
12998 * @lst: the return value for the set of parsed nodes
12999 *
13000 * Parse a well-balanced chunk of an XML document
13001 * within the context (DTD, namespaces, etc ...) of the given node.
13002 *
13003 * The allowed sequence for the data is a Well Balanced Chunk defined by
13004 * the content production in the XML grammar:
13005 *
13006 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13007 *
13008 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13009 * error code otherwise
13010 */
13011xmlParserErrors
13012xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13013 int options, xmlNodePtr *lst) {
13014#ifdef SAX2
13015 xmlParserCtxtPtr ctxt;
13016 xmlDocPtr doc = NULL;
13017 xmlNodePtr fake, cur;
13018 int nsnr = 0;
13019
13020 xmlParserErrors ret = XML_ERR_OK;
13021
13022 /*
13023 * check all input parameters, grab the document
13024 */
13025 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13026 return(XML_ERR_INTERNAL_ERROR);
13027 switch (node->type) {
13028 case XML_ELEMENT_NODE:
13029 case XML_ATTRIBUTE_NODE:
13030 case XML_TEXT_NODE:
13031 case XML_CDATA_SECTION_NODE:
13032 case XML_ENTITY_REF_NODE:
13033 case XML_PI_NODE:
13034 case XML_COMMENT_NODE:
13035 case XML_DOCUMENT_NODE:
13036 case XML_HTML_DOCUMENT_NODE:
13037 break;
13038 default:
13039 return(XML_ERR_INTERNAL_ERROR);
13040
13041 }
13042 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13043 (node->type != XML_DOCUMENT_NODE) &&
13044 (node->type != XML_HTML_DOCUMENT_NODE))
13045 node = node->parent;
13046 if (node == NULL)
13047 return(XML_ERR_INTERNAL_ERROR);
13048 if (node->type == XML_ELEMENT_NODE)
13049 doc = node->doc;
13050 else
13051 doc = (xmlDocPtr) node;
13052 if (doc == NULL)
13053 return(XML_ERR_INTERNAL_ERROR);
13054
13055 /*
13056 * allocate a context and set-up everything not related to the
13057 * node position in the tree
13058 */
13059 if (doc->type == XML_DOCUMENT_NODE)
13060 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13061#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013062 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013063 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013064 /*
13065 * When parsing in context, it makes no sense to add implied
13066 * elements like html/body/etc...
13067 */
13068 options |= HTML_PARSE_NOIMPLIED;
13069 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013070#endif
13071 else
13072 return(XML_ERR_INTERNAL_ERROR);
13073
13074 if (ctxt == NULL)
13075 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013076
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013077 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013078 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13079 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13080 * we must wait until the last moment to free the original one.
13081 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013082 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013083 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013084 xmlDictFree(ctxt->dict);
13085 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013086 } else
13087 options |= XML_PARSE_NODICT;
13088
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013089 if (doc->encoding != NULL) {
13090 xmlCharEncodingHandlerPtr hdlr;
13091
13092 if (ctxt->encoding != NULL)
13093 xmlFree((xmlChar *) ctxt->encoding);
13094 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13095
13096 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13097 if (hdlr != NULL) {
13098 xmlSwitchToEncoding(ctxt, hdlr);
13099 } else {
13100 return(XML_ERR_UNSUPPORTED_ENCODING);
13101 }
13102 }
13103
Daniel Veillard37334572008-07-31 08:20:02 +000013104 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013105 xmlDetectSAX2(ctxt);
13106 ctxt->myDoc = doc;
13107
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013108 fake = xmlNewComment(NULL);
13109 if (fake == NULL) {
13110 xmlFreeParserCtxt(ctxt);
13111 return(XML_ERR_NO_MEMORY);
13112 }
13113 xmlAddChild(node, fake);
13114
Daniel Veillard29b17482004-08-16 00:39:03 +000013115 if (node->type == XML_ELEMENT_NODE) {
13116 nodePush(ctxt, node);
13117 /*
13118 * initialize the SAX2 namespaces stack
13119 */
13120 cur = node;
13121 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13122 xmlNsPtr ns = cur->nsDef;
13123 const xmlChar *iprefix, *ihref;
13124
13125 while (ns != NULL) {
13126 if (ctxt->dict) {
13127 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13128 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13129 } else {
13130 iprefix = ns->prefix;
13131 ihref = ns->href;
13132 }
13133
13134 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13135 nsPush(ctxt, iprefix, ihref);
13136 nsnr++;
13137 }
13138 ns = ns->next;
13139 }
13140 cur = cur->parent;
13141 }
13142 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000013143 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013144
13145 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13146 /*
13147 * ID/IDREF registration will be done in xmlValidateElement below
13148 */
13149 ctxt->loadsubset |= XML_SKIP_IDS;
13150 }
13151
Daniel Veillard499cc922006-01-18 17:22:35 +000013152#ifdef LIBXML_HTML_ENABLED
13153 if (doc->type == XML_HTML_DOCUMENT_NODE)
13154 __htmlParseContent(ctxt);
13155 else
13156#endif
13157 xmlParseContent(ctxt);
13158
Daniel Veillard29b17482004-08-16 00:39:03 +000013159 nsPop(ctxt, nsnr);
13160 if ((RAW == '<') && (NXT(1) == '/')) {
13161 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13162 } else if (RAW != 0) {
13163 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13164 }
13165 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13166 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13167 ctxt->wellFormed = 0;
13168 }
13169
13170 if (!ctxt->wellFormed) {
13171 if (ctxt->errNo == 0)
13172 ret = XML_ERR_INTERNAL_ERROR;
13173 else
13174 ret = (xmlParserErrors)ctxt->errNo;
13175 } else {
13176 ret = XML_ERR_OK;
13177 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013178
Daniel Veillard29b17482004-08-16 00:39:03 +000013179 /*
13180 * Return the newly created nodeset after unlinking it from
13181 * the pseudo sibling.
13182 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013183
Daniel Veillard29b17482004-08-16 00:39:03 +000013184 cur = fake->next;
13185 fake->next = NULL;
13186 node->last = fake;
13187
13188 if (cur != NULL) {
13189 cur->prev = NULL;
13190 }
13191
13192 *lst = cur;
13193
13194 while (cur != NULL) {
13195 cur->parent = NULL;
13196 cur = cur->next;
13197 }
13198
13199 xmlUnlinkNode(fake);
13200 xmlFreeNode(fake);
13201
13202
13203 if (ret != XML_ERR_OK) {
13204 xmlFreeNodeList(*lst);
13205 *lst = NULL;
13206 }
William M. Brackc3f81342004-10-03 01:22:44 +000013207
William M. Brackb7b54de2004-10-06 16:38:01 +000013208 if (doc->dict != NULL)
13209 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013210 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013211
Daniel Veillard29b17482004-08-16 00:39:03 +000013212 return(ret);
13213#else /* !SAX2 */
13214 return(XML_ERR_INTERNAL_ERROR);
13215#endif
13216}
13217
Daniel Veillard81273902003-09-30 00:43:48 +000013218#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013219/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013220 * xmlParseBalancedChunkMemoryRecover:
13221 * @doc: the document the chunk pertains to
13222 * @sax: the SAX handler bloc (possibly NULL)
13223 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13224 * @depth: Used for loop detection, use 0
13225 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13226 * @lst: the return value for the set of parsed nodes
13227 * @recover: return nodes even if the data is broken (use 0)
13228 *
13229 *
13230 * Parse a well-balanced chunk of an XML document
13231 * called by the parser
13232 * The allowed sequence for the Well Balanced Chunk is the one defined by
13233 * the content production in the XML grammar:
13234 *
13235 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13236 *
13237 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13238 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013239 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013240 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013241 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13242 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013243 */
13244int
13245xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013246 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013247 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013248 xmlParserCtxtPtr ctxt;
13249 xmlDocPtr newDoc;
13250 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013251 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013252 int size;
13253 int ret = 0;
13254
Daniel Veillard0161e632008-08-28 15:36:32 +000013255 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013256 return(XML_ERR_ENTITY_LOOP);
13257 }
13258
13259
Daniel Veillardcda96922001-08-21 10:56:31 +000013260 if (lst != NULL)
13261 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013262 if (string == NULL)
13263 return(-1);
13264
13265 size = xmlStrlen(string);
13266
13267 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13268 if (ctxt == NULL) return(-1);
13269 ctxt->userData = ctxt;
13270 if (sax != NULL) {
13271 oldsax = ctxt->sax;
13272 ctxt->sax = sax;
13273 if (user_data != NULL)
13274 ctxt->userData = user_data;
13275 }
13276 newDoc = xmlNewDoc(BAD_CAST "1.0");
13277 if (newDoc == NULL) {
13278 xmlFreeParserCtxt(ctxt);
13279 return(-1);
13280 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013281 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013282 if ((doc != NULL) && (doc->dict != NULL)) {
13283 xmlDictFree(ctxt->dict);
13284 ctxt->dict = doc->dict;
13285 xmlDictReference(ctxt->dict);
13286 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13287 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13288 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13289 ctxt->dictNames = 1;
13290 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013291 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013292 }
Owen Taylor3473f882001-02-23 17:55:21 +000013293 if (doc != NULL) {
13294 newDoc->intSubset = doc->intSubset;
13295 newDoc->extSubset = doc->extSubset;
13296 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013297 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13298 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013299 if (sax != NULL)
13300 ctxt->sax = oldsax;
13301 xmlFreeParserCtxt(ctxt);
13302 newDoc->intSubset = NULL;
13303 newDoc->extSubset = NULL;
13304 xmlFreeDoc(newDoc);
13305 return(-1);
13306 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013307 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13308 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013309 if (doc == NULL) {
13310 ctxt->myDoc = newDoc;
13311 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013312 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013313 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013314 /* Ensure that doc has XML spec namespace */
13315 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13316 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013317 }
13318 ctxt->instate = XML_PARSER_CONTENT;
13319 ctxt->depth = depth;
13320
13321 /*
13322 * Doing validity checking on chunk doesn't make sense
13323 */
13324 ctxt->validate = 0;
13325 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013326 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013327
Daniel Veillardb39bc392002-10-26 19:29:51 +000013328 if ( doc != NULL ){
13329 content = doc->children;
13330 doc->children = NULL;
13331 xmlParseContent(ctxt);
13332 doc->children = content;
13333 }
13334 else {
13335 xmlParseContent(ctxt);
13336 }
Owen Taylor3473f882001-02-23 17:55:21 +000013337 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013338 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013339 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013340 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013341 }
13342 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013343 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013344 }
13345
13346 if (!ctxt->wellFormed) {
13347 if (ctxt->errNo == 0)
13348 ret = 1;
13349 else
13350 ret = ctxt->errNo;
13351 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013352 ret = 0;
13353 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013354
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013355 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13356 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013357
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013358 /*
13359 * Return the newly created nodeset after unlinking it from
13360 * they pseudo parent.
13361 */
13362 cur = newDoc->children->children;
13363 *lst = cur;
13364 while (cur != NULL) {
13365 xmlSetTreeDoc(cur, doc);
13366 cur->parent = NULL;
13367 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013368 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013369 newDoc->children->children = NULL;
13370 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013371
13372 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013373 ctxt->sax = oldsax;
13374 xmlFreeParserCtxt(ctxt);
13375 newDoc->intSubset = NULL;
13376 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013377 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013378 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013379
Owen Taylor3473f882001-02-23 17:55:21 +000013380 return(ret);
13381}
13382
13383/**
13384 * xmlSAXParseEntity:
13385 * @sax: the SAX handler block
13386 * @filename: the filename
13387 *
13388 * parse an XML external entity out of context and build a tree.
13389 * It use the given SAX function block to handle the parsing callback.
13390 * If sax is NULL, fallback to the default DOM tree building routines.
13391 *
13392 * [78] extParsedEnt ::= TextDecl? content
13393 *
13394 * This correspond to a "Well Balanced" chunk
13395 *
13396 * Returns the resulting document tree
13397 */
13398
13399xmlDocPtr
13400xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13401 xmlDocPtr ret;
13402 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013403
13404 ctxt = xmlCreateFileParserCtxt(filename);
13405 if (ctxt == NULL) {
13406 return(NULL);
13407 }
13408 if (sax != NULL) {
13409 if (ctxt->sax != NULL)
13410 xmlFree(ctxt->sax);
13411 ctxt->sax = sax;
13412 ctxt->userData = NULL;
13413 }
13414
Owen Taylor3473f882001-02-23 17:55:21 +000013415 xmlParseExtParsedEnt(ctxt);
13416
13417 if (ctxt->wellFormed)
13418 ret = ctxt->myDoc;
13419 else {
13420 ret = NULL;
13421 xmlFreeDoc(ctxt->myDoc);
13422 ctxt->myDoc = NULL;
13423 }
13424 if (sax != NULL)
13425 ctxt->sax = NULL;
13426 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013427
Owen Taylor3473f882001-02-23 17:55:21 +000013428 return(ret);
13429}
13430
13431/**
13432 * xmlParseEntity:
13433 * @filename: the filename
13434 *
13435 * parse an XML external entity out of context and build a tree.
13436 *
13437 * [78] extParsedEnt ::= TextDecl? content
13438 *
13439 * This correspond to a "Well Balanced" chunk
13440 *
13441 * Returns the resulting document tree
13442 */
13443
13444xmlDocPtr
13445xmlParseEntity(const char *filename) {
13446 return(xmlSAXParseEntity(NULL, filename));
13447}
Daniel Veillard81273902003-09-30 00:43:48 +000013448#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013449
13450/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013451 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013452 * @URL: the entity URL
13453 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013454 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013455 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013456 *
13457 * Create a parser context for an external entity
13458 * Automatic support for ZLIB/Compress compressed document is provided
13459 * by default if found at compile-time.
13460 *
13461 * Returns the new parser context or NULL
13462 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013463static xmlParserCtxtPtr
13464xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13465 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013466 xmlParserCtxtPtr ctxt;
13467 xmlParserInputPtr inputStream;
13468 char *directory = NULL;
13469 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013470
Owen Taylor3473f882001-02-23 17:55:21 +000013471 ctxt = xmlNewParserCtxt();
13472 if (ctxt == NULL) {
13473 return(NULL);
13474 }
13475
Daniel Veillard48247b42009-07-10 16:12:46 +020013476 if (pctx != NULL) {
13477 ctxt->options = pctx->options;
13478 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013479 }
13480
Owen Taylor3473f882001-02-23 17:55:21 +000013481 uri = xmlBuildURI(URL, base);
13482
13483 if (uri == NULL) {
13484 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13485 if (inputStream == NULL) {
13486 xmlFreeParserCtxt(ctxt);
13487 return(NULL);
13488 }
13489
13490 inputPush(ctxt, inputStream);
13491
13492 if ((ctxt->directory == NULL) && (directory == NULL))
13493 directory = xmlParserGetDirectory((char *)URL);
13494 if ((ctxt->directory == NULL) && (directory != NULL))
13495 ctxt->directory = directory;
13496 } else {
13497 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13498 if (inputStream == NULL) {
13499 xmlFree(uri);
13500 xmlFreeParserCtxt(ctxt);
13501 return(NULL);
13502 }
13503
13504 inputPush(ctxt, inputStream);
13505
13506 if ((ctxt->directory == NULL) && (directory == NULL))
13507 directory = xmlParserGetDirectory((char *)uri);
13508 if ((ctxt->directory == NULL) && (directory != NULL))
13509 ctxt->directory = directory;
13510 xmlFree(uri);
13511 }
Owen Taylor3473f882001-02-23 17:55:21 +000013512 return(ctxt);
13513}
13514
Rob Richards9c0aa472009-03-26 18:10:19 +000013515/**
13516 * xmlCreateEntityParserCtxt:
13517 * @URL: the entity URL
13518 * @ID: the entity PUBLIC ID
13519 * @base: a possible base for the target URI
13520 *
13521 * Create a parser context for an external entity
13522 * Automatic support for ZLIB/Compress compressed document is provided
13523 * by default if found at compile-time.
13524 *
13525 * Returns the new parser context or NULL
13526 */
13527xmlParserCtxtPtr
13528xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13529 const xmlChar *base) {
13530 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13531
13532}
13533
Owen Taylor3473f882001-02-23 17:55:21 +000013534/************************************************************************
13535 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013536 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013537 * *
13538 ************************************************************************/
13539
13540/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013541 * xmlCreateURLParserCtxt:
13542 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013543 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013544 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013545 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013546 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013547 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013548 *
13549 * Returns the new parser context or NULL
13550 */
13551xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013552xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013553{
13554 xmlParserCtxtPtr ctxt;
13555 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013556 char *directory = NULL;
13557
Owen Taylor3473f882001-02-23 17:55:21 +000013558 ctxt = xmlNewParserCtxt();
13559 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013560 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013561 return(NULL);
13562 }
13563
Daniel Veillarddf292f72005-01-16 19:00:15 +000013564 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013565 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013566 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013567
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013568 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013569 if (inputStream == NULL) {
13570 xmlFreeParserCtxt(ctxt);
13571 return(NULL);
13572 }
13573
Owen Taylor3473f882001-02-23 17:55:21 +000013574 inputPush(ctxt, inputStream);
13575 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013576 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013577 if ((ctxt->directory == NULL) && (directory != NULL))
13578 ctxt->directory = directory;
13579
13580 return(ctxt);
13581}
13582
Daniel Veillard61b93382003-11-03 14:28:31 +000013583/**
13584 * xmlCreateFileParserCtxt:
13585 * @filename: the filename
13586 *
13587 * Create a parser context for a file content.
13588 * Automatic support for ZLIB/Compress compressed document is provided
13589 * by default if found at compile-time.
13590 *
13591 * Returns the new parser context or NULL
13592 */
13593xmlParserCtxtPtr
13594xmlCreateFileParserCtxt(const char *filename)
13595{
13596 return(xmlCreateURLParserCtxt(filename, 0));
13597}
13598
Daniel Veillard81273902003-09-30 00:43:48 +000013599#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013600/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013601 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013602 * @sax: the SAX handler block
13603 * @filename: the filename
13604 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13605 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013606 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013607 *
13608 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13609 * compressed document is provided by default if found at compile-time.
13610 * It use the given SAX function block to handle the parsing callback.
13611 * If sax is NULL, fallback to the default DOM tree building routines.
13612 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013613 * User data (void *) is stored within the parser context in the
13614 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013615 *
Owen Taylor3473f882001-02-23 17:55:21 +000013616 * Returns the resulting document tree
13617 */
13618
13619xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013620xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13621 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013622 xmlDocPtr ret;
13623 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013624
Daniel Veillard635ef722001-10-29 11:48:19 +000013625 xmlInitParser();
13626
Owen Taylor3473f882001-02-23 17:55:21 +000013627 ctxt = xmlCreateFileParserCtxt(filename);
13628 if (ctxt == NULL) {
13629 return(NULL);
13630 }
13631 if (sax != NULL) {
13632 if (ctxt->sax != NULL)
13633 xmlFree(ctxt->sax);
13634 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013635 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013636 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013637 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013638 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013639 }
Owen Taylor3473f882001-02-23 17:55:21 +000013640
Daniel Veillard37d2d162008-03-14 10:54:00 +000013641 if (ctxt->directory == NULL)
13642 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013643
Daniel Veillarddad3f682002-11-17 16:47:27 +000013644 ctxt->recovery = recovery;
13645
Owen Taylor3473f882001-02-23 17:55:21 +000013646 xmlParseDocument(ctxt);
13647
William M. Brackc07329e2003-09-08 01:57:30 +000013648 if ((ctxt->wellFormed) || recovery) {
13649 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013650 if (ret != NULL) {
13651 if (ctxt->input->buf->compressed > 0)
13652 ret->compression = 9;
13653 else
13654 ret->compression = ctxt->input->buf->compressed;
13655 }
William M. Brackc07329e2003-09-08 01:57:30 +000013656 }
Owen Taylor3473f882001-02-23 17:55:21 +000013657 else {
13658 ret = NULL;
13659 xmlFreeDoc(ctxt->myDoc);
13660 ctxt->myDoc = NULL;
13661 }
13662 if (sax != NULL)
13663 ctxt->sax = NULL;
13664 xmlFreeParserCtxt(ctxt);
13665
13666 return(ret);
13667}
13668
13669/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013670 * xmlSAXParseFile:
13671 * @sax: the SAX handler block
13672 * @filename: the filename
13673 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13674 * documents
13675 *
13676 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13677 * compressed document is provided by default if found at compile-time.
13678 * It use the given SAX function block to handle the parsing callback.
13679 * If sax is NULL, fallback to the default DOM tree building routines.
13680 *
13681 * Returns the resulting document tree
13682 */
13683
13684xmlDocPtr
13685xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13686 int recovery) {
13687 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13688}
13689
13690/**
Owen Taylor3473f882001-02-23 17:55:21 +000013691 * xmlRecoverDoc:
13692 * @cur: a pointer to an array of xmlChar
13693 *
13694 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013695 * In the case the document is not Well Formed, a attempt to build a
13696 * tree is tried anyway
13697 *
13698 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013699 */
13700
13701xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020013702xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013703 return(xmlSAXParseDoc(NULL, cur, 1));
13704}
13705
13706/**
13707 * xmlParseFile:
13708 * @filename: the filename
13709 *
13710 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13711 * compressed document is provided by default if found at compile-time.
13712 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013713 * Returns the resulting document tree if the file was wellformed,
13714 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013715 */
13716
13717xmlDocPtr
13718xmlParseFile(const char *filename) {
13719 return(xmlSAXParseFile(NULL, filename, 0));
13720}
13721
13722/**
13723 * xmlRecoverFile:
13724 * @filename: the filename
13725 *
13726 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13727 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013728 * In the case the document is not Well Formed, it attempts to build
13729 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013730 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013731 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013732 */
13733
13734xmlDocPtr
13735xmlRecoverFile(const char *filename) {
13736 return(xmlSAXParseFile(NULL, filename, 1));
13737}
13738
13739
13740/**
13741 * xmlSetupParserForBuffer:
13742 * @ctxt: an XML parser context
13743 * @buffer: a xmlChar * buffer
13744 * @filename: a file name
13745 *
13746 * Setup the parser context to parse a new buffer; Clears any prior
13747 * contents from the parser context. The buffer parameter must not be
13748 * NULL, but the filename parameter can be
13749 */
13750void
13751xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13752 const char* filename)
13753{
13754 xmlParserInputPtr input;
13755
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013756 if ((ctxt == NULL) || (buffer == NULL))
13757 return;
13758
Owen Taylor3473f882001-02-23 17:55:21 +000013759 input = xmlNewInputStream(ctxt);
13760 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013761 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013762 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013763 return;
13764 }
13765
13766 xmlClearParserCtxt(ctxt);
13767 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013768 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013769 input->base = buffer;
13770 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013771 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013772 inputPush(ctxt, input);
13773}
13774
13775/**
13776 * xmlSAXUserParseFile:
13777 * @sax: a SAX handler
13778 * @user_data: The user data returned on SAX callbacks
13779 * @filename: a file name
13780 *
13781 * parse an XML file and call the given SAX handler routines.
13782 * Automatic support for ZLIB/Compress compressed document is provided
13783 *
13784 * Returns 0 in case of success or a error number otherwise
13785 */
13786int
13787xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13788 const char *filename) {
13789 int ret = 0;
13790 xmlParserCtxtPtr ctxt;
13791
13792 ctxt = xmlCreateFileParserCtxt(filename);
13793 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013794 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013795 xmlFree(ctxt->sax);
13796 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013797 xmlDetectSAX2(ctxt);
13798
Owen Taylor3473f882001-02-23 17:55:21 +000013799 if (user_data != NULL)
13800 ctxt->userData = user_data;
13801
13802 xmlParseDocument(ctxt);
13803
13804 if (ctxt->wellFormed)
13805 ret = 0;
13806 else {
13807 if (ctxt->errNo != 0)
13808 ret = ctxt->errNo;
13809 else
13810 ret = -1;
13811 }
13812 if (sax != NULL)
13813 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013814 if (ctxt->myDoc != NULL) {
13815 xmlFreeDoc(ctxt->myDoc);
13816 ctxt->myDoc = NULL;
13817 }
Owen Taylor3473f882001-02-23 17:55:21 +000013818 xmlFreeParserCtxt(ctxt);
13819
13820 return ret;
13821}
Daniel Veillard81273902003-09-30 00:43:48 +000013822#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013823
13824/************************************************************************
13825 * *
13826 * Front ends when parsing from memory *
13827 * *
13828 ************************************************************************/
13829
13830/**
13831 * xmlCreateMemoryParserCtxt:
13832 * @buffer: a pointer to a char array
13833 * @size: the size of the array
13834 *
13835 * Create a parser context for an XML in-memory document.
13836 *
13837 * Returns the new parser context or NULL
13838 */
13839xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013840xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013841 xmlParserCtxtPtr ctxt;
13842 xmlParserInputPtr input;
13843 xmlParserInputBufferPtr buf;
13844
13845 if (buffer == NULL)
13846 return(NULL);
13847 if (size <= 0)
13848 return(NULL);
13849
13850 ctxt = xmlNewParserCtxt();
13851 if (ctxt == NULL)
13852 return(NULL);
13853
Daniel Veillard53350552003-09-18 13:35:51 +000013854 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013855 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013856 if (buf == NULL) {
13857 xmlFreeParserCtxt(ctxt);
13858 return(NULL);
13859 }
Owen Taylor3473f882001-02-23 17:55:21 +000013860
13861 input = xmlNewInputStream(ctxt);
13862 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013863 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013864 xmlFreeParserCtxt(ctxt);
13865 return(NULL);
13866 }
13867
13868 input->filename = NULL;
13869 input->buf = buf;
13870 input->base = input->buf->buffer->content;
13871 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013872 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013873
13874 inputPush(ctxt, input);
13875 return(ctxt);
13876}
13877
Daniel Veillard81273902003-09-30 00:43:48 +000013878#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013879/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013880 * xmlSAXParseMemoryWithData:
13881 * @sax: the SAX handler block
13882 * @buffer: an pointer to a char array
13883 * @size: the size of the array
13884 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13885 * documents
13886 * @data: the userdata
13887 *
13888 * parse an XML in-memory block and use the given SAX function block
13889 * to handle the parsing callback. If sax is NULL, fallback to the default
13890 * DOM tree building routines.
13891 *
13892 * User data (void *) is stored within the parser context in the
13893 * context's _private member, so it is available nearly everywhere in libxml
13894 *
13895 * Returns the resulting document tree
13896 */
13897
13898xmlDocPtr
13899xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13900 int size, int recovery, void *data) {
13901 xmlDocPtr ret;
13902 xmlParserCtxtPtr ctxt;
13903
Daniel Veillardab2a7632009-07-09 08:45:03 +020013904 xmlInitParser();
13905
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013906 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13907 if (ctxt == NULL) return(NULL);
13908 if (sax != NULL) {
13909 if (ctxt->sax != NULL)
13910 xmlFree(ctxt->sax);
13911 ctxt->sax = sax;
13912 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013913 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013914 if (data!=NULL) {
13915 ctxt->_private=data;
13916 }
13917
Daniel Veillardadba5f12003-04-04 16:09:01 +000013918 ctxt->recovery = recovery;
13919
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013920 xmlParseDocument(ctxt);
13921
13922 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13923 else {
13924 ret = NULL;
13925 xmlFreeDoc(ctxt->myDoc);
13926 ctxt->myDoc = NULL;
13927 }
13928 if (sax != NULL)
13929 ctxt->sax = NULL;
13930 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013931
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013932 return(ret);
13933}
13934
13935/**
Owen Taylor3473f882001-02-23 17:55:21 +000013936 * xmlSAXParseMemory:
13937 * @sax: the SAX handler block
13938 * @buffer: an pointer to a char array
13939 * @size: the size of the array
13940 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13941 * documents
13942 *
13943 * parse an XML in-memory block and use the given SAX function block
13944 * to handle the parsing callback. If sax is NULL, fallback to the default
13945 * DOM tree building routines.
13946 *
13947 * Returns the resulting document tree
13948 */
13949xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013950xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13951 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013952 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013953}
13954
13955/**
13956 * xmlParseMemory:
13957 * @buffer: an pointer to a char array
13958 * @size: the size of the array
13959 *
13960 * parse an XML in-memory block and build a tree.
13961 *
13962 * Returns the resulting document tree
13963 */
13964
Daniel Veillard50822cb2001-07-26 20:05:51 +000013965xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013966 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13967}
13968
13969/**
13970 * xmlRecoverMemory:
13971 * @buffer: an pointer to a char array
13972 * @size: the size of the array
13973 *
13974 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013975 * In the case the document is not Well Formed, an attempt to
13976 * build a tree is tried anyway
13977 *
13978 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013979 */
13980
Daniel Veillard50822cb2001-07-26 20:05:51 +000013981xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013982 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13983}
13984
13985/**
13986 * xmlSAXUserParseMemory:
13987 * @sax: a SAX handler
13988 * @user_data: The user data returned on SAX callbacks
13989 * @buffer: an in-memory XML document input
13990 * @size: the length of the XML document in bytes
13991 *
13992 * A better SAX parsing routine.
13993 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020013994 *
Owen Taylor3473f882001-02-23 17:55:21 +000013995 * Returns 0 in case of success or a error number otherwise
13996 */
13997int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013998 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013999 int ret = 0;
14000 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014001
14002 xmlInitParser();
14003
Owen Taylor3473f882001-02-23 17:55:21 +000014004 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14005 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014006 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14007 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014008 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014009 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014010
Daniel Veillard30211a02001-04-26 09:33:18 +000014011 if (user_data != NULL)
14012 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014013
Owen Taylor3473f882001-02-23 17:55:21 +000014014 xmlParseDocument(ctxt);
14015
14016 if (ctxt->wellFormed)
14017 ret = 0;
14018 else {
14019 if (ctxt->errNo != 0)
14020 ret = ctxt->errNo;
14021 else
14022 ret = -1;
14023 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014024 if (sax != NULL)
14025 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014026 if (ctxt->myDoc != NULL) {
14027 xmlFreeDoc(ctxt->myDoc);
14028 ctxt->myDoc = NULL;
14029 }
Owen Taylor3473f882001-02-23 17:55:21 +000014030 xmlFreeParserCtxt(ctxt);
14031
14032 return ret;
14033}
Daniel Veillard81273902003-09-30 00:43:48 +000014034#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014035
14036/**
14037 * xmlCreateDocParserCtxt:
14038 * @cur: a pointer to an array of xmlChar
14039 *
14040 * Creates a parser context for an XML in-memory document.
14041 *
14042 * Returns the new parser context or NULL
14043 */
14044xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014045xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014046 int len;
14047
14048 if (cur == NULL)
14049 return(NULL);
14050 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014051 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014052}
14053
Daniel Veillard81273902003-09-30 00:43:48 +000014054#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014055/**
14056 * xmlSAXParseDoc:
14057 * @sax: the SAX handler block
14058 * @cur: a pointer to an array of xmlChar
14059 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14060 * documents
14061 *
14062 * parse an XML in-memory document and build a tree.
14063 * It use the given SAX function block to handle the parsing callback.
14064 * If sax is NULL, fallback to the default DOM tree building routines.
14065 *
14066 * Returns the resulting document tree
14067 */
14068
14069xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014070xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014071 xmlDocPtr ret;
14072 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014073 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014074
Daniel Veillard38936062004-11-04 17:45:11 +000014075 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014076
14077
14078 ctxt = xmlCreateDocParserCtxt(cur);
14079 if (ctxt == NULL) return(NULL);
14080 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014081 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014082 ctxt->sax = sax;
14083 ctxt->userData = NULL;
14084 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014085 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014086
14087 xmlParseDocument(ctxt);
14088 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14089 else {
14090 ret = NULL;
14091 xmlFreeDoc(ctxt->myDoc);
14092 ctxt->myDoc = NULL;
14093 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014094 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014095 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014096 xmlFreeParserCtxt(ctxt);
14097
14098 return(ret);
14099}
14100
14101/**
14102 * xmlParseDoc:
14103 * @cur: a pointer to an array of xmlChar
14104 *
14105 * parse an XML in-memory document and build a tree.
14106 *
14107 * Returns the resulting document tree
14108 */
14109
14110xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014111xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014112 return(xmlSAXParseDoc(NULL, cur, 0));
14113}
Daniel Veillard81273902003-09-30 00:43:48 +000014114#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014115
Daniel Veillard81273902003-09-30 00:43:48 +000014116#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014117/************************************************************************
14118 * *
14119 * Specific function to keep track of entities references *
14120 * and used by the XSLT debugger *
14121 * *
14122 ************************************************************************/
14123
14124static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14125
14126/**
14127 * xmlAddEntityReference:
14128 * @ent : A valid entity
14129 * @firstNode : A valid first node for children of entity
14130 * @lastNode : A valid last node of children entity
14131 *
14132 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14133 */
14134static void
14135xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14136 xmlNodePtr lastNode)
14137{
14138 if (xmlEntityRefFunc != NULL) {
14139 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14140 }
14141}
14142
14143
14144/**
14145 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014146 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014147 *
14148 * Set the function to call call back when a xml reference has been made
14149 */
14150void
14151xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14152{
14153 xmlEntityRefFunc = func;
14154}
Daniel Veillard81273902003-09-30 00:43:48 +000014155#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014156
14157/************************************************************************
14158 * *
14159 * Miscellaneous *
14160 * *
14161 ************************************************************************/
14162
14163#ifdef LIBXML_XPATH_ENABLED
14164#include <libxml/xpath.h>
14165#endif
14166
Daniel Veillardffa3c742005-07-21 13:24:09 +000014167extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014168static int xmlParserInitialized = 0;
14169
14170/**
14171 * xmlInitParser:
14172 *
14173 * Initialization function for the XML parser.
14174 * This is not reentrant. Call once before processing in case of
14175 * use in multithreaded programs.
14176 */
14177
14178void
14179xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014180 if (xmlParserInitialized != 0)
14181 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014182
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014183#ifdef LIBXML_THREAD_ENABLED
14184 __xmlGlobalInitMutexLock();
14185 if (xmlParserInitialized == 0) {
14186#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014187 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014188 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014189 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14190 (xmlGenericError == NULL))
14191 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014192 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014193 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014194 xmlInitCharEncodingHandlers();
14195 xmlDefaultSAXHandlerInit();
14196 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014197#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014198 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014199#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014200#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014201 htmlInitAutoClose();
14202 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014203#endif
14204#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014205 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014206#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014207 xmlParserInitialized = 1;
14208#ifdef LIBXML_THREAD_ENABLED
14209 }
14210 __xmlGlobalInitMutexUnlock();
14211#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014212}
14213
14214/**
14215 * xmlCleanupParser:
14216 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014217 * This function name is somewhat misleading. It does not clean up
14218 * parser state, it cleans up memory allocated by the library itself.
14219 * It is a cleanup function for the XML library. It tries to reclaim all
14220 * related global memory allocated for the library processing.
14221 * It doesn't deallocate any document related memory. One should
14222 * call xmlCleanupParser() only when the process has finished using
14223 * the library and all XML/HTML documents built with it.
14224 * See also xmlInitParser() which has the opposite function of preparing
14225 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014226 *
14227 * WARNING: if your application is multithreaded or has plugin support
14228 * calling this may crash the application if another thread or
14229 * a plugin is still using libxml2. It's sometimes very hard to
14230 * guess if libxml2 is in use in the application, some libraries
14231 * or plugins may use it without notice. In case of doubt abstain
14232 * from calling this function or do it just before calling exit()
14233 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014234 */
14235
14236void
14237xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014238 if (!xmlParserInitialized)
14239 return;
14240
Owen Taylor3473f882001-02-23 17:55:21 +000014241 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014242#ifdef LIBXML_CATALOG_ENABLED
14243 xmlCatalogCleanup();
14244#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014245 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014246 xmlCleanupInputCallbacks();
14247#ifdef LIBXML_OUTPUT_ENABLED
14248 xmlCleanupOutputCallbacks();
14249#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014250#ifdef LIBXML_SCHEMAS_ENABLED
14251 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014252 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014253#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000014254 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014255 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014256 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014257 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014258 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014259}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014260
14261/************************************************************************
14262 * *
14263 * New set (2.6.0) of simpler and more flexible APIs *
14264 * *
14265 ************************************************************************/
14266
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored, and when "dict" is NULL the
 * string is always freed.
 *
 * A variable named "dict" must be visible where the macro is used.
 * @str is evaluated more than once, so it must not have side effects.
 * The expansion is a single statement (do/while(0) idiom) and must be
 * terminated with a semicolon by the caller, which makes it safe as the
 * unbraced body of an if/else.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14278
14279/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014280 * xmlCtxtReset:
14281 * @ctxt: an XML parser context
14282 *
14283 * Reset a parser context
14284 */
14285void
14286xmlCtxtReset(xmlParserCtxtPtr ctxt)
14287{
14288 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014289 xmlDictPtr dict;
14290
14291 if (ctxt == NULL)
14292 return;
14293
14294 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014295
14296 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14297 xmlFreeInputStream(input);
14298 }
14299 ctxt->inputNr = 0;
14300 ctxt->input = NULL;
14301
14302 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014303 if (ctxt->spaceTab != NULL) {
14304 ctxt->spaceTab[0] = -1;
14305 ctxt->space = &ctxt->spaceTab[0];
14306 } else {
14307 ctxt->space = NULL;
14308 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014309
14310
14311 ctxt->nodeNr = 0;
14312 ctxt->node = NULL;
14313
14314 ctxt->nameNr = 0;
14315 ctxt->name = NULL;
14316
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014317 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014318 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014319 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014320 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014321 DICT_FREE(ctxt->directory);
14322 ctxt->directory = NULL;
14323 DICT_FREE(ctxt->extSubURI);
14324 ctxt->extSubURI = NULL;
14325 DICT_FREE(ctxt->extSubSystem);
14326 ctxt->extSubSystem = NULL;
14327 if (ctxt->myDoc != NULL)
14328 xmlFreeDoc(ctxt->myDoc);
14329 ctxt->myDoc = NULL;
14330
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014331 ctxt->standalone = -1;
14332 ctxt->hasExternalSubset = 0;
14333 ctxt->hasPErefs = 0;
14334 ctxt->html = 0;
14335 ctxt->external = 0;
14336 ctxt->instate = XML_PARSER_START;
14337 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014338
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014339 ctxt->wellFormed = 1;
14340 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014341 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014342 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014343#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014344 ctxt->vctxt.userData = ctxt;
14345 ctxt->vctxt.error = xmlParserValidityError;
14346 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014347#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014348 ctxt->record_info = 0;
14349 ctxt->nbChars = 0;
14350 ctxt->checkIndex = 0;
14351 ctxt->inSubset = 0;
14352 ctxt->errNo = XML_ERR_OK;
14353 ctxt->depth = 0;
14354 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14355 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014356 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014357 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014358 xmlInitNodeInfoSeq(&ctxt->node_seq);
14359
14360 if (ctxt->attsDefault != NULL) {
14361 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14362 ctxt->attsDefault = NULL;
14363 }
14364 if (ctxt->attsSpecial != NULL) {
14365 xmlHashFree(ctxt->attsSpecial, NULL);
14366 ctxt->attsSpecial = NULL;
14367 }
14368
Daniel Veillard4432df22003-09-28 18:58:27 +000014369#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014370 if (ctxt->catalogs != NULL)
14371 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014372#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014373 if (ctxt->lastError.code != XML_ERR_OK)
14374 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014375}
14376
14377/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014378 * xmlCtxtResetPush:
14379 * @ctxt: an XML parser context
14380 * @chunk: a pointer to an array of chars
14381 * @size: number of chars in the array
14382 * @filename: an optional file name or URI
14383 * @encoding: the document encoding, or NULL
14384 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014385 * Reset a push parser context
14386 *
14387 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014388 */
14389int
14390xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14391 int size, const char *filename, const char *encoding)
14392{
14393 xmlParserInputPtr inputStream;
14394 xmlParserInputBufferPtr buf;
14395 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14396
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014397 if (ctxt == NULL)
14398 return(1);
14399
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014400 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14401 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14402
14403 buf = xmlAllocParserInputBuffer(enc);
14404 if (buf == NULL)
14405 return(1);
14406
14407 if (ctxt == NULL) {
14408 xmlFreeParserInputBuffer(buf);
14409 return(1);
14410 }
14411
14412 xmlCtxtReset(ctxt);
14413
14414 if (ctxt->pushTab == NULL) {
14415 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14416 sizeof(xmlChar *));
14417 if (ctxt->pushTab == NULL) {
14418 xmlErrMemory(ctxt, NULL);
14419 xmlFreeParserInputBuffer(buf);
14420 return(1);
14421 }
14422 }
14423
14424 if (filename == NULL) {
14425 ctxt->directory = NULL;
14426 } else {
14427 ctxt->directory = xmlParserGetDirectory(filename);
14428 }
14429
14430 inputStream = xmlNewInputStream(ctxt);
14431 if (inputStream == NULL) {
14432 xmlFreeParserInputBuffer(buf);
14433 return(1);
14434 }
14435
14436 if (filename == NULL)
14437 inputStream->filename = NULL;
14438 else
14439 inputStream->filename = (char *)
14440 xmlCanonicPath((const xmlChar *) filename);
14441 inputStream->buf = buf;
14442 inputStream->base = inputStream->buf->buffer->content;
14443 inputStream->cur = inputStream->buf->buffer->content;
14444 inputStream->end =
14445 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14446
14447 inputPush(ctxt, inputStream);
14448
14449 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14450 (ctxt->input->buf != NULL)) {
14451 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14452 int cur = ctxt->input->cur - ctxt->input->base;
14453
14454 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14455
14456 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14457 ctxt->input->cur = ctxt->input->base + cur;
14458 ctxt->input->end =
14459 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14460 use];
14461#ifdef DEBUG_PUSH
14462 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14463#endif
14464 }
14465
14466 if (encoding != NULL) {
14467 xmlCharEncodingHandlerPtr hdlr;
14468
Daniel Veillard37334572008-07-31 08:20:02 +000014469 if (ctxt->encoding != NULL)
14470 xmlFree((xmlChar *) ctxt->encoding);
14471 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14472
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014473 hdlr = xmlFindCharEncodingHandler(encoding);
14474 if (hdlr != NULL) {
14475 xmlSwitchToEncoding(ctxt, hdlr);
14476 } else {
14477 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14478 "Unsupported encoding %s\n", BAD_CAST encoding);
14479 }
14480 } else if (enc != XML_CHAR_ENCODING_NONE) {
14481 xmlSwitchEncoding(ctxt, enc);
14482 }
14483
14484 return(0);
14485}
14486
Daniel Veillard37334572008-07-31 08:20:02 +000014487
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014488/**
Daniel Veillard37334572008-07-31 08:20:02 +000014489 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014490 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014491 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014492 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014493 *
14494 * Applies the options to the parser context
14495 *
14496 * Returns 0 in case of success, the set of unknown or unimplemented options
14497 * in case of error.
14498 */
Daniel Veillard37334572008-07-31 08:20:02 +000014499static int
14500xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014501{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014502 if (ctxt == NULL)
14503 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014504 if (encoding != NULL) {
14505 if (ctxt->encoding != NULL)
14506 xmlFree((xmlChar *) ctxt->encoding);
14507 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14508 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014509 if (options & XML_PARSE_RECOVER) {
14510 ctxt->recovery = 1;
14511 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014512 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014513 } else
14514 ctxt->recovery = 0;
14515 if (options & XML_PARSE_DTDLOAD) {
14516 ctxt->loadsubset = XML_DETECT_IDS;
14517 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014518 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014519 } else
14520 ctxt->loadsubset = 0;
14521 if (options & XML_PARSE_DTDATTR) {
14522 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14523 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014524 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014525 }
14526 if (options & XML_PARSE_NOENT) {
14527 ctxt->replaceEntities = 1;
14528 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14529 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014530 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014531 } else
14532 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014533 if (options & XML_PARSE_PEDANTIC) {
14534 ctxt->pedantic = 1;
14535 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014536 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014537 } else
14538 ctxt->pedantic = 0;
14539 if (options & XML_PARSE_NOBLANKS) {
14540 ctxt->keepBlanks = 0;
14541 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14542 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014543 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014544 } else
14545 ctxt->keepBlanks = 1;
14546 if (options & XML_PARSE_DTDVALID) {
14547 ctxt->validate = 1;
14548 if (options & XML_PARSE_NOWARNING)
14549 ctxt->vctxt.warning = NULL;
14550 if (options & XML_PARSE_NOERROR)
14551 ctxt->vctxt.error = NULL;
14552 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014553 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014554 } else
14555 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014556 if (options & XML_PARSE_NOWARNING) {
14557 ctxt->sax->warning = NULL;
14558 options -= XML_PARSE_NOWARNING;
14559 }
14560 if (options & XML_PARSE_NOERROR) {
14561 ctxt->sax->error = NULL;
14562 ctxt->sax->fatalError = NULL;
14563 options -= XML_PARSE_NOERROR;
14564 }
Daniel Veillard81273902003-09-30 00:43:48 +000014565#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014566 if (options & XML_PARSE_SAX1) {
14567 ctxt->sax->startElement = xmlSAX2StartElement;
14568 ctxt->sax->endElement = xmlSAX2EndElement;
14569 ctxt->sax->startElementNs = NULL;
14570 ctxt->sax->endElementNs = NULL;
14571 ctxt->sax->initialized = 1;
14572 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014573 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014574 }
Daniel Veillard81273902003-09-30 00:43:48 +000014575#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014576 if (options & XML_PARSE_NODICT) {
14577 ctxt->dictNames = 0;
14578 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014579 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014580 } else {
14581 ctxt->dictNames = 1;
14582 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014583 if (options & XML_PARSE_NOCDATA) {
14584 ctxt->sax->cdataBlock = NULL;
14585 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014586 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014587 }
14588 if (options & XML_PARSE_NSCLEAN) {
14589 ctxt->options |= XML_PARSE_NSCLEAN;
14590 options -= XML_PARSE_NSCLEAN;
14591 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014592 if (options & XML_PARSE_NONET) {
14593 ctxt->options |= XML_PARSE_NONET;
14594 options -= XML_PARSE_NONET;
14595 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014596 if (options & XML_PARSE_COMPACT) {
14597 ctxt->options |= XML_PARSE_COMPACT;
14598 options -= XML_PARSE_COMPACT;
14599 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014600 if (options & XML_PARSE_OLD10) {
14601 ctxt->options |= XML_PARSE_OLD10;
14602 options -= XML_PARSE_OLD10;
14603 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014604 if (options & XML_PARSE_NOBASEFIX) {
14605 ctxt->options |= XML_PARSE_NOBASEFIX;
14606 options -= XML_PARSE_NOBASEFIX;
14607 }
14608 if (options & XML_PARSE_HUGE) {
14609 ctxt->options |= XML_PARSE_HUGE;
14610 options -= XML_PARSE_HUGE;
14611 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014612 if (options & XML_PARSE_OLDSAX) {
14613 ctxt->options |= XML_PARSE_OLDSAX;
14614 options -= XML_PARSE_OLDSAX;
14615 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080014616 if (options & XML_PARSE_IGNORE_ENC) {
14617 ctxt->options |= XML_PARSE_IGNORE_ENC;
14618 options -= XML_PARSE_IGNORE_ENC;
14619 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014620 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014621 return (options);
14622}
14623
14624/**
Daniel Veillard37334572008-07-31 08:20:02 +000014625 * xmlCtxtUseOptions:
14626 * @ctxt: an XML parser context
14627 * @options: a combination of xmlParserOption
14628 *
14629 * Applies the options to the parser context
14630 *
14631 * Returns 0 in case of success, the set of unknown or unimplemented options
14632 * in case of error.
14633 */
14634int
14635xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14636{
14637 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14638}
14639
14640/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014641 * xmlDoRead:
14642 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014643 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014644 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014645 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014646 * @reuse: keep the context for reuse
14647 *
14648 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014649 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014650 * Returns the resulting document tree or NULL
14651 */
14652static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014653xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14654 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014655{
14656 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014657
14658 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014659 if (encoding != NULL) {
14660 xmlCharEncodingHandlerPtr hdlr;
14661
14662 hdlr = xmlFindCharEncodingHandler(encoding);
14663 if (hdlr != NULL)
14664 xmlSwitchToEncoding(ctxt, hdlr);
14665 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014666 if ((URL != NULL) && (ctxt->input != NULL) &&
14667 (ctxt->input->filename == NULL))
14668 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014669 xmlParseDocument(ctxt);
14670 if ((ctxt->wellFormed) || ctxt->recovery)
14671 ret = ctxt->myDoc;
14672 else {
14673 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014674 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014675 xmlFreeDoc(ctxt->myDoc);
14676 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014677 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014678 ctxt->myDoc = NULL;
14679 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014680 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014681 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014682
14683 return (ret);
14684}
14685
14686/**
14687 * xmlReadDoc:
14688 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014689 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014690 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014691 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014692 *
14693 * parse an XML in-memory document and build a tree.
14694 *
14695 * Returns the resulting document tree
14696 */
14697xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014698xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014699{
14700 xmlParserCtxtPtr ctxt;
14701
14702 if (cur == NULL)
14703 return (NULL);
14704
14705 ctxt = xmlCreateDocParserCtxt(cur);
14706 if (ctxt == NULL)
14707 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014708 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014709}
14710
14711/**
14712 * xmlReadFile:
14713 * @filename: a file or URL
14714 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014715 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014716 *
14717 * parse an XML file from the filesystem or the network.
14718 *
14719 * Returns the resulting document tree
14720 */
14721xmlDocPtr
14722xmlReadFile(const char *filename, const char *encoding, int options)
14723{
14724 xmlParserCtxtPtr ctxt;
14725
Daniel Veillard61b93382003-11-03 14:28:31 +000014726 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014727 if (ctxt == NULL)
14728 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014729 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014730}
14731
14732/**
14733 * xmlReadMemory:
14734 * @buffer: a pointer to a char array
14735 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014736 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014737 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014738 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014739 *
14740 * parse an XML in-memory document and build a tree.
14741 *
14742 * Returns the resulting document tree
14743 */
14744xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014745xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014746{
14747 xmlParserCtxtPtr ctxt;
14748
14749 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14750 if (ctxt == NULL)
14751 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014752 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014753}
14754
14755/**
14756 * xmlReadFd:
14757 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014758 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014759 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014760 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014761 *
14762 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014763 * NOTE that the file descriptor will not be closed when the
14764 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014765 *
14766 * Returns the resulting document tree
14767 */
14768xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014769xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014770{
14771 xmlParserCtxtPtr ctxt;
14772 xmlParserInputBufferPtr input;
14773 xmlParserInputPtr stream;
14774
14775 if (fd < 0)
14776 return (NULL);
14777
14778 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14779 if (input == NULL)
14780 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014781 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014782 ctxt = xmlNewParserCtxt();
14783 if (ctxt == NULL) {
14784 xmlFreeParserInputBuffer(input);
14785 return (NULL);
14786 }
14787 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14788 if (stream == NULL) {
14789 xmlFreeParserInputBuffer(input);
14790 xmlFreeParserCtxt(ctxt);
14791 return (NULL);
14792 }
14793 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014794 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014795}
14796
14797/**
14798 * xmlReadIO:
14799 * @ioread: an I/O read function
14800 * @ioclose: an I/O close function
14801 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014802 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014803 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014804 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014805 *
14806 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080014807 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014808 * Returns the resulting document tree
14809 */
14810xmlDocPtr
14811xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014812 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014813{
14814 xmlParserCtxtPtr ctxt;
14815 xmlParserInputBufferPtr input;
14816 xmlParserInputPtr stream;
14817
14818 if (ioread == NULL)
14819 return (NULL);
14820
14821 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14822 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080014823 if (input == NULL) {
14824 if (ioclose != NULL)
14825 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014826 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080014827 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014828 ctxt = xmlNewParserCtxt();
14829 if (ctxt == NULL) {
14830 xmlFreeParserInputBuffer(input);
14831 return (NULL);
14832 }
14833 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14834 if (stream == NULL) {
14835 xmlFreeParserInputBuffer(input);
14836 xmlFreeParserCtxt(ctxt);
14837 return (NULL);
14838 }
14839 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014840 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014841}
14842
14843/**
14844 * xmlCtxtReadDoc:
14845 * @ctxt: an XML parser context
14846 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014847 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014848 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014849 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014850 *
14851 * parse an XML in-memory document and build a tree.
14852 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080014853 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014854 * Returns the resulting document tree
14855 */
14856xmlDocPtr
14857xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014858 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014859{
14860 xmlParserInputPtr stream;
14861
14862 if (cur == NULL)
14863 return (NULL);
14864 if (ctxt == NULL)
14865 return (NULL);
14866
14867 xmlCtxtReset(ctxt);
14868
14869 stream = xmlNewStringInputStream(ctxt, cur);
14870 if (stream == NULL) {
14871 return (NULL);
14872 }
14873 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014874 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014875}
14876
14877/**
14878 * xmlCtxtReadFile:
14879 * @ctxt: an XML parser context
14880 * @filename: a file or URL
14881 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014882 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014883 *
14884 * parse an XML file from the filesystem or the network.
14885 * This reuses the existing @ctxt parser context
14886 *
14887 * Returns the resulting document tree
14888 */
14889xmlDocPtr
14890xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14891 const char *encoding, int options)
14892{
14893 xmlParserInputPtr stream;
14894
14895 if (filename == NULL)
14896 return (NULL);
14897 if (ctxt == NULL)
14898 return (NULL);
14899
14900 xmlCtxtReset(ctxt);
14901
Daniel Veillard29614c72004-11-26 10:47:26 +000014902 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014903 if (stream == NULL) {
14904 return (NULL);
14905 }
14906 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014907 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014908}
14909
14910/**
14911 * xmlCtxtReadMemory:
14912 * @ctxt: an XML parser context
14913 * @buffer: a pointer to a char array
14914 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014915 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014916 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014917 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014918 *
14919 * parse an XML in-memory document and build a tree.
14920 * This reuses the existing @ctxt parser context
14921 *
14922 * Returns the resulting document tree
14923 */
14924xmlDocPtr
14925xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014926 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014927{
14928 xmlParserInputBufferPtr input;
14929 xmlParserInputPtr stream;
14930
14931 if (ctxt == NULL)
14932 return (NULL);
14933 if (buffer == NULL)
14934 return (NULL);
14935
14936 xmlCtxtReset(ctxt);
14937
14938 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14939 if (input == NULL) {
14940 return(NULL);
14941 }
14942
14943 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14944 if (stream == NULL) {
14945 xmlFreeParserInputBuffer(input);
14946 return(NULL);
14947 }
14948
14949 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014950 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014951}
14952
14953/**
14954 * xmlCtxtReadFd:
14955 * @ctxt: an XML parser context
14956 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014957 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014958 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014959 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014960 *
14961 * parse an XML from a file descriptor and build a tree.
14962 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014963 * NOTE that the file descriptor will not be closed when the
14964 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014965 *
14966 * Returns the resulting document tree
14967 */
14968xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014969xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14970 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014971{
14972 xmlParserInputBufferPtr input;
14973 xmlParserInputPtr stream;
14974
14975 if (fd < 0)
14976 return (NULL);
14977 if (ctxt == NULL)
14978 return (NULL);
14979
14980 xmlCtxtReset(ctxt);
14981
14982
14983 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14984 if (input == NULL)
14985 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014986 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014987 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14988 if (stream == NULL) {
14989 xmlFreeParserInputBuffer(input);
14990 return (NULL);
14991 }
14992 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014993 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014994}
14995
14996/**
14997 * xmlCtxtReadIO:
14998 * @ctxt: an XML parser context
14999 * @ioread: an I/O read function
15000 * @ioclose: an I/O close function
15001 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015002 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015003 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015004 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015005 *
15006 * parse an XML document from I/O functions and source and build a tree.
15007 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015008 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015009 * Returns the resulting document tree
15010 */
15011xmlDocPtr
15012xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15013 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015014 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015015 const char *encoding, int options)
15016{
15017 xmlParserInputBufferPtr input;
15018 xmlParserInputPtr stream;
15019
15020 if (ioread == NULL)
15021 return (NULL);
15022 if (ctxt == NULL)
15023 return (NULL);
15024
15025 xmlCtxtReset(ctxt);
15026
15027 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15028 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015029 if (input == NULL) {
15030 if (ioclose != NULL)
15031 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015032 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015033 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015034 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15035 if (stream == NULL) {
15036 xmlFreeParserInputBuffer(input);
15037 return (NULL);
15038 }
15039 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015040 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015041}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015042
15043#define bottom_parser
15044#include "elfgcchack.h"