blob: 6d92656d8eb1d83f5feb847cb5d56e032323d05d [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
Rob Richards9c0aa472009-03-26 18:10:19 +000086static xmlParserCtxtPtr
87xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090/************************************************************************
91 * *
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
93 * *
94 ************************************************************************/
95
96#define XML_PARSER_BIG_ENTITY 1000
97#define XML_PARSER_LOT_ENTITY 5000
98
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
105#define XML_PARSER_NON_LINEAR 10
106
107/*
108 * xmlParserEntityCheck
109 *
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential linear entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
114 * parser option.
115 */
116static int
117xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118 xmlEntityPtr ent)
119{
Daniel Veillardcba68392008-08-29 12:43:40 +0000120 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000121
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123 return (0);
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125 return (1);
126 if (size != 0) {
127 /*
128 * Do the check based on the replacement size of the entity
129 */
130 if (size < XML_PARSER_BIG_ENTITY)
131 return(0);
132
133 /*
134 * A limit on the amount of text data reasonably used
135 */
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
139 }
140 consumed += ctxt->sizeentities;
141
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144 return (0);
145 } else if (ent != NULL) {
146 /*
147 * use the number of parsed entities in the replacement
148 */
149 size = ent->checked;
150
151 /*
152 * The amount of data parsed counting entities size only once
153 */
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
157 }
158 consumed += ctxt->sizeentities;
159
160 /*
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
163 */
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165 return (0);
166 } else {
167 /*
168 * strange we got no data for checking just return
169 */
170 return (0);
171 }
172
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174 return (1);
175}
176
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000186
Daniel Veillard0fb18932003-09-07 09:14:37 +0000187
Daniel Veillard0161e632008-08-28 15:36:32 +0000188
189#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000190#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000191#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000192#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The table is NULL-terminated and read-only: both the strings and the
 * array slots themselves are const.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
202
Daniel Veillarda07050d2003-10-19 14:46:32 +0000203
Owen Taylor3473f882001-02-23 17:55:21 +0000204/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200205static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000207
Daniel Veillard7d515752003-09-26 19:12:37 +0000208static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000209xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000211 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000212 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000213
Daniel Veillard37334572008-07-31 08:20:02 +0000214static int
215xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000218static void
219xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000221#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000222
Daniel Veillard7d515752003-09-26 19:12:37 +0000223static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000224xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000227static int
228xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229
Daniel Veillarde57ec792003-09-10 10:50:59 +0000230/************************************************************************
231 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 * Some factorized error routines *
233 * *
234 ************************************************************************/
235
236/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
241 *
242 * Handle a redefinition of attribute error
243 */
244static void
245xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
247{
Daniel Veillard157fee02003-10-31 10:36:03 +0000248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
250 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000251 if (ctxt != NULL)
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000253 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000254 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000255 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
256 (const char *) localname, NULL, NULL, 0, 0,
257 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000258 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000259 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
261 (const char *) prefix, (const char *) localname,
262 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
263 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000264 if (ctxt != NULL) {
265 ctxt->wellFormed = 0;
266 if (ctxt->recovery == 0)
267 ctxt->disableSAX = 1;
268 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000269}
270
271/**
272 * xmlFatalErr:
273 * @ctxt: an XML parser context
274 * @error: the error number
275 * @extra: extra information string
276 *
277 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
278 */
279static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000280xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000281{
282 const char *errmsg;
283
Daniel Veillard157fee02003-10-31 10:36:03 +0000284 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
285 (ctxt->instate == XML_PARSER_EOF))
286 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 switch (error) {
288 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000289 errmsg = "CharRef: invalid hexadecimal value\n";
290 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000291 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 errmsg = "CharRef: invalid decimal value\n";
293 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000294 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000295 errmsg = "CharRef: invalid value\n";
296 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000297 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000298 errmsg = "internal error";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "PEReference at end of document\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "PEReference in prolog\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "PEReference in epilog\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "PEReference: no name\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "PEReference: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "Detected an entity reference loop\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityValue: \" or ' expected\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "PEReferences forbidden in internal subset\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EntityValue: \" or ' expected\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "AttValue: \" or ' expected\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Unescaped '<' not allowed in attributes values\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "SystemLiteral \" or ' expected\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Unfinished System or Public ID \" or ' expected\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Sequence ']]>' not allowed in content\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "PUBLIC, the Public Identifier is missing\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Comment must not contain '--' (double-hyphen)\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "xmlParsePI : no target name\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Invalid PI name\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "NOTATION: Name expected here\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 errmsg = "'>' required to close NOTATION declaration\n";
362 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 errmsg = "Entity value required\n";
365 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 errmsg = "Fragment not allowed";
368 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000369 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370 errmsg = "'(' required to start ATTLIST enumeration\n";
371 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000372 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 errmsg = "NmToken expected in ATTLIST enumeration\n";
374 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 errmsg = "')' required to finish ATTLIST enumeration\n";
377 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
380 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000381 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
383 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000384 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 errmsg = "ContentDecl : Name or '(' expected\n";
386 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000387 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 errmsg =
392 "PEReference: forbidden within markup decl in internal subset\n";
393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 errmsg = "expected '>'\n";
396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 errmsg = "XML conditional section '[' expected\n";
399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000401 errmsg = "Content error in the external subset\n";
402 break;
403 case XML_ERR_CONDSEC_INVALID_KEYWORD:
404 errmsg =
405 "conditional section INCLUDE or IGNORE keyword expected\n";
406 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000407 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 errmsg = "XML conditional section not closed\n";
409 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000410 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 errmsg = "Text declaration '<?xml' required\n";
412 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000413 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 errmsg = "parsing XML declaration: '?>' expected\n";
415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 errmsg = "external parsed entities cannot be standalone\n";
418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 errmsg = "EntityRef: expecting ';'\n";
421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 errmsg = "DOCTYPE improperly terminated\n";
424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 errmsg = "EndTag: '</' not found\n";
427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 errmsg = "expected '='\n";
430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 errmsg = "String not closed expecting \" or '\n";
433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 errmsg = "String not started expecting ' or \"\n";
436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 errmsg = "Invalid XML encoding name\n";
439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 errmsg = "standalone accepts only 'yes' or 'no'\n";
442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 errmsg = "Document is empty\n";
445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 errmsg = "Extra content at the end of the document\n";
448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 errmsg = "chunk is not well balanced\n";
451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 errmsg = "extra content at the end of well balanced chunk\n";
454 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 errmsg = "Malformed declaration expecting version\n";
457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 case:
460 errmsg = "\n";
461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 default:
464 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000466 if (ctxt != NULL)
467 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000468 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
470 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000471 if (ctxt != NULL) {
472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000476}
477
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000478/**
479 * xmlFatalErrMsg:
480 * @ctxt: an XML parser context
481 * @error: the error number
482 * @msg: the error message
483 *
484 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
485 */
486static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
488 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000489{
Daniel Veillard157fee02003-10-31 10:36:03 +0000490 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
491 (ctxt->instate == XML_PARSER_EOF))
492 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000493 if (ctxt != NULL)
494 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000495 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200496 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000497 if (ctxt != NULL) {
498 ctxt->wellFormed = 0;
499 if (ctxt->recovery == 0)
500 ctxt->disableSAX = 1;
501 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000502}
503
504/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000505 * xmlWarningMsg:
506 * @ctxt: an XML parser context
507 * @error: the error number
508 * @msg: the error message
509 * @str1: extra data
510 * @str2: extra data
511 *
512 * Handle a warning.
513 */
514static void
515xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
516 const char *msg, const xmlChar *str1, const xmlChar *str2)
517{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000518 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000519
Daniel Veillard157fee02003-10-31 10:36:03 +0000520 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
521 (ctxt->instate == XML_PARSER_EOF))
522 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000523 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
524 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000525 schannel = ctxt->sax->serror;
526 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000527 (ctxt->sax) ? ctxt->sax->warning : NULL,
528 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000529 ctxt, NULL, XML_FROM_PARSER, error,
530 XML_ERR_WARNING, NULL, 0,
531 (const char *) str1, (const char *) str2, NULL, 0, 0,
532 msg, (const char *) str1, (const char *) str2);
533}
534
535/**
536 * xmlValidityError:
537 * @ctxt: an XML parser context
538 * @error: the error number
539 * @msg: the error message
540 * @str1: extra data
541 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000542 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000543 */
544static void
545xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000546 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000547{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000548 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000549
550 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
551 (ctxt->instate == XML_PARSER_EOF))
552 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000553 if (ctxt != NULL) {
554 ctxt->errNo = error;
555 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
556 schannel = ctxt->sax->serror;
557 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000558 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000559 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000560 ctxt, NULL, XML_FROM_DTD, error,
561 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000562 (const char *) str2, NULL, 0, 0,
563 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000564 if (ctxt != NULL) {
565 ctxt->valid = 0;
566 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000567}
568
569/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570 * xmlFatalErrMsgInt:
571 * @ctxt: an XML parser context
572 * @error: the error number
573 * @msg: the error message
574 * @val: an integer value
575 *
576 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
577 */
578static void
579xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000580 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581{
Daniel Veillard157fee02003-10-31 10:36:03 +0000582 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
583 (ctxt->instate == XML_PARSER_EOF))
584 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000585 if (ctxt != NULL)
586 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000587 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000588 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
589 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000590 if (ctxt != NULL) {
591 ctxt->wellFormed = 0;
592 if (ctxt->recovery == 0)
593 ctxt->disableSAX = 1;
594 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000595}
596
597/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000598 * xmlFatalErrMsgStrIntStr:
599 * @ctxt: an XML parser context
600 * @error: the error number
601 * @msg: the error message
602 * @str1: an string info
603 * @val: an integer value
604 * @str2: an string info
605 *
606 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
607 */
608static void
609xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
610 const char *msg, const xmlChar *str1, int val,
611 const xmlChar *str2)
612{
Daniel Veillard157fee02003-10-31 10:36:03 +0000613 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
614 (ctxt->instate == XML_PARSER_EOF))
615 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000616 if (ctxt != NULL)
617 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000618 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000619 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
620 NULL, 0, (const char *) str1, (const char *) str2,
621 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000622 if (ctxt != NULL) {
623 ctxt->wellFormed = 0;
624 if (ctxt->recovery == 0)
625 ctxt->disableSAX = 1;
626 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000627}
628
629/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000630 * xmlFatalErrMsgStr:
631 * @ctxt: an XML parser context
632 * @error: the error number
633 * @msg: the error message
634 * @val: a string value
635 *
636 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
637 */
638static void
639xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000640 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000641{
Daniel Veillard157fee02003-10-31 10:36:03 +0000642 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
643 (ctxt->instate == XML_PARSER_EOF))
644 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000645 if (ctxt != NULL)
646 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000647 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000648 XML_FROM_PARSER, error, XML_ERR_FATAL,
649 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
650 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000651 if (ctxt != NULL) {
652 ctxt->wellFormed = 0;
653 if (ctxt->recovery == 0)
654 ctxt->disableSAX = 1;
655 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000656}
657
658/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000659 * xmlErrMsgStr:
660 * @ctxt: an XML parser context
661 * @error: the error number
662 * @msg: the error message
663 * @val: a string value
664 *
665 * Handle a non fatal parser error
666 */
667static void
668xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
669 const char *msg, const xmlChar * val)
670{
Daniel Veillard157fee02003-10-31 10:36:03 +0000671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL)
675 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000676 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000677 XML_FROM_PARSER, error, XML_ERR_ERROR,
678 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
679 val);
680}
681
682/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000683 * xmlNsErr:
684 * @ctxt: an XML parser context
685 * @error: the error number
686 * @msg: the message
687 * @info1: extra information string
688 * @info2: extra information string
689 *
690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
691 */
692static void
693xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000695 const xmlChar * info1, const xmlChar * info2,
696 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000697{
Daniel Veillard157fee02003-10-31 10:36:03 +0000698 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
699 (ctxt->instate == XML_PARSER_EOF))
700 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000701 if (ctxt != NULL)
702 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000703 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000704 XML_ERR_ERROR, NULL, 0, (const char *) info1,
705 (const char *) info2, (const char *) info3, 0, 0, msg,
706 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000707 if (ctxt != NULL)
708 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000709}
710
Daniel Veillard37334572008-07-31 08:20:02 +0000711/**
712 * xmlNsWarn
713 * @ctxt: an XML parser context
714 * @error: the error number
715 * @msg: the message
716 * @info1: extra information string
717 * @info2: extra information string
718 *
719 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
720 */
721static void
722xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
723 const char *msg,
724 const xmlChar * info1, const xmlChar * info2,
725 const xmlChar * info3)
726{
727 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
728 (ctxt->instate == XML_PARSER_EOF))
729 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000730 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
731 XML_ERR_WARNING, NULL, 0, (const char *) info1,
732 (const char *) info2, (const char *) info3, 0, 0, msg,
733 info1, info2, info3);
734}
735
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000736/************************************************************************
737 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000738 * Library wide options *
739 * *
740 ************************************************************************/
741
742/**
743 * xmlHasFeature:
744 * @feature: the feature to be examined
745 *
746 * Examines if the library has been compiled with a given feature.
747 *
748 * Returns a non-zero value if the feature exist, otherwise zero.
749 * Returns zero (0) if the feature does not exist or an unknown
750 * unknown feature is requested, non-zero otherwise.
751 */
752int
753xmlHasFeature(xmlFeature feature)
754{
755 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_THREAD_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_TREE_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_OUTPUT_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_PUSH_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_READER_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_PATTERN_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_WRITER_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef LIBXML_SAX1_ENABLED
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_FTP_ENABLED
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000810 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000811#ifdef LIBXML_HTTP_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000816 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000817#ifdef LIBXML_VALID_ENABLED
818 return(1);
819#else
820 return(0);
821#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000822 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000823#ifdef LIBXML_HTML_ENABLED
824 return(1);
825#else
826 return(0);
827#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000828 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000829#ifdef LIBXML_LEGACY_ENABLED
830 return(1);
831#else
832 return(0);
833#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000834 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000835#ifdef LIBXML_C14N_ENABLED
836 return(1);
837#else
838 return(0);
839#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000840 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000841#ifdef LIBXML_CATALOG_ENABLED
842 return(1);
843#else
844 return(0);
845#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000846 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000847#ifdef LIBXML_XPATH_ENABLED
848 return(1);
849#else
850 return(0);
851#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000852 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000853#ifdef LIBXML_XPTR_ENABLED
854 return(1);
855#else
856 return(0);
857#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000858 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000859#ifdef LIBXML_XINCLUDE_ENABLED
860 return(1);
861#else
862 return(0);
863#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000864 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000865#ifdef LIBXML_ICONV_ENABLED
866 return(1);
867#else
868 return(0);
869#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000870 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000871#ifdef LIBXML_ISO8859X_ENABLED
872 return(1);
873#else
874 return(0);
875#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000876 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000877#ifdef LIBXML_UNICODE_ENABLED
878 return(1);
879#else
880 return(0);
881#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000882 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000883#ifdef LIBXML_REGEXP_ENABLED
884 return(1);
885#else
886 return(0);
887#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000888 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000889#ifdef LIBXML_AUTOMATA_ENABLED
890 return(1);
891#else
892 return(0);
893#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000894 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000895#ifdef LIBXML_EXPR_ENABLED
896 return(1);
897#else
898 return(0);
899#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000900 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000901#ifdef LIBXML_SCHEMAS_ENABLED
902 return(1);
903#else
904 return(0);
905#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000906 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000907#ifdef LIBXML_SCHEMATRON_ENABLED
908 return(1);
909#else
910 return(0);
911#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000912 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000913#ifdef LIBXML_MODULES_ENABLED
914 return(1);
915#else
916 return(0);
917#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000918 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000919#ifdef LIBXML_DEBUG_ENABLED
920 return(1);
921#else
922 return(0);
923#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000924 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000925#ifdef DEBUG_MEMORY_LOCATION
926 return(1);
927#else
928 return(0);
929#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000930 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000931#ifdef LIBXML_DEBUG_RUNTIME
932 return(1);
933#else
934 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000935#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000936 case XML_WITH_ZLIB:
937#ifdef LIBXML_ZLIB_ENABLED
938 return(1);
939#else
940 return(0);
941#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000942 default:
943 break;
944 }
945 return(0);
946}
947
948/************************************************************************
949 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 * SAX2 defaulted attributes handling *
951 * *
952 ************************************************************************/
953
954/**
955 * xmlDetectSAX2:
956 * @ctxt: an XML parser context
957 *
958 * Do the SAX2 detection and specific intialization
959 */
960static void
961xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
962 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000963#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000964 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
965 ((ctxt->sax->startElementNs != NULL) ||
966 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000967#else
968 ctxt->sax2 = 1;
969#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000970
971 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
972 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
973 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000974 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
975 (ctxt->str_xml_ns == NULL)) {
976 xmlErrMemory(ctxt, NULL);
977 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000978}
979
Daniel Veillarde57ec792003-09-10 10:50:59 +0000980typedef struct _xmlDefAttrs xmlDefAttrs;
981typedef xmlDefAttrs *xmlDefAttrsPtr;
982struct _xmlDefAttrs {
983 int nbAttrs; /* number of defaulted attributes on that element */
984 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +0000985 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000986};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000987
988/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000989 * xmlAttrNormalizeSpace:
990 * @src: the source string
991 * @dst: the target string
992 *
993 * Normalize the space in non CDATA attribute values:
994 * If the attribute type is not CDATA, then the XML processor MUST further
995 * process the normalized attribute value by discarding any leading and
996 * trailing space (#x20) characters, and by replacing sequences of space
997 * (#x20) characters by a single space (#x20) character.
998 * Note that the size of dst need to be at least src, and if one doesn't need
999 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1000 * passing src as dst is just fine.
1001 *
1002 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1003 * is needed.
1004 */
1005static xmlChar *
1006xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1007{
1008 if ((src == NULL) || (dst == NULL))
1009 return(NULL);
1010
1011 while (*src == 0x20) src++;
1012 while (*src != 0) {
1013 if (*src == 0x20) {
1014 while (*src == 0x20) src++;
1015 if (*src != 0)
1016 *dst++ = 0x20;
1017 } else {
1018 *dst++ = *src++;
1019 }
1020 }
1021 *dst = 0;
1022 if (dst == src)
1023 return(NULL);
1024 return(dst);
1025}
1026
1027/**
1028 * xmlAttrNormalizeSpace2:
1029 * @src: the source string
1030 *
1031 * Normalize the space in non CDATA attribute values, a slightly more complex
1032 * front end to avoid allocation problems when running on attribute values
1033 * coming from the input.
1034 *
1035 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1036 * is needed.
1037 */
1038static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001039xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001040{
1041 int i;
1042 int remove_head = 0;
1043 int need_realloc = 0;
1044 const xmlChar *cur;
1045
1046 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1047 return(NULL);
1048 i = *len;
1049 if (i <= 0)
1050 return(NULL);
1051
1052 cur = src;
1053 while (*cur == 0x20) {
1054 cur++;
1055 remove_head++;
1056 }
1057 while (*cur != 0) {
1058 if (*cur == 0x20) {
1059 cur++;
1060 if ((*cur == 0x20) || (*cur == 0)) {
1061 need_realloc = 1;
1062 break;
1063 }
1064 } else
1065 cur++;
1066 }
1067 if (need_realloc) {
1068 xmlChar *ret;
1069
1070 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1071 if (ret == NULL) {
1072 xmlErrMemory(ctxt, NULL);
1073 return(NULL);
1074 }
1075 xmlAttrNormalizeSpace(ret, ret);
1076 *len = (int) strlen((const char *)ret);
1077 return(ret);
1078 } else if (remove_head) {
1079 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001080 memmove(src, src + remove_head, 1 + *len);
1081 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001082 }
1083 return(NULL);
1084}
1085
1086/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001087 * xmlAddDefAttrs:
1088 * @ctxt: an XML parser context
1089 * @fullname: the element fullname
1090 * @fullattr: the attribute fullname
1091 * @value: the attribute value
1092 *
1093 * Add a defaulted attribute for an element
1094 */
1095static void
1096xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1097 const xmlChar *fullname,
1098 const xmlChar *fullattr,
1099 const xmlChar *value) {
1100 xmlDefAttrsPtr defaults;
1101 int len;
1102 const xmlChar *name;
1103 const xmlChar *prefix;
1104
Daniel Veillard6a31b832008-03-26 14:06:44 +00001105 /*
1106 * Allows to detect attribute redefinitions
1107 */
1108 if (ctxt->attsSpecial != NULL) {
1109 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1110 return;
1111 }
1112
Daniel Veillarde57ec792003-09-10 10:50:59 +00001113 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001114 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001115 if (ctxt->attsDefault == NULL)
1116 goto mem_error;
1117 }
1118
1119 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001120 * split the element name into prefix:localname , the string found
1121 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001122 */
1123 name = xmlSplitQName3(fullname, &len);
1124 if (name == NULL) {
1125 name = xmlDictLookup(ctxt->dict, fullname, -1);
1126 prefix = NULL;
1127 } else {
1128 name = xmlDictLookup(ctxt->dict, name, -1);
1129 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1130 }
1131
1132 /*
1133 * make sure there is some storage
1134 */
1135 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1136 if (defaults == NULL) {
1137 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001138 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001139 if (defaults == NULL)
1140 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001141 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001142 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001143 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1144 defaults, NULL) < 0) {
1145 xmlFree(defaults);
1146 goto mem_error;
1147 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001148 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001149 xmlDefAttrsPtr temp;
1150
1151 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001152 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001153 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001155 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001157 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1158 defaults, NULL) < 0) {
1159 xmlFree(defaults);
1160 goto mem_error;
1161 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 }
1163
1164 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001165 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001166 * are within the DTD and hen not associated to namespace names.
1167 */
1168 name = xmlSplitQName3(fullattr, &len);
1169 if (name == NULL) {
1170 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1171 prefix = NULL;
1172 } else {
1173 name = xmlDictLookup(ctxt->dict, name, -1);
1174 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1175 }
1176
Daniel Veillardae0765b2008-07-31 19:54:59 +00001177 defaults->values[5 * defaults->nbAttrs] = name;
1178 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001179 /* intern the string and precompute the end */
1180 len = xmlStrlen(value);
1181 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001182 defaults->values[5 * defaults->nbAttrs + 2] = value;
1183 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1184 if (ctxt->external)
1185 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1186 else
1187 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001188 defaults->nbAttrs++;
1189
1190 return;
1191
1192mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001193 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001194 return;
1195}
1196
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001197/**
1198 * xmlAddSpecialAttr:
1199 * @ctxt: an XML parser context
1200 * @fullname: the element fullname
1201 * @fullattr: the attribute fullname
1202 * @type: the attribute type
1203 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001204 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001205 */
1206static void
1207xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1208 const xmlChar *fullname,
1209 const xmlChar *fullattr,
1210 int type)
1211{
1212 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001213 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001214 if (ctxt->attsSpecial == NULL)
1215 goto mem_error;
1216 }
1217
Daniel Veillardac4118d2008-01-11 05:27:32 +00001218 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1219 return;
1220
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001221 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1222 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001223 return;
1224
1225mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001226 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001227 return;
1228}
1229
Daniel Veillard4432df22003-09-28 18:58:27 +00001230/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001231 * xmlCleanSpecialAttrCallback:
1232 *
1233 * Removes CDATA attributes from the special attribute table
1234 */
1235static void
1236xmlCleanSpecialAttrCallback(void *payload, void *data,
1237 const xmlChar *fullname, const xmlChar *fullattr,
1238 const xmlChar *unused ATTRIBUTE_UNUSED) {
1239 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1240
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001241 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001242 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1243 }
1244}
1245
1246/**
1247 * xmlCleanSpecialAttr:
1248 * @ctxt: an XML parser context
1249 *
1250 * Trim the list of attributes defined to remove all those of type
1251 * CDATA as they are not special. This call should be done when finishing
1252 * to parse the DTD and before starting to parse the document root.
1253 */
1254static void
1255xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1256{
1257 if (ctxt->attsSpecial == NULL)
1258 return;
1259
1260 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1261
1262 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1263 xmlHashFree(ctxt->attsSpecial, NULL);
1264 ctxt->attsSpecial = NULL;
1265 }
1266 return;
1267}
1268
1269/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001270 * xmlCheckLanguageID:
1271 * @lang: pointer to the string value
1272 *
1273 * Checks that the value conforms to the LanguageID production:
1274 *
1275 * NOTE: this is somewhat deprecated, those productions were removed from
1276 * the XML Second edition.
1277 *
1278 * [33] LanguageID ::= Langcode ('-' Subcode)*
1279 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1280 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1281 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1282 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1283 * [38] Subcode ::= ([a-z] | [A-Z])+
1284 *
1285 * Returns 1 if correct 0 otherwise
1286 **/
1287int
1288xmlCheckLanguageID(const xmlChar * lang)
1289{
1290 const xmlChar *cur = lang;
1291
1292 if (cur == NULL)
1293 return (0);
1294 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1295 ((cur[0] == 'I') && (cur[1] == '-'))) {
1296 /*
1297 * IANA code
1298 */
1299 cur += 2;
1300 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1301 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1302 cur++;
1303 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1304 ((cur[0] == 'X') && (cur[1] == '-'))) {
1305 /*
1306 * User code
1307 */
1308 cur += 2;
1309 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1310 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1311 cur++;
1312 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1313 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1314 /*
1315 * ISO639
1316 */
1317 cur++;
1318 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1319 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1320 cur++;
1321 else
1322 return (0);
1323 } else
1324 return (0);
1325 while (cur[0] != 0) { /* non input consuming */
1326 if (cur[0] != '-')
1327 return (0);
1328 cur++;
1329 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1330 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1331 cur++;
1332 else
1333 return (0);
1334 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1335 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1336 cur++;
1337 }
1338 return (1);
1339}
1340
Owen Taylor3473f882001-02-23 17:55:21 +00001341/************************************************************************
1342 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001343 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001344 * *
1345 ************************************************************************/
1346
Daniel Veillard8ed10722009-08-20 19:17:36 +02001347static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1348 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001349
Daniel Veillard0fb18932003-09-07 09:14:37 +00001350#ifdef SAX2
1351/**
1352 * nsPush:
1353 * @ctxt: an XML parser context
1354 * @prefix: the namespace prefix or NULL
1355 * @URL: the namespace name
1356 *
1357 * Pushes a new parser namespace on top of the ns stack
1358 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001359 * Returns -1 in case of error, -2 if the namespace should be discarded
1360 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001361 */
1362static int
1363nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1364{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001365 if (ctxt->options & XML_PARSE_NSCLEAN) {
1366 int i;
1367 for (i = 0;i < ctxt->nsNr;i += 2) {
1368 if (ctxt->nsTab[i] == prefix) {
1369 /* in scope */
1370 if (ctxt->nsTab[i + 1] == URL)
1371 return(-2);
1372 /* out of scope keep it */
1373 break;
1374 }
1375 }
1376 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001377 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1378 ctxt->nsMax = 10;
1379 ctxt->nsNr = 0;
1380 ctxt->nsTab = (const xmlChar **)
1381 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1382 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001383 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001384 ctxt->nsMax = 0;
1385 return (-1);
1386 }
1387 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001388 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001389 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001390 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1391 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1392 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001393 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001394 ctxt->nsMax /= 2;
1395 return (-1);
1396 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001397 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001398 }
1399 ctxt->nsTab[ctxt->nsNr++] = prefix;
1400 ctxt->nsTab[ctxt->nsNr++] = URL;
1401 return (ctxt->nsNr);
1402}
1403/**
1404 * nsPop:
1405 * @ctxt: an XML parser context
1406 * @nr: the number to pop
1407 *
1408 * Pops the top @nr parser prefix/namespace from the ns stack
1409 *
1410 * Returns the number of namespaces removed
1411 */
1412static int
1413nsPop(xmlParserCtxtPtr ctxt, int nr)
1414{
1415 int i;
1416
1417 if (ctxt->nsTab == NULL) return(0);
1418 if (ctxt->nsNr < nr) {
1419 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1420 nr = ctxt->nsNr;
1421 }
1422 if (ctxt->nsNr <= 0)
1423 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001424
Daniel Veillard0fb18932003-09-07 09:14:37 +00001425 for (i = 0;i < nr;i++) {
1426 ctxt->nsNr--;
1427 ctxt->nsTab[ctxt->nsNr] = NULL;
1428 }
1429 return(nr);
1430}
1431#endif
1432
1433static int
1434xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1435 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001436 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001437 int maxatts;
1438
1439 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001440 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001441 atts = (const xmlChar **)
1442 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001443 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001444 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001445 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1446 if (attallocs == NULL) goto mem_error;
1447 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001448 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001449 } else if (nr + 5 > ctxt->maxatts) {
1450 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001451 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1452 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001453 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001454 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001455 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1456 (maxatts / 5) * sizeof(int));
1457 if (attallocs == NULL) goto mem_error;
1458 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001459 ctxt->maxatts = maxatts;
1460 }
1461 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001462mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001463 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001464 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001465}
1466
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001467/**
1468 * inputPush:
1469 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001470 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001471 *
1472 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001473 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001474 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001475 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001476int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001477inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1478{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001479 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001480 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001481 if (ctxt->inputNr >= ctxt->inputMax) {
1482 ctxt->inputMax *= 2;
1483 ctxt->inputTab =
1484 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1485 ctxt->inputMax *
1486 sizeof(ctxt->inputTab[0]));
1487 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001488 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001489 xmlFreeInputStream(value);
1490 ctxt->inputMax /= 2;
1491 value = NULL;
1492 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001493 }
1494 }
1495 ctxt->inputTab[ctxt->inputNr] = value;
1496 ctxt->input = value;
1497 return (ctxt->inputNr++);
1498}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001499/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001500 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001501 * @ctxt: an XML parser context
1502 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001503 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001504 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001505 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001506 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001507xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001508inputPop(xmlParserCtxtPtr ctxt)
1509{
1510 xmlParserInputPtr ret;
1511
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001512 if (ctxt == NULL)
1513 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001514 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001515 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001516 ctxt->inputNr--;
1517 if (ctxt->inputNr > 0)
1518 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1519 else
1520 ctxt->input = NULL;
1521 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001522 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001523 return (ret);
1524}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001525/**
1526 * nodePush:
1527 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001528 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001529 *
1530 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001531 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001532 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001533 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001534int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001535nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1536{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001537 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001538 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001539 xmlNodePtr *tmp;
1540
1541 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1542 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001543 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001544 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001545 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001546 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001547 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001548 ctxt->nodeTab = tmp;
1549 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001550 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001551 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1552 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001553 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001554 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001555 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001556 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001557 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001558 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001559 ctxt->nodeTab[ctxt->nodeNr] = value;
1560 ctxt->node = value;
1561 return (ctxt->nodeNr++);
1562}
Daniel Veillard8915c152008-08-26 13:05:34 +00001563
Daniel Veillard1c732d22002-11-30 11:22:59 +00001564/**
1565 * nodePop:
1566 * @ctxt: an XML parser context
1567 *
1568 * Pops the top element node from the node stack
1569 *
1570 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001571 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001572xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001573nodePop(xmlParserCtxtPtr ctxt)
1574{
1575 xmlNodePtr ret;
1576
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001577 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001578 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001579 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001580 ctxt->nodeNr--;
1581 if (ctxt->nodeNr > 0)
1582 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1583 else
1584 ctxt->node = NULL;
1585 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001586 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001587 return (ret);
1588}
Daniel Veillarda2351322004-06-27 12:08:10 +00001589
1590#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001591/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001592 * nameNsPush:
1593 * @ctxt: an XML parser context
1594 * @value: the element name
1595 * @prefix: the element prefix
1596 * @URI: the element namespace name
1597 *
1598 * Pushes a new element name/prefix/URL on top of the name stack
1599 *
1600 * Returns -1 in case of error, the index in the stack otherwise
1601 */
1602static int
1603nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1604 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1605{
1606 if (ctxt->nameNr >= ctxt->nameMax) {
1607 const xmlChar * *tmp;
1608 void **tmp2;
1609 ctxt->nameMax *= 2;
1610 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1611 ctxt->nameMax *
1612 sizeof(ctxt->nameTab[0]));
1613 if (tmp == NULL) {
1614 ctxt->nameMax /= 2;
1615 goto mem_error;
1616 }
1617 ctxt->nameTab = tmp;
1618 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1619 ctxt->nameMax * 3 *
1620 sizeof(ctxt->pushTab[0]));
1621 if (tmp2 == NULL) {
1622 ctxt->nameMax /= 2;
1623 goto mem_error;
1624 }
1625 ctxt->pushTab = tmp2;
1626 }
1627 ctxt->nameTab[ctxt->nameNr] = value;
1628 ctxt->name = value;
1629 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1630 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001631 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001632 return (ctxt->nameNr++);
1633mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001634 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001635 return (-1);
1636}
1637/**
1638 * nameNsPop:
1639 * @ctxt: an XML parser context
1640 *
1641 * Pops the top element/prefix/URI name from the name stack
1642 *
1643 * Returns the name just removed
1644 */
1645static const xmlChar *
1646nameNsPop(xmlParserCtxtPtr ctxt)
1647{
1648 const xmlChar *ret;
1649
1650 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001651 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001652 ctxt->nameNr--;
1653 if (ctxt->nameNr > 0)
1654 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1655 else
1656 ctxt->name = NULL;
1657 ret = ctxt->nameTab[ctxt->nameNr];
1658 ctxt->nameTab[ctxt->nameNr] = NULL;
1659 return (ret);
1660}
Daniel Veillarda2351322004-06-27 12:08:10 +00001661#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001662
1663/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001664 * namePush:
1665 * @ctxt: an XML parser context
1666 * @value: the element name
1667 *
1668 * Pushes a new element name on top of the name stack
1669 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001670 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001671 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001672int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001673namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001674{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001675 if (ctxt == NULL) return (-1);
1676
Daniel Veillard1c732d22002-11-30 11:22:59 +00001677 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001679 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001680 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001681 ctxt->nameMax *
1682 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001683 if (tmp == NULL) {
1684 ctxt->nameMax /= 2;
1685 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001686 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001687 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001688 }
1689 ctxt->nameTab[ctxt->nameNr] = value;
1690 ctxt->name = value;
1691 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001692mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001693 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001694 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695}
1696/**
1697 * namePop:
1698 * @ctxt: an XML parser context
1699 *
1700 * Pops the top element name from the name stack
1701 *
1702 * Returns the name just removed
1703 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001704const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001705namePop(xmlParserCtxtPtr ctxt)
1706{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001707 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001709 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1710 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001711 ctxt->nameNr--;
1712 if (ctxt->nameNr > 0)
1713 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1714 else
1715 ctxt->name = NULL;
1716 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001717 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001718 return (ret);
1719}
Owen Taylor3473f882001-02-23 17:55:21 +00001720
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001721static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001722 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001723 int *tmp;
1724
Owen Taylor3473f882001-02-23 17:55:21 +00001725 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001726 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1727 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1728 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001729 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001730 ctxt->spaceMax /=2;
1731 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001732 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001733 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001734 }
1735 ctxt->spaceTab[ctxt->spaceNr] = val;
1736 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1737 return(ctxt->spaceNr++);
1738}
1739
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001740static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001741 int ret;
1742 if (ctxt->spaceNr <= 0) return(0);
1743 ctxt->spaceNr--;
1744 if (ctxt->spaceNr > 0)
1745 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1746 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001747 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001748 ret = ctxt->spaceTab[ctxt->spaceNr];
1749 ctxt->spaceTab[ctxt->spaceNr] = -1;
1750 return(ret);
1751}
1752
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1787
/* Byte-level views of the current position in ctxt->input. */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the n bytes starting at s, read as
 * unsigned char, equal c1..cn in order. Comparison stops at the first
 * mismatch. Every argument is parenthesized so that an expression
 * argument is cast and compared as a whole; note that s is still
 * evaluated once per compared byte.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1810
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to
 * nbChars and to the column counter (no line accounting is done). If the
 * new current char is '%' the PE reference handler is called; if it is 0
 * and the input cannot be grown, the input is popped.
 * val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val xmlChars one at a time so
 * that a '\n' increments the line counter and resets the column to 1.
 * val is parenthesized so an expression argument is compared as a whole;
 * it is evaluated on every loop iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
    	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
    	} else ctxt->input->col++;					\
    	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1833
Daniel Veillarda880b122003-04-21 21:36:41 +00001834#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001835 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1836 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001837 xmlSHRINK (ctxt);
1838
1839static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1840 xmlParserInputShrink(ctxt->input);
1841 if ((*ctxt->input->cur == 0) &&
1842 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1843 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001844 }
Owen Taylor3473f882001-02-23 17:55:21 +00001845
Daniel Veillarda880b122003-04-21 21:36:41 +00001846#define GROW if ((ctxt->progressive == 0) && \
1847 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001848 xmlGROW (ctxt);
1849
1850static void xmlGROW (xmlParserCtxtPtr ctxt) {
1851 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1852 if ((*ctxt->input->cur == 0) &&
1853 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1854 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001855}
Owen Taylor3473f882001-02-23 17:55:21 +00001856
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, bumping the column and nbChars
 * counters; the buffer is grown when the new current char is 0.
 * Expands to a brace block, not a do/while statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long,
 * updating line/column (a '\n' starts a new line) and calling the PE
 * reference handler if the next char is '%'.
 * l is parenthesized so an expression argument is added as a whole.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: it receives the length of the decoded character */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a length l of 1 is stored as a single xmlChar, anything else
 * goes through xmlCopyCharMultiByte().
 * Expands to an unterminated if/else statement; b and i must be plain
 * lvalues.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) b[i++] = (xmlChar) (v);				\
    else i += xmlCopyCharMultiByte(&b[i],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001883
1884/**
1885 * xmlSkipBlankChars:
1886 * @ctxt: the XML parser context
1887 *
1888 * skip all blanks character found at that point in the input streams.
1889 * It pops up finished entities in the process if allowable at that point.
1890 *
1891 * Returns the number of space chars skipped
1892 */
1893
1894int
1895xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001896 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001897
1898 /*
1899 * It's Okay to use CUR/NEXT here since all the blanks are on
1900 * the ASCII range.
1901 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001902 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1903 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001904 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001905 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001906 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001907 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001908 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001909 if (*cur == '\n') {
1910 ctxt->input->line++; ctxt->input->col = 1;
1911 }
1912 cur++;
1913 res++;
1914 if (*cur == 0) {
1915 ctxt->input->cur = cur;
1916 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1917 cur = ctxt->input->cur;
1918 }
1919 }
1920 ctxt->input->cur = cur;
1921 } else {
1922 int cur;
1923 do {
1924 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001925 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001926 NEXT;
1927 cur = CUR;
1928 res++;
1929 }
1930 while ((cur == 0) && (ctxt->inputNr > 1) &&
1931 (ctxt->instate != XML_PARSER_COMMENT)) {
1932 xmlPopInput(ctxt);
1933 cur = CUR;
1934 }
1935 /*
1936 * Need to handle support of entities branching here
1937 */
1938 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1939 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1940 }
Owen Taylor3473f882001-02-23 17:55:21 +00001941 return(res);
1942}
1943
1944/************************************************************************
1945 * *
1946 * Commodity functions to handle entities *
1947 * *
1948 ************************************************************************/
1949
1950/**
1951 * xmlPopInput:
1952 * @ctxt: an XML parser context
1953 *
1954 * xmlPopInput: the current input pointed by ctxt->input came to an end
1955 * pop it and return the next char.
1956 *
1957 * Returns the current xmlChar in the parser context
1958 */
1959xmlChar
1960xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001961 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001962 if (xmlParserDebugEntities)
1963 xmlGenericError(xmlGenericErrorContext,
1964 "Popping input %d\n", ctxt->inputNr);
1965 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001966 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001967 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1968 return(xmlPopInput(ctxt));
1969 return(CUR);
1970}
1971
1972/**
1973 * xmlPushInput:
1974 * @ctxt: an XML parser context
1975 * @input: an XML parser input fragment (entity, XML fragment ...).
1976 *
1977 * xmlPushInput: switch to a new input stream which is stacked on top
1978 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001979 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00001980 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001981int
Owen Taylor3473f882001-02-23 17:55:21 +00001982xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001983 int ret;
1984 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001985
1986 if (xmlParserDebugEntities) {
1987 if ((ctxt->input != NULL) && (ctxt->input->filename))
1988 xmlGenericError(xmlGenericErrorContext,
1989 "%s(%d): ", ctxt->input->filename,
1990 ctxt->input->line);
1991 xmlGenericError(xmlGenericErrorContext,
1992 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1993 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001994 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001995 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001996 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001997}
1998
1999/**
2000 * xmlParseCharRef:
2001 * @ctxt: an XML parser context
2002 *
2003 * parse Reference declarations
2004 *
2005 * [66] CharRef ::= '&#' [0-9]+ ';' |
2006 * '&#x' [0-9a-fA-F]+ ';'
2007 *
2008 * [ WFC: Legal Character ]
2009 * Characters referred to using character references must match the
2010 * production for Char.
2011 *
2012 * Returns the value parsed (as an int), 0 in case of error
2013 */
2014int
2015xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002016 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002017 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002018 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002019
Owen Taylor3473f882001-02-23 17:55:21 +00002020 /*
2021 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2022 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002023 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002024 (NXT(2) == 'x')) {
2025 SKIP(3);
2026 GROW;
2027 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002028 if (count++ > 20) {
2029 count = 0;
2030 GROW;
2031 }
2032 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002033 val = val * 16 + (CUR - '0');
2034 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2035 val = val * 16 + (CUR - 'a') + 10;
2036 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2037 val = val * 16 + (CUR - 'A') + 10;
2038 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002039 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002040 val = 0;
2041 break;
2042 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002043 if (val > 0x10FFFF)
2044 outofrange = val;
2045
Owen Taylor3473f882001-02-23 17:55:21 +00002046 NEXT;
2047 count++;
2048 }
2049 if (RAW == ';') {
2050 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002051 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002052 ctxt->nbChars ++;
2053 ctxt->input->cur++;
2054 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002055 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002056 SKIP(2);
2057 GROW;
2058 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002059 if (count++ > 20) {
2060 count = 0;
2061 GROW;
2062 }
2063 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002064 val = val * 10 + (CUR - '0');
2065 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002066 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002067 val = 0;
2068 break;
2069 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002070 if (val > 0x10FFFF)
2071 outofrange = val;
2072
Owen Taylor3473f882001-02-23 17:55:21 +00002073 NEXT;
2074 count++;
2075 }
2076 if (RAW == ';') {
2077 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002078 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002079 ctxt->nbChars ++;
2080 ctxt->input->cur++;
2081 }
2082 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002083 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 }
2085
2086 /*
2087 * [ WFC: Legal Character ]
2088 * Characters referred to using character references must match the
2089 * production for Char.
2090 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002091 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002092 return(val);
2093 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002094 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2095 "xmlParseCharRef: invalid xmlChar value %d\n",
2096 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002097 }
2098 return(0);
2099}
2100
2101/**
2102 * xmlParseStringCharRef:
2103 * @ctxt: an XML parser context
2104 * @str: a pointer to an index in the string
2105 *
2106 * parse Reference declarations, variant parsing from a string rather
2107 * than an an input flow.
2108 *
2109 * [66] CharRef ::= '&#' [0-9]+ ';' |
2110 * '&#x' [0-9a-fA-F]+ ';'
2111 *
2112 * [ WFC: Legal Character ]
2113 * Characters referred to using character references must match the
2114 * production for Char.
2115 *
2116 * Returns the value parsed (as an int), 0 in case of error, str will be
2117 * updated to the current value of the index
2118 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002119static int
Owen Taylor3473f882001-02-23 17:55:21 +00002120xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2121 const xmlChar *ptr;
2122 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002123 unsigned int val = 0;
2124 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002125
2126 if ((str == NULL) || (*str == NULL)) return(0);
2127 ptr = *str;
2128 cur = *ptr;
2129 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2130 ptr += 3;
2131 cur = *ptr;
2132 while (cur != ';') { /* Non input consuming loop */
2133 if ((cur >= '0') && (cur <= '9'))
2134 val = val * 16 + (cur - '0');
2135 else if ((cur >= 'a') && (cur <= 'f'))
2136 val = val * 16 + (cur - 'a') + 10;
2137 else if ((cur >= 'A') && (cur <= 'F'))
2138 val = val * 16 + (cur - 'A') + 10;
2139 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002140 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002141 val = 0;
2142 break;
2143 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002144 if (val > 0x10FFFF)
2145 outofrange = val;
2146
Owen Taylor3473f882001-02-23 17:55:21 +00002147 ptr++;
2148 cur = *ptr;
2149 }
2150 if (cur == ';')
2151 ptr++;
2152 } else if ((cur == '&') && (ptr[1] == '#')){
2153 ptr += 2;
2154 cur = *ptr;
2155 while (cur != ';') { /* Non input consuming loops */
2156 if ((cur >= '0') && (cur <= '9'))
2157 val = val * 10 + (cur - '0');
2158 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002159 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002160 val = 0;
2161 break;
2162 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002163 if (val > 0x10FFFF)
2164 outofrange = val;
2165
Owen Taylor3473f882001-02-23 17:55:21 +00002166 ptr++;
2167 cur = *ptr;
2168 }
2169 if (cur == ';')
2170 ptr++;
2171 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002172 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002173 return(0);
2174 }
2175 *str = ptr;
2176
2177 /*
2178 * [ WFC: Legal Character ]
2179 * Characters referred to using character references must match the
2180 * production for Char.
2181 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002182 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002183 return(val);
2184 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002185 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2186 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2187 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002188 }
2189 return(0);
2190}
2191
2192/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002193 * xmlNewBlanksWrapperInputStream:
2194 * @ctxt: an XML parser context
2195 * @entity: an Entity pointer
2196 *
2197 * Create a new input stream for wrapping
2198 * blanks around a PEReference
2199 *
2200 * Returns the new input stream or NULL
2201 */
2202
2203static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2204
Daniel Veillardf4862f02002-09-10 11:13:43 +00002205static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002206xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2207 xmlParserInputPtr input;
2208 xmlChar *buffer;
2209 size_t length;
2210 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002211 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2212 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002213 return(NULL);
2214 }
2215 if (xmlParserDebugEntities)
2216 xmlGenericError(xmlGenericErrorContext,
2217 "new blanks wrapper for entity: %s\n", entity->name);
2218 input = xmlNewInputStream(ctxt);
2219 if (input == NULL) {
2220 return(NULL);
2221 }
2222 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002223 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002224 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002225 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002226 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002227 return(NULL);
2228 }
2229 buffer [0] = ' ';
2230 buffer [1] = '%';
2231 buffer [length-3] = ';';
2232 buffer [length-2] = ' ';
2233 buffer [length-1] = 0;
2234 memcpy(buffer + 2, entity->name, length - 5);
2235 input->free = deallocblankswrapper;
2236 input->base = buffer;
2237 input->cur = buffer;
2238 input->length = length;
2239 input->end = &buffer[length];
2240 return(input);
2241}
2242
2243/**
Owen Taylor3473f882001-02-23 17:55:21 +00002244 * xmlParserHandlePEReference:
2245 * @ctxt: the parser context
2246 *
2247 * [69] PEReference ::= '%' Name ';'
2248 *
2249 * [ WFC: No Recursion ]
2250 * A parsed entity must not contain a recursive
2251 * reference to itself, either directly or indirectly.
2252 *
2253 * [ WFC: Entity Declared ]
2254 * In a document without any DTD, a document with only an internal DTD
2255 * subset which contains no parameter entity references, or a document
2256 * with "standalone='yes'", ... ... The declaration of a parameter
2257 * entity must precede any reference to it...
2258 *
2259 * [ VC: Entity Declared ]
2260 * In a document with an external subset or external parameter entities
2261 * with "standalone='no'", ... ... The declaration of a parameter entity
2262 * must precede any reference to it...
2263 *
2264 * [ WFC: In DTD ]
2265 * Parameter-entity references may only appear in the DTD.
2266 * NOTE: misleading but this is handled.
2267 *
2268 * A PEReference may have been detected in the current input stream
2269 * the handling is done accordingly to
2270 * http://www.w3.org/TR/REC-xml#entproc
2271 * i.e.
2272 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002273 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002274 */
2275void
2276xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002277 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002278 xmlEntityPtr entity = NULL;
2279 xmlParserInputPtr input;
2280
Owen Taylor3473f882001-02-23 17:55:21 +00002281 if (RAW != '%') return;
2282 switch(ctxt->instate) {
2283 case XML_PARSER_CDATA_SECTION:
2284 return;
2285 case XML_PARSER_COMMENT:
2286 return;
2287 case XML_PARSER_START_TAG:
2288 return;
2289 case XML_PARSER_END_TAG:
2290 return;
2291 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002292 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002293 return;
2294 case XML_PARSER_PROLOG:
2295 case XML_PARSER_START:
2296 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002297 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002298 return;
2299 case XML_PARSER_ENTITY_DECL:
2300 case XML_PARSER_CONTENT:
2301 case XML_PARSER_ATTRIBUTE_VALUE:
2302 case XML_PARSER_PI:
2303 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002304 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002305 /* we just ignore it there */
2306 return;
2307 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002308 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002309 return;
2310 case XML_PARSER_ENTITY_VALUE:
2311 /*
2312 * NOTE: in the case of entity values, we don't do the
2313 * substitution here since we need the literal
2314 * entity value to be able to save the internal
2315 * subset of the document.
2316 * This will be handled by xmlStringDecodeEntities
2317 */
2318 return;
2319 case XML_PARSER_DTD:
2320 /*
2321 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2322 * In the internal DTD subset, parameter-entity references
2323 * can occur only where markup declarations can occur, not
2324 * within markup declarations.
2325 * In that case this is handled in xmlParseMarkupDecl
2326 */
2327 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2328 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002329 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002330 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002331 break;
2332 case XML_PARSER_IGNORE:
2333 return;
2334 }
2335
2336 NEXT;
2337 name = xmlParseName(ctxt);
2338 if (xmlParserDebugEntities)
2339 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002340 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002341 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002342 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002343 } else {
2344 if (RAW == ';') {
2345 NEXT;
2346 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2347 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2348 if (entity == NULL) {
2349
2350 /*
2351 * [ WFC: Entity Declared ]
2352 * In a document without any DTD, a document with only an
2353 * internal DTD subset which contains no parameter entity
2354 * references, or a document with "standalone='yes'", ...
2355 * ... The declaration of a parameter entity must precede
2356 * any reference to it...
2357 */
2358 if ((ctxt->standalone == 1) ||
2359 ((ctxt->hasExternalSubset == 0) &&
2360 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002361 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002362 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002363 } else {
2364 /*
2365 * [ VC: Entity Declared ]
2366 * In a document with an external subset or external
2367 * parameter entities with "standalone='no'", ...
2368 * ... The declaration of a parameter entity must precede
2369 * any reference to it...
2370 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002371 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2372 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2373 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002374 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002375 } else
2376 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2377 "PEReference: %%%s; not found\n",
2378 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002379 ctxt->valid = 0;
2380 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002381 } else if (ctxt->input->free != deallocblankswrapper) {
2382 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002383 if (xmlPushInput(ctxt, input) < 0)
2384 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002385 } else {
2386 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2387 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002388 xmlChar start[4];
2389 xmlCharEncoding enc;
2390
Owen Taylor3473f882001-02-23 17:55:21 +00002391 /*
2392 * handle the extra spaces added before and after
2393 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002394 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002395 */
2396 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002397 if (xmlPushInput(ctxt, input) < 0)
2398 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002399
2400 /*
2401 * Get the 4 first bytes and decode the charset
2402 * if enc != XML_CHAR_ENCODING_NONE
2403 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002404 * Note that, since we may have some non-UTF8
2405 * encoding (like UTF16, bug 135229), the 'length'
2406 * is not known, but we can calculate based upon
2407 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002408 */
2409 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002410 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002411 start[0] = RAW;
2412 start[1] = NXT(1);
2413 start[2] = NXT(2);
2414 start[3] = NXT(3);
2415 enc = xmlDetectCharEncoding(start, 4);
2416 if (enc != XML_CHAR_ENCODING_NONE) {
2417 xmlSwitchEncoding(ctxt, enc);
2418 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002419 }
2420
Owen Taylor3473f882001-02-23 17:55:21 +00002421 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002422 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2423 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002424 xmlParseTextDecl(ctxt);
2425 }
Owen Taylor3473f882001-02-23 17:55:21 +00002426 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002427 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2428 "PEReference: %s is not a parameter entity\n",
2429 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002430 }
2431 }
2432 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002433 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002434 }
Owen Taylor3473f882001-02-23 17:55:21 +00002435 }
2436}
2437
/*
 * Macro used to grow the current buffer.
 *
 * The size variable <buffer>_size is doubled and then increased by @n
 * before the buffer is reallocated.  The expansion site must provide a
 * "mem_error" label: it is jumped to when the reallocation fails, in
 * which case @buffer still points to the old block.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    buffer##_size *= 2;                                                 \
    buffer##_size += (n);                                               \
    tmp = (xmlChar *)                                                   \
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));            \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
}
2450
2451/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002452 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002453 * @ctxt: the parser context
2454 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002455 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002456 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2457 * @end: an end marker xmlChar, 0 if none
2458 * @end2: an end marker xmlChar, 0 if none
2459 * @end3: an end marker xmlChar, 0 if none
2460 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002461 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002462 *
2463 * [67] Reference ::= EntityRef | CharRef
2464 *
2465 * [69] PEReference ::= '%' Name ';'
2466 *
2467 * Returns A newly allocated string with the substitution done. The caller
2468 * must deallocate it !
2469 */
2470xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002471xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2472 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002473 xmlChar *buffer = NULL;
2474 int buffer_size = 0;
2475
2476 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002477 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002478 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002479 xmlEntityPtr ent;
2480 int c,l;
2481 int nbchars = 0;
2482
Daniel Veillarda82b1822004-11-08 16:24:57 +00002483 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002484 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002485 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002486
Daniel Veillard0161e632008-08-28 15:36:32 +00002487 if (((ctxt->depth > 40) &&
2488 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2489 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002490 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002491 return(NULL);
2492 }
2493
2494 /*
2495 * allocate a translation buffer.
2496 */
2497 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002498 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002499 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002500
2501 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002502 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002503 * we are operating on already parsed values.
2504 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002505 if (str < last)
2506 c = CUR_SCHAR(str, l);
2507 else
2508 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002509 while ((c != 0) && (c != end) && /* non input consuming loop */
2510 (c != end2) && (c != end3)) {
2511
2512 if (c == 0) break;
2513 if ((c == '&') && (str[1] == '#')) {
2514 int val = xmlParseStringCharRef(ctxt, &str);
2515 if (val != 0) {
2516 COPY_BUF(0,buffer,nbchars,val);
2517 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002518 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002519 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002520 }
Owen Taylor3473f882001-02-23 17:55:21 +00002521 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2522 if (xmlParserDebugEntities)
2523 xmlGenericError(xmlGenericErrorContext,
2524 "String decoding Entity Reference: %.30s\n",
2525 str);
2526 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002527 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2528 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002529 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002530 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002531 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 if ((ent != NULL) &&
2533 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2534 if (ent->content != NULL) {
2535 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002536 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002537 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002538 }
Owen Taylor3473f882001-02-23 17:55:21 +00002539 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002540 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2541 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002542 }
2543 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002544 ctxt->depth++;
2545 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2546 0, 0, 0);
2547 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002548
Owen Taylor3473f882001-02-23 17:55:21 +00002549 if (rep != NULL) {
2550 current = rep;
2551 while (*current != 0) { /* non input consuming loop */
2552 buffer[nbchars++] = *current++;
2553 if (nbchars >
2554 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002555 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2556 goto int_error;
2557 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002558 }
2559 }
2560 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002561 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002562 }
2563 } else if (ent != NULL) {
2564 int i = xmlStrlen(ent->name);
2565 const xmlChar *cur = ent->name;
2566
2567 buffer[nbchars++] = '&';
2568 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002569 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002570 }
2571 for (;i > 0;i--)
2572 buffer[nbchars++] = *cur++;
2573 buffer[nbchars++] = ';';
2574 }
2575 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2576 if (xmlParserDebugEntities)
2577 xmlGenericError(xmlGenericErrorContext,
2578 "String decoding PE Reference: %.30s\n", str);
2579 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002580 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2581 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002582 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002583 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002584 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002585 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002586 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002587 }
Owen Taylor3473f882001-02-23 17:55:21 +00002588 ctxt->depth++;
2589 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2590 0, 0, 0);
2591 ctxt->depth--;
2592 if (rep != NULL) {
2593 current = rep;
2594 while (*current != 0) { /* non input consuming loop */
2595 buffer[nbchars++] = *current++;
2596 if (nbchars >
2597 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002598 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2599 goto int_error;
2600 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002601 }
2602 }
2603 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002604 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002605 }
2606 }
2607 } else {
2608 COPY_BUF(l,buffer,nbchars,c);
2609 str += l;
2610 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002611 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002612 }
2613 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002614 if (str < last)
2615 c = CUR_SCHAR(str, l);
2616 else
2617 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002618 }
2619 buffer[nbchars++] = 0;
2620 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002621
2622mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002623 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002624int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002625 if (rep != NULL)
2626 xmlFree(rep);
2627 if (buffer != NULL)
2628 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002629 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002630}
2631
Daniel Veillarde57ec792003-09-10 10:50:59 +00002632/**
2633 * xmlStringDecodeEntities:
2634 * @ctxt: the parser context
2635 * @str: the input string
2636 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2637 * @end: an end marker xmlChar, 0 if none
2638 * @end2: an end marker xmlChar, 0 if none
2639 * @end3: an end marker xmlChar, 0 if none
2640 *
2641 * Takes a entity string content and process to do the adequate substitutions.
2642 *
2643 * [67] Reference ::= EntityRef | CharRef
2644 *
2645 * [69] PEReference ::= '%' Name ';'
2646 *
2647 * Returns A newly allocated string with the substitution done. The caller
2648 * must deallocate it !
2649 */
2650xmlChar *
2651xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2652 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002653 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002654 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2655 end, end2, end3));
2656}
Owen Taylor3473f882001-02-23 17:55:21 +00002657
2658/************************************************************************
2659 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002660 * Commodity functions, cleanup needed ? *
2661 * *
2662 ************************************************************************/
2663
2664/**
2665 * areBlanks:
2666 * @ctxt: an XML parser context
2667 * @str: a xmlChar *
2668 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002669 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002670 *
2671 * Is this a sequence of blank chars that one can ignore ?
2672 *
2673 * Returns 1 if ignorable 0 otherwise.
2674 */
2675
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002676static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2677 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002678 int i, ret;
2679 xmlNodePtr lastChild;
2680
Daniel Veillard05c13a22001-09-09 08:38:09 +00002681 /*
2682 * Don't spend time trying to differentiate them, the same callback is
2683 * used !
2684 */
2685 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002686 return(0);
2687
Owen Taylor3473f882001-02-23 17:55:21 +00002688 /*
2689 * Check for xml:space value.
2690 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002691 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2692 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002693 return(0);
2694
2695 /*
2696 * Check that the string is made of blanks
2697 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002698 if (blank_chars == 0) {
2699 for (i = 0;i < len;i++)
2700 if (!(IS_BLANK_CH(str[i]))) return(0);
2701 }
Owen Taylor3473f882001-02-23 17:55:21 +00002702
2703 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002704 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002705 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002706 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002707 if (ctxt->myDoc != NULL) {
2708 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2709 if (ret == 0) return(1);
2710 if (ret == 1) return(0);
2711 }
2712
2713 /*
2714 * Otherwise, heuristic :-\
2715 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002716 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002717 if ((ctxt->node->children == NULL) &&
2718 (RAW == '<') && (NXT(1) == '/')) return(0);
2719
2720 lastChild = xmlGetLastChild(ctxt->node);
2721 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002722 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2723 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002724 } else if (xmlNodeIsText(lastChild))
2725 return(0);
2726 else if ((ctxt->node->children != NULL) &&
2727 (xmlNodeIsText(ctxt->node->children)))
2728 return(0);
2729 return(1);
2730}
2731
Owen Taylor3473f882001-02-23 17:55:21 +00002732/************************************************************************
2733 * *
2734 * Extra stuff for namespace support *
2735 * Relates to http://www.w3.org/TR/WD-xml-names *
2736 * *
2737 ************************************************************************/
2738
2739/**
2740 * xmlSplitQName:
2741 * @ctxt: an XML parser context
2742 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002743 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002744 *
2745 * parse an UTF8 encoded XML qualified name string
2746 *
2747 * [NS 5] QName ::= (Prefix ':')? LocalPart
2748 *
2749 * [NS 6] Prefix ::= NCName
2750 *
2751 * [NS 7] LocalPart ::= NCName
2752 *
2753 * Returns the local part, and prefix is updated
2754 * to get the Prefix if any.
2755 */
2756
2757xmlChar *
2758xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2759 xmlChar buf[XML_MAX_NAMELEN + 5];
2760 xmlChar *buffer = NULL;
2761 int len = 0;
2762 int max = XML_MAX_NAMELEN;
2763 xmlChar *ret = NULL;
2764 const xmlChar *cur = name;
2765 int c;
2766
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002767 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002768 *prefix = NULL;
2769
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002770 if (cur == NULL) return(NULL);
2771
Owen Taylor3473f882001-02-23 17:55:21 +00002772#ifndef XML_XML_NAMESPACE
2773 /* xml: prefix is not really a namespace */
2774 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2775 (cur[2] == 'l') && (cur[3] == ':'))
2776 return(xmlStrdup(name));
2777#endif
2778
Daniel Veillard597bc482003-07-24 16:08:28 +00002779 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002780 if (cur[0] == ':')
2781 return(xmlStrdup(name));
2782
2783 c = *cur++;
2784 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2785 buf[len++] = c;
2786 c = *cur++;
2787 }
2788 if (len >= max) {
2789 /*
2790 * Okay someone managed to make a huge name, so he's ready to pay
2791 * for the processing speed.
2792 */
2793 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002794
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002795 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002796 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002797 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002798 return(NULL);
2799 }
2800 memcpy(buffer, buf, len);
2801 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2802 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002803 xmlChar *tmp;
2804
Owen Taylor3473f882001-02-23 17:55:21 +00002805 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002806 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002807 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002808 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002809 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002810 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002811 return(NULL);
2812 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002813 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002814 }
2815 buffer[len++] = c;
2816 c = *cur++;
2817 }
2818 buffer[len] = 0;
2819 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002820
Daniel Veillard597bc482003-07-24 16:08:28 +00002821 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002822 if (buffer != NULL)
2823 xmlFree(buffer);
2824 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002825 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002826 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002827
Owen Taylor3473f882001-02-23 17:55:21 +00002828 if (buffer == NULL)
2829 ret = xmlStrndup(buf, len);
2830 else {
2831 ret = buffer;
2832 buffer = NULL;
2833 max = XML_MAX_NAMELEN;
2834 }
2835
2836
2837 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002838 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002839 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002840 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002841 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002842 }
Owen Taylor3473f882001-02-23 17:55:21 +00002843 len = 0;
2844
Daniel Veillardbb284f42002-10-16 18:02:47 +00002845 /*
2846 * Check that the first character is proper to start
2847 * a new name
2848 */
2849 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2850 ((c >= 0x41) && (c <= 0x5A)) ||
2851 (c == '_') || (c == ':'))) {
2852 int l;
2853 int first = CUR_SCHAR(cur, l);
2854
2855 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002856 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002857 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002858 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002859 }
2860 }
2861 cur++;
2862
Owen Taylor3473f882001-02-23 17:55:21 +00002863 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2864 buf[len++] = c;
2865 c = *cur++;
2866 }
2867 if (len >= max) {
2868 /*
2869 * Okay someone managed to make a huge name, so he's ready to pay
2870 * for the processing speed.
2871 */
2872 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002873
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002874 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002876 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002877 return(NULL);
2878 }
2879 memcpy(buffer, buf, len);
2880 while (c != 0) { /* tested bigname2.xml */
2881 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002882 xmlChar *tmp;
2883
Owen Taylor3473f882001-02-23 17:55:21 +00002884 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002885 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002886 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002887 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002888 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002889 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002890 return(NULL);
2891 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002892 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002893 }
2894 buffer[len++] = c;
2895 c = *cur++;
2896 }
2897 buffer[len] = 0;
2898 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002899
Owen Taylor3473f882001-02-23 17:55:21 +00002900 if (buffer == NULL)
2901 ret = xmlStrndup(buf, len);
2902 else {
2903 ret = buffer;
2904 }
2905 }
2906
2907 return(ret);
2908}
2909
2910/************************************************************************
2911 * *
2912 * The parser itself *
2913 * Relates to http://www.w3.org/TR/REC-xml *
2914 * *
2915 ************************************************************************/
2916
Daniel Veillard34e3f642008-07-29 09:02:27 +00002917/************************************************************************
2918 * *
2919 * Routines to parse Name, NCName and NmToken *
2920 * *
2921 ************************************************************************/
#ifdef DEBUG
/*
 * Statistics compiled in only when DEBUG is defined; presumably each
 * one is bumped by the parsing routine it is named after.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseStringName = 0;
#endif
2930
Daniel Veillard34e3f642008-07-29 09:02:27 +00002931/*
2932 * The two following functions are related to the change of accepted
2933 * characters for Name and NmToken in the Revision 5 of XML-1.0
2934 * They correspond to the modified production [4] and the new production [4a]
2935 * changes in that revision. Also note that the macros used for the
2936 * productions Letter, Digit, CombiningChar and Extender are not needed
2937 * anymore.
2938 * We still keep compatibility to pre-revision5 parsing semantic if the
2939 * new XML_PARSE_OLD10 option is given to the parser.
2940 */
2941static int
2942xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2943 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2944 /*
2945 * Use the new checks of production [4] [4a] amd [5] of the
2946 * Update 5 of XML-1.0
2947 */
2948 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2949 (((c >= 'a') && (c <= 'z')) ||
2950 ((c >= 'A') && (c <= 'Z')) ||
2951 (c == '_') || (c == ':') ||
2952 ((c >= 0xC0) && (c <= 0xD6)) ||
2953 ((c >= 0xD8) && (c <= 0xF6)) ||
2954 ((c >= 0xF8) && (c <= 0x2FF)) ||
2955 ((c >= 0x370) && (c <= 0x37D)) ||
2956 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2957 ((c >= 0x200C) && (c <= 0x200D)) ||
2958 ((c >= 0x2070) && (c <= 0x218F)) ||
2959 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2960 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2961 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2962 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2963 ((c >= 0x10000) && (c <= 0xEFFFF))))
2964 return(1);
2965 } else {
2966 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2967 return(1);
2968 }
2969 return(0);
2970}
2971
2972static int
2973xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2974 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2975 /*
2976 * Use the new checks of production [4] [4a] amd [5] of the
2977 * Update 5 of XML-1.0
2978 */
2979 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2980 (((c >= 'a') && (c <= 'z')) ||
2981 ((c >= 'A') && (c <= 'Z')) ||
2982 ((c >= '0') && (c <= '9')) || /* !start */
2983 (c == '_') || (c == ':') ||
2984 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2985 ((c >= 0xC0) && (c <= 0xD6)) ||
2986 ((c >= 0xD8) && (c <= 0xF6)) ||
2987 ((c >= 0xF8) && (c <= 0x2FF)) ||
2988 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2989 ((c >= 0x370) && (c <= 0x37D)) ||
2990 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2991 ((c >= 0x200C) && (c <= 0x200D)) ||
2992 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2993 ((c >= 0x2070) && (c <= 0x218F)) ||
2994 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2995 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2996 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2997 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2998 ((c >= 0x10000) && (c <= 0xEFFFF))))
2999 return(1);
3000 } else {
3001 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3002 (c == '.') || (c == '-') ||
3003 (c == '_') || (c == ':') ||
3004 (IS_COMBINING(c)) ||
3005 (IS_EXTENDER(c)))
3006 return(1);
3007 }
3008 return(0);
3009}
3010
Daniel Veillarde57ec792003-09-10 10:50:59 +00003011static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003012 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003013
Daniel Veillard34e3f642008-07-29 09:02:27 +00003014static const xmlChar *
3015xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3016 int len = 0, l;
3017 int c;
3018 int count = 0;
3019
Daniel Veillardc6561462009-03-25 10:22:31 +00003020#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003021 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003022#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003023
3024 /*
3025 * Handler for more complex cases
3026 */
3027 GROW;
3028 c = CUR_CHAR(l);
3029 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3030 /*
3031 * Use the new checks of production [4] [4a] amd [5] of the
3032 * Update 5 of XML-1.0
3033 */
3034 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3035 (!(((c >= 'a') && (c <= 'z')) ||
3036 ((c >= 'A') && (c <= 'Z')) ||
3037 (c == '_') || (c == ':') ||
3038 ((c >= 0xC0) && (c <= 0xD6)) ||
3039 ((c >= 0xD8) && (c <= 0xF6)) ||
3040 ((c >= 0xF8) && (c <= 0x2FF)) ||
3041 ((c >= 0x370) && (c <= 0x37D)) ||
3042 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3043 ((c >= 0x200C) && (c <= 0x200D)) ||
3044 ((c >= 0x2070) && (c <= 0x218F)) ||
3045 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3046 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3047 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3048 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3049 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3050 return(NULL);
3051 }
3052 len += l;
3053 NEXTL(l);
3054 c = CUR_CHAR(l);
3055 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3056 (((c >= 'a') && (c <= 'z')) ||
3057 ((c >= 'A') && (c <= 'Z')) ||
3058 ((c >= '0') && (c <= '9')) || /* !start */
3059 (c == '_') || (c == ':') ||
3060 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3061 ((c >= 0xC0) && (c <= 0xD6)) ||
3062 ((c >= 0xD8) && (c <= 0xF6)) ||
3063 ((c >= 0xF8) && (c <= 0x2FF)) ||
3064 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3065 ((c >= 0x370) && (c <= 0x37D)) ||
3066 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3067 ((c >= 0x200C) && (c <= 0x200D)) ||
3068 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3069 ((c >= 0x2070) && (c <= 0x218F)) ||
3070 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3071 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3072 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3073 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3074 ((c >= 0x10000) && (c <= 0xEFFFF))
3075 )) {
3076 if (count++ > 100) {
3077 count = 0;
3078 GROW;
3079 }
3080 len += l;
3081 NEXTL(l);
3082 c = CUR_CHAR(l);
3083 }
3084 } else {
3085 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3086 (!IS_LETTER(c) && (c != '_') &&
3087 (c != ':'))) {
3088 return(NULL);
3089 }
3090 len += l;
3091 NEXTL(l);
3092 c = CUR_CHAR(l);
3093
3094 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3095 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3096 (c == '.') || (c == '-') ||
3097 (c == '_') || (c == ':') ||
3098 (IS_COMBINING(c)) ||
3099 (IS_EXTENDER(c)))) {
3100 if (count++ > 100) {
3101 count = 0;
3102 GROW;
3103 }
3104 len += l;
3105 NEXTL(l);
3106 c = CUR_CHAR(l);
3107 }
3108 }
3109 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3110 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3111 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3112}
3113
Owen Taylor3473f882001-02-23 17:55:21 +00003114/**
3115 * xmlParseName:
3116 * @ctxt: an XML parser context
3117 *
3118 * parse an XML name.
3119 *
3120 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3121 * CombiningChar | Extender
3122 *
3123 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3124 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003125 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003126 *
3127 * Returns the Name parsed or NULL
3128 */
3129
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003130const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003131xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003132 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003133 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003134 int count = 0;
3135
3136 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003137
Daniel Veillardc6561462009-03-25 10:22:31 +00003138#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003139 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003140#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003141
Daniel Veillard48b2f892001-02-25 16:11:03 +00003142 /*
3143 * Accelerator for simple ASCII names
3144 */
3145 in = ctxt->input->cur;
3146 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3147 ((*in >= 0x41) && (*in <= 0x5A)) ||
3148 (*in == '_') || (*in == ':')) {
3149 in++;
3150 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3151 ((*in >= 0x41) && (*in <= 0x5A)) ||
3152 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003153 (*in == '_') || (*in == '-') ||
3154 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003155 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003156 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003157 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003158 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003159 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003160 ctxt->nbChars += count;
3161 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003162 if (ret == NULL)
3163 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003164 return(ret);
3165 }
3166 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003167 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003168 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003169}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003170
Daniel Veillard34e3f642008-07-29 09:02:27 +00003171static const xmlChar *
3172xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3173 int len = 0, l;
3174 int c;
3175 int count = 0;
3176
Daniel Veillardc6561462009-03-25 10:22:31 +00003177#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003178 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003179#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003180
3181 /*
3182 * Handler for more complex cases
3183 */
3184 GROW;
3185 c = CUR_CHAR(l);
3186 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3187 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3188 return(NULL);
3189 }
3190
3191 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3192 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3193 if (count++ > 100) {
3194 count = 0;
3195 GROW;
3196 }
3197 len += l;
3198 NEXTL(l);
3199 c = CUR_CHAR(l);
3200 }
3201 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3202}
3203
3204/**
3205 * xmlParseNCName:
3206 * @ctxt: an XML parser context
3207 * @len: lenght of the string parsed
3208 *
3209 * parse an XML name.
3210 *
3211 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3212 * CombiningChar | Extender
3213 *
3214 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3215 *
3216 * Returns the Name parsed or NULL
3217 */
3218
3219static const xmlChar *
3220xmlParseNCName(xmlParserCtxtPtr ctxt) {
3221 const xmlChar *in;
3222 const xmlChar *ret;
3223 int count = 0;
3224
Daniel Veillardc6561462009-03-25 10:22:31 +00003225#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003226 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003227#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003228
3229 /*
3230 * Accelerator for simple ASCII names
3231 */
3232 in = ctxt->input->cur;
3233 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3234 ((*in >= 0x41) && (*in <= 0x5A)) ||
3235 (*in == '_')) {
3236 in++;
3237 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3238 ((*in >= 0x41) && (*in <= 0x5A)) ||
3239 ((*in >= 0x30) && (*in <= 0x39)) ||
3240 (*in == '_') || (*in == '-') ||
3241 (*in == '.'))
3242 in++;
3243 if ((*in > 0) && (*in < 0x80)) {
3244 count = in - ctxt->input->cur;
3245 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3246 ctxt->input->cur = in;
3247 ctxt->nbChars += count;
3248 ctxt->input->col += count;
3249 if (ret == NULL) {
3250 xmlErrMemory(ctxt, NULL);
3251 }
3252 return(ret);
3253 }
3254 }
3255 return(xmlParseNCNameComplex(ctxt));
3256}
3257
Daniel Veillard46de64e2002-05-29 08:21:33 +00003258/**
3259 * xmlParseNameAndCompare:
3260 * @ctxt: an XML parser context
3261 *
3262 * parse an XML name and compares for match
3263 * (specialized for endtag parsing)
3264 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003265 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3266 * and the name for mismatch
3267 */
3268
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003269static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003270xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003271 register const xmlChar *cmp = other;
3272 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003273 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003274
3275 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003276
Daniel Veillard46de64e2002-05-29 08:21:33 +00003277 in = ctxt->input->cur;
3278 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003279 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003280 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003281 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003282 }
William M. Brack76e95df2003-10-18 16:20:14 +00003283 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003284 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003285 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003286 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003287 }
3288 /* failure (or end of input buffer), check with full function */
3289 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003290 /* strings coming from the dictionnary direct compare possible */
3291 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003292 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003293 }
3294 return ret;
3295}
3296
Owen Taylor3473f882001-02-23 17:55:21 +00003297/**
3298 * xmlParseStringName:
3299 * @ctxt: an XML parser context
3300 * @str: a pointer to the string pointer (IN/OUT)
3301 *
3302 * parse an XML name.
3303 *
3304 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3305 * CombiningChar | Extender
3306 *
3307 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3308 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003309 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003310 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003311 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003312 * is updated to the current location in the string.
3313 */
3314
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003315static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003316xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3317 xmlChar buf[XML_MAX_NAMELEN + 5];
3318 const xmlChar *cur = *str;
3319 int len = 0, l;
3320 int c;
3321
Daniel Veillardc6561462009-03-25 10:22:31 +00003322#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003323 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003324#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003325
Owen Taylor3473f882001-02-23 17:55:21 +00003326 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003327 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003328 return(NULL);
3329 }
3330
Daniel Veillard34e3f642008-07-29 09:02:27 +00003331 COPY_BUF(l,buf,len,c);
3332 cur += l;
3333 c = CUR_SCHAR(cur, l);
3334 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003335 COPY_BUF(l,buf,len,c);
3336 cur += l;
3337 c = CUR_SCHAR(cur, l);
3338 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3339 /*
3340 * Okay someone managed to make a huge name, so he's ready to pay
3341 * for the processing speed.
3342 */
3343 xmlChar *buffer;
3344 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003345
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003346 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003347 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003348 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003349 return(NULL);
3350 }
3351 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003352 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003353 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003354 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003355 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003356 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003357 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003358 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003359 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003360 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003361 return(NULL);
3362 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003363 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003364 }
3365 COPY_BUF(l,buffer,len,c);
3366 cur += l;
3367 c = CUR_SCHAR(cur, l);
3368 }
3369 buffer[len] = 0;
3370 *str = cur;
3371 return(buffer);
3372 }
3373 }
3374 *str = cur;
3375 return(xmlStrndup(buf, len));
3376}
3377
3378/**
3379 * xmlParseNmtoken:
3380 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003381 *
Owen Taylor3473f882001-02-23 17:55:21 +00003382 * parse an XML Nmtoken.
3383 *
3384 * [7] Nmtoken ::= (NameChar)+
3385 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003386 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003387 *
3388 * Returns the Nmtoken parsed or NULL
3389 */
3390
3391xmlChar *
3392xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3393 xmlChar buf[XML_MAX_NAMELEN + 5];
3394 int len = 0, l;
3395 int c;
3396 int count = 0;
3397
Daniel Veillardc6561462009-03-25 10:22:31 +00003398#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003399 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003400#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003401
Owen Taylor3473f882001-02-23 17:55:21 +00003402 GROW;
3403 c = CUR_CHAR(l);
3404
Daniel Veillard34e3f642008-07-29 09:02:27 +00003405 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003406 if (count++ > 100) {
3407 count = 0;
3408 GROW;
3409 }
3410 COPY_BUF(l,buf,len,c);
3411 NEXTL(l);
3412 c = CUR_CHAR(l);
3413 if (len >= XML_MAX_NAMELEN) {
3414 /*
3415 * Okay someone managed to make a huge token, so he's ready to pay
3416 * for the processing speed.
3417 */
3418 xmlChar *buffer;
3419 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003420
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003421 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003422 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003423 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003424 return(NULL);
3425 }
3426 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003427 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003428 if (count++ > 100) {
3429 count = 0;
3430 GROW;
3431 }
3432 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003433 xmlChar *tmp;
3434
Owen Taylor3473f882001-02-23 17:55:21 +00003435 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003436 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003437 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003438 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003439 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003440 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003441 return(NULL);
3442 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003443 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003444 }
3445 COPY_BUF(l,buffer,len,c);
3446 NEXTL(l);
3447 c = CUR_CHAR(l);
3448 }
3449 buffer[len] = 0;
3450 return(buffer);
3451 }
3452 }
3453 if (len == 0)
3454 return(NULL);
3455 return(xmlStrndup(buf, len));
3456}
3457
3458/**
3459 * xmlParseEntityValue:
3460 * @ctxt: an XML parser context
3461 * @orig: if non-NULL store a copy of the original entity value
3462 *
3463 * parse a value for ENTITY declarations
3464 *
3465 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3466 * "'" ([^%&'] | PEReference | Reference)* "'"
3467 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003468 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003469 */
3470
3471xmlChar *
3472xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3473 xmlChar *buf = NULL;
3474 int len = 0;
3475 int size = XML_PARSER_BUFFER_SIZE;
3476 int c, l;
3477 xmlChar stop;
3478 xmlChar *ret = NULL;
3479 const xmlChar *cur = NULL;
3480 xmlParserInputPtr input;
3481
3482 if (RAW == '"') stop = '"';
3483 else if (RAW == '\'') stop = '\'';
3484 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003485 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003486 return(NULL);
3487 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003488 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003489 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003490 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003491 return(NULL);
3492 }
3493
3494 /*
3495 * The content of the entity definition is copied in a buffer.
3496 */
3497
3498 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3499 input = ctxt->input;
3500 GROW;
3501 NEXT;
3502 c = CUR_CHAR(l);
3503 /*
3504 * NOTE: 4.4.5 Included in Literal
3505 * When a parameter entity reference appears in a literal entity
3506 * value, ... a single or double quote character in the replacement
3507 * text is always treated as a normal data character and will not
3508 * terminate the literal.
3509 * In practice it means we stop the loop only when back at parsing
3510 * the initial entity and the quote is found
3511 */
William M. Brack871611b2003-10-18 04:53:14 +00003512 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003513 (ctxt->input != input))) {
3514 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003515 xmlChar *tmp;
3516
Owen Taylor3473f882001-02-23 17:55:21 +00003517 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003518 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3519 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003520 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003521 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003522 return(NULL);
3523 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003524 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003525 }
3526 COPY_BUF(l,buf,len,c);
3527 NEXTL(l);
3528 /*
3529 * Pop-up of finished entities.
3530 */
3531 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3532 xmlPopInput(ctxt);
3533
3534 GROW;
3535 c = CUR_CHAR(l);
3536 if (c == 0) {
3537 GROW;
3538 c = CUR_CHAR(l);
3539 }
3540 }
3541 buf[len] = 0;
3542
3543 /*
3544 * Raise problem w.r.t. '&' and '%' being used in non-entities
3545 * reference constructs. Note Charref will be handled in
3546 * xmlStringDecodeEntities()
3547 */
3548 cur = buf;
3549 while (*cur != 0) { /* non input consuming */
3550 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3551 xmlChar *name;
3552 xmlChar tmp = *cur;
3553
3554 cur++;
3555 name = xmlParseStringName(ctxt, &cur);
3556 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003557 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003558 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003559 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003560 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003561 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3562 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003563 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003564 }
3565 if (name != NULL)
3566 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003567 if (*cur == 0)
3568 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003569 }
3570 cur++;
3571 }
3572
3573 /*
3574 * Then PEReference entities are substituted.
3575 */
3576 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003577 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003578 xmlFree(buf);
3579 } else {
3580 NEXT;
3581 /*
3582 * NOTE: 4.4.7 Bypassed
3583 * When a general entity reference appears in the EntityValue in
3584 * an entity declaration, it is bypassed and left as is.
3585 * so XML_SUBSTITUTE_REF is not set here.
3586 */
3587 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3588 0, 0, 0);
3589 if (orig != NULL)
3590 *orig = buf;
3591 else
3592 xmlFree(buf);
3593 }
3594
3595 return(ret);
3596}
3597
3598/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003599 * xmlParseAttValueComplex:
3600 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003601 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003602 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003603 *
3604 * parse a value for an attribute, this is the fallback function
3605 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003606 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003607 *
3608 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3609 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003610static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003611xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003612 xmlChar limit = 0;
3613 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003614 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003615 int len = 0;
3616 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003617 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003618 xmlChar *current = NULL;
3619 xmlEntityPtr ent;
3620
Owen Taylor3473f882001-02-23 17:55:21 +00003621 if (NXT(0) == '"') {
3622 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3623 limit = '"';
3624 NEXT;
3625 } else if (NXT(0) == '\'') {
3626 limit = '\'';
3627 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3628 NEXT;
3629 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003630 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003631 return(NULL);
3632 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003633
Owen Taylor3473f882001-02-23 17:55:21 +00003634 /*
3635 * allocate a translation buffer.
3636 */
3637 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003638 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003639 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003640
3641 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003642 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003643 */
3644 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003645 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003646 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003647 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003648 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003649 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003650 if (NXT(1) == '#') {
3651 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003652
Owen Taylor3473f882001-02-23 17:55:21 +00003653 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003654 if (ctxt->replaceEntities) {
3655 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003656 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003657 }
3658 buf[len++] = '&';
3659 } else {
3660 /*
3661 * The reparsing will be done in xmlStringGetNodeList()
3662 * called by the attribute() function in SAX.c
3663 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003664 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003665 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003666 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003667 buf[len++] = '&';
3668 buf[len++] = '#';
3669 buf[len++] = '3';
3670 buf[len++] = '8';
3671 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003672 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003673 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003674 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003675 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003676 }
Owen Taylor3473f882001-02-23 17:55:21 +00003677 len += xmlCopyChar(0, &buf[len], val);
3678 }
3679 } else {
3680 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003681 ctxt->nbentities++;
3682 if (ent != NULL)
3683 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003684 if ((ent != NULL) &&
3685 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3686 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003687 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003688 }
3689 if ((ctxt->replaceEntities == 0) &&
3690 (ent->content[0] == '&')) {
3691 buf[len++] = '&';
3692 buf[len++] = '#';
3693 buf[len++] = '3';
3694 buf[len++] = '8';
3695 buf[len++] = ';';
3696 } else {
3697 buf[len++] = ent->content[0];
3698 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003699 } else if ((ent != NULL) &&
3700 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003701 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3702 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003703 XML_SUBSTITUTE_REF,
3704 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003705 if (rep != NULL) {
3706 current = rep;
3707 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003708 if ((*current == 0xD) || (*current == 0xA) ||
3709 (*current == 0x9)) {
3710 buf[len++] = 0x20;
3711 current++;
3712 } else
3713 buf[len++] = *current++;
Owen Taylor3473f882001-02-23 17:55:21 +00003714 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003715 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003716 }
3717 }
3718 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003719 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003720 }
3721 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003722 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003723 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003724 }
Owen Taylor3473f882001-02-23 17:55:21 +00003725 if (ent->content != NULL)
3726 buf[len++] = ent->content[0];
3727 }
3728 } else if (ent != NULL) {
3729 int i = xmlStrlen(ent->name);
3730 const xmlChar *cur = ent->name;
3731
3732 /*
3733 * This may look absurd but is needed to detect
3734 * entities problems
3735 */
3736 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3737 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003738 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003739 XML_SUBSTITUTE_REF, 0, 0, 0);
3740 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003741 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003742 rep = NULL;
3743 }
Owen Taylor3473f882001-02-23 17:55:21 +00003744 }
3745
3746 /*
3747 * Just output the reference
3748 */
3749 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003750 while (len > buf_size - i - 10) {
3751 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003752 }
3753 for (;i > 0;i--)
3754 buf[len++] = *cur++;
3755 buf[len++] = ';';
3756 }
3757 }
3758 } else {
3759 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003760 if ((len != 0) || (!normalize)) {
3761 if ((!normalize) || (!in_space)) {
3762 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003763 while (len > buf_size - 10) {
3764 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003765 }
3766 }
3767 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003768 }
3769 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003770 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003771 COPY_BUF(l,buf,len,c);
3772 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003773 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003774 }
3775 }
3776 NEXTL(l);
3777 }
3778 GROW;
3779 c = CUR_CHAR(l);
3780 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003781 if ((in_space) && (normalize)) {
3782 while (buf[len - 1] == 0x20) len--;
3783 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003784 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003785 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003786 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003787 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003788 if ((c != 0) && (!IS_CHAR(c))) {
3789 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3790 "invalid character in attribute value\n");
3791 } else {
3792 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3793 "AttValue: ' expected\n");
3794 }
Owen Taylor3473f882001-02-23 17:55:21 +00003795 } else
3796 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003797 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003798 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003799
3800mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003801 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003802 if (buf != NULL)
3803 xmlFree(buf);
3804 if (rep != NULL)
3805 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003806 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003807}
3808
3809/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003810 * xmlParseAttValue:
3811 * @ctxt: an XML parser context
3812 *
3813 * parse a value for an attribute
3814 * Note: the parser won't do substitution of entities here, this
3815 * will be handled later in xmlStringGetNodeList
3816 *
3817 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3818 * "'" ([^<&'] | Reference)* "'"
3819 *
3820 * 3.3.3 Attribute-Value Normalization:
3821 * Before the value of an attribute is passed to the application or
3822 * checked for validity, the XML processor must normalize it as follows:
3823 * - a character reference is processed by appending the referenced
3824 * character to the attribute value
3825 * - an entity reference is processed by recursively processing the
3826 * replacement text of the entity
3827 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3828 * appending #x20 to the normalized value, except that only a single
3829 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3830 * parsed entity or the literal entity value of an internal parsed entity
3831 * - other characters are processed by appending them to the normalized value
3832 * If the declared value is not CDATA, then the XML processor must further
3833 * process the normalized attribute value by discarding any leading and
3834 * trailing space (#x20) characters, and by replacing sequences of space
3835 * (#x20) characters by a single space (#x20) character.
3836 * All attributes for which no declaration has been read should be treated
3837 * by a non-validating parser as if declared CDATA.
3838 *
3839 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3840 */
3841
3842
3843xmlChar *
3844xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003845 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003846 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003847}
3848
3849/**
Owen Taylor3473f882001-02-23 17:55:21 +00003850 * xmlParseSystemLiteral:
3851 * @ctxt: an XML parser context
3852 *
3853 * parse an XML Literal
3854 *
3855 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3856 *
3857 * Returns the SystemLiteral parsed or NULL
3858 */
3859
3860xmlChar *
3861xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3862 xmlChar *buf = NULL;
3863 int len = 0;
3864 int size = XML_PARSER_BUFFER_SIZE;
3865 int cur, l;
3866 xmlChar stop;
3867 int state = ctxt->instate;
3868 int count = 0;
3869
3870 SHRINK;
3871 if (RAW == '"') {
3872 NEXT;
3873 stop = '"';
3874 } else if (RAW == '\'') {
3875 NEXT;
3876 stop = '\'';
3877 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003878 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003879 return(NULL);
3880 }
3881
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003882 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003883 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003884 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003885 return(NULL);
3886 }
3887 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3888 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003889 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003890 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003891 xmlChar *tmp;
3892
Owen Taylor3473f882001-02-23 17:55:21 +00003893 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003894 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3895 if (tmp == NULL) {
3896 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003897 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003898 ctxt->instate = (xmlParserInputState) state;
3899 return(NULL);
3900 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003901 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003902 }
3903 count++;
3904 if (count > 50) {
3905 GROW;
3906 count = 0;
3907 }
3908 COPY_BUF(l,buf,len,cur);
3909 NEXTL(l);
3910 cur = CUR_CHAR(l);
3911 if (cur == 0) {
3912 GROW;
3913 SHRINK;
3914 cur = CUR_CHAR(l);
3915 }
3916 }
3917 buf[len] = 0;
3918 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003919 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003920 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003921 } else {
3922 NEXT;
3923 }
3924 return(buf);
3925}
3926
3927/**
3928 * xmlParsePubidLiteral:
3929 * @ctxt: an XML parser context
3930 *
3931 * parse an XML public literal
3932 *
3933 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3934 *
3935 * Returns the PubidLiteral parsed or NULL.
3936 */
3937
3938xmlChar *
3939xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3940 xmlChar *buf = NULL;
3941 int len = 0;
3942 int size = XML_PARSER_BUFFER_SIZE;
3943 xmlChar cur;
3944 xmlChar stop;
3945 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003946 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003947
3948 SHRINK;
3949 if (RAW == '"') {
3950 NEXT;
3951 stop = '"';
3952 } else if (RAW == '\'') {
3953 NEXT;
3954 stop = '\'';
3955 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003956 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003957 return(NULL);
3958 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003959 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003960 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003961 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003962 return(NULL);
3963 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003964 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003965 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003966 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003967 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003968 xmlChar *tmp;
3969
Owen Taylor3473f882001-02-23 17:55:21 +00003970 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003971 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3972 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003973 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003974 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003975 return(NULL);
3976 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003977 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003978 }
3979 buf[len++] = cur;
3980 count++;
3981 if (count > 50) {
3982 GROW;
3983 count = 0;
3984 }
3985 NEXT;
3986 cur = CUR;
3987 if (cur == 0) {
3988 GROW;
3989 SHRINK;
3990 cur = CUR;
3991 }
3992 }
3993 buf[len] = 0;
3994 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003995 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003996 } else {
3997 NEXT;
3998 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003999 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004000 return(buf);
4001}
4002
Daniel Veillard8ed10722009-08-20 19:17:36 +02004003static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004004
/*
 * Lookup table driving the inner loop of the character data fast path:
 * a non-zero entry means the byte can be skipped over directly, a zero
 * entry stops the scan.  Each accepted byte maps to its own value.
 * Accepted: TAB and 0x20-0x7F except '&', '<' and ']'.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x08 */ 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* TAB only; LF and CR stop the scan */
    /* 0x10 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x18 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* no '&' */
    /* 0x28 */ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* no '<' */
    /* 0x40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    /* 0x48 */ 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    /* 0x58 */ 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* no ']' */
    /* 0x60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x68 */ 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    /* 0x78 */ 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii: never accepted */
    /* 0x88 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x98 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xA8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xB8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xC8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xD8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xE8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0xF8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4042
Owen Taylor3473f882001-02-23 17:55:21 +00004043/**
4044 * xmlParseCharData:
4045 * @ctxt: an XML parser context
4046 * @cdata: int indicating whether we are within a CDATA section
4047 *
4048 * parse a CharData section.
4049 * if we are within a CDATA section ']]>' marks an end of section.
4050 *
4051 * The right angle bracket (>) may be represented using the string "&gt;",
4052 * and must, for compatibility, be escaped using "&gt;" or a character
4053 * reference when it appears in the string "]]>" in content, when that
4054 * string is not marking the end of a CDATA section.
4055 *
4056 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4057 */
4058
4059void
4060xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004061 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004062 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004063 int line = ctxt->input->line;
4064 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004065 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004066
4067 SHRINK;
4068 GROW;
4069 /*
4070 * Accelerated common case where input don't need to be
4071 * modified before passing it to the handler.
4072 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004073 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004074 in = ctxt->input->cur;
4075 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004076get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004077 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004078 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004079 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004080 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004081 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004082 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004083 goto get_more_space;
4084 }
4085 if (*in == '<') {
4086 nbchar = in - ctxt->input->cur;
4087 if (nbchar > 0) {
4088 const xmlChar *tmp = ctxt->input->cur;
4089 ctxt->input->cur = in;
4090
Daniel Veillard34099b42004-11-04 17:34:35 +00004091 if ((ctxt->sax != NULL) &&
4092 (ctxt->sax->ignorableWhitespace !=
4093 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004094 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004095 if (ctxt->sax->ignorableWhitespace != NULL)
4096 ctxt->sax->ignorableWhitespace(ctxt->userData,
4097 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004098 } else {
4099 if (ctxt->sax->characters != NULL)
4100 ctxt->sax->characters(ctxt->userData,
4101 tmp, nbchar);
4102 if (*ctxt->space == -1)
4103 *ctxt->space = -2;
4104 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004105 } else if ((ctxt->sax != NULL) &&
4106 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004107 ctxt->sax->characters(ctxt->userData,
4108 tmp, nbchar);
4109 }
4110 }
4111 return;
4112 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004113
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004114get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004115 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004116 while (test_char_data[*in]) {
4117 in++;
4118 ccol++;
4119 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004120 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004121 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004122 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004123 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004124 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004125 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004126 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004127 }
4128 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004129 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004130 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004131 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004132 return;
4133 }
4134 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004135 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004136 goto get_more;
4137 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004138 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004139 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004140 if ((ctxt->sax != NULL) &&
4141 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004142 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004143 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004144 const xmlChar *tmp = ctxt->input->cur;
4145 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004146
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004147 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004148 if (ctxt->sax->ignorableWhitespace != NULL)
4149 ctxt->sax->ignorableWhitespace(ctxt->userData,
4150 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004151 } else {
4152 if (ctxt->sax->characters != NULL)
4153 ctxt->sax->characters(ctxt->userData,
4154 tmp, nbchar);
4155 if (*ctxt->space == -1)
4156 *ctxt->space = -2;
4157 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004158 line = ctxt->input->line;
4159 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004160 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004161 if (ctxt->sax->characters != NULL)
4162 ctxt->sax->characters(ctxt->userData,
4163 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004164 line = ctxt->input->line;
4165 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004166 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004167 /* something really bad happened in the SAX callback */
4168 if (ctxt->instate != XML_PARSER_CONTENT)
4169 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004170 }
4171 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004172 if (*in == 0xD) {
4173 in++;
4174 if (*in == 0xA) {
4175 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004176 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004177 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004178 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004179 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004180 in--;
4181 }
4182 if (*in == '<') {
4183 return;
4184 }
4185 if (*in == '&') {
4186 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004187 }
4188 SHRINK;
4189 GROW;
4190 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004191 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004192 nbchar = 0;
4193 }
Daniel Veillard50582112001-03-26 22:52:16 +00004194 ctxt->input->line = line;
4195 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004196 xmlParseCharDataComplex(ctxt, cdata);
4197}
4198
Daniel Veillard01c13b52002-12-10 15:19:08 +00004199/**
4200 * xmlParseCharDataComplex:
4201 * @ctxt: an XML parser context
4202 * @cdata: int indicating whether we are within a CDATA section
4203 *
4204 * parse a CharData section.this is the fallback function
4205 * of xmlParseCharData() when the parsing requires handling
4206 * of non-ASCII characters.
4207 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004208static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004209xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004210 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4211 int nbchar = 0;
4212 int cur, l;
4213 int count = 0;
4214
4215 SHRINK;
4216 GROW;
4217 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004218 while ((cur != '<') && /* checked */
4219 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004220 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004221 if ((cur == ']') && (NXT(1) == ']') &&
4222 (NXT(2) == '>')) {
4223 if (cdata) break;
4224 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004225 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004226 }
4227 }
4228 COPY_BUF(l,buf,nbchar,cur);
4229 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004230 buf[nbchar] = 0;
4231
Owen Taylor3473f882001-02-23 17:55:21 +00004232 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004233 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004234 */
4235 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004236 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004237 if (ctxt->sax->ignorableWhitespace != NULL)
4238 ctxt->sax->ignorableWhitespace(ctxt->userData,
4239 buf, nbchar);
4240 } else {
4241 if (ctxt->sax->characters != NULL)
4242 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004243 if ((ctxt->sax->characters !=
4244 ctxt->sax->ignorableWhitespace) &&
4245 (*ctxt->space == -1))
4246 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004247 }
4248 }
4249 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004250 /* something really bad happened in the SAX callback */
4251 if (ctxt->instate != XML_PARSER_CONTENT)
4252 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004253 }
4254 count++;
4255 if (count > 50) {
4256 GROW;
4257 count = 0;
4258 }
4259 NEXTL(l);
4260 cur = CUR_CHAR(l);
4261 }
4262 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004263 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004264 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004265 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004266 */
4267 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004268 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004269 if (ctxt->sax->ignorableWhitespace != NULL)
4270 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4271 } else {
4272 if (ctxt->sax->characters != NULL)
4273 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004274 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4275 (*ctxt->space == -1))
4276 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004277 }
4278 }
4279 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004280 if ((cur != 0) && (!IS_CHAR(cur))) {
4281 /* Generate the error and skip the offending character */
4282 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4283 "PCDATA invalid Char value %d\n",
4284 cur);
4285 NEXTL(l);
4286 }
Owen Taylor3473f882001-02-23 17:55:21 +00004287}
4288
4289/**
4290 * xmlParseExternalID:
4291 * @ctxt: an XML parser context
4292 * @publicID: a xmlChar** receiving PubidLiteral
4293 * @strict: indicate whether we should restrict parsing to only
4294 * production [75], see NOTE below
4295 *
4296 * Parse an External ID or a Public ID
4297 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004298 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004299 * 'PUBLIC' S PubidLiteral S SystemLiteral
4300 *
4301 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4302 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4303 *
4304 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4305 *
4306 * Returns the function returns SystemLiteral and in the second
4307 * case publicID receives PubidLiteral, is strict is off
4308 * it is possible to return NULL and have publicID set.
4309 */
4310
4311xmlChar *
4312xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4313 xmlChar *URI = NULL;
4314
4315 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004316
4317 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004318 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004319 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004320 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004321 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4322 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004323 }
4324 SKIP_BLANKS;
4325 URI = xmlParseSystemLiteral(ctxt);
4326 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004327 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004328 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004329 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004330 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004331 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004332 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004333 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004334 }
4335 SKIP_BLANKS;
4336 *publicID = xmlParsePubidLiteral(ctxt);
4337 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004338 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004339 }
4340 if (strict) {
4341 /*
4342 * We don't handle [83] so "S SystemLiteral" is required.
4343 */
William M. Brack76e95df2003-10-18 16:20:14 +00004344 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004345 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004346 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004347 }
4348 } else {
4349 /*
4350 * We handle [83] so we return immediately, if
4351 * "S SystemLiteral" is not detected. From a purely parsing
4352 * point of view that's a nice mess.
4353 */
4354 const xmlChar *ptr;
4355 GROW;
4356
4357 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004358 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004359
William M. Brack76e95df2003-10-18 16:20:14 +00004360 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004361 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4362 }
4363 SKIP_BLANKS;
4364 URI = xmlParseSystemLiteral(ctxt);
4365 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004366 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004367 }
4368 }
4369 return(URI);
4370}
4371
4372/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004373 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004374 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004375 * @buf: the already parsed part of the buffer
4376 * @len: number of bytes filles in the buffer
4377 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004378 *
4379 * Skip an XML (SGML) comment <!-- .... -->
4380 * The spec says that "For compatibility, the string "--" (double-hyphen)
4381 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004382 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004383 *
4384 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4385 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004386static void
4387xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004388 int q, ql;
4389 int r, rl;
4390 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004391 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004392 int inputid;
4393
4394 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004395
Owen Taylor3473f882001-02-23 17:55:21 +00004396 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004397 len = 0;
4398 size = XML_PARSER_BUFFER_SIZE;
4399 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4400 if (buf == NULL) {
4401 xmlErrMemory(ctxt, NULL);
4402 return;
4403 }
Owen Taylor3473f882001-02-23 17:55:21 +00004404 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004405 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004406 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004407 if (q == 0)
4408 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004409 if (!IS_CHAR(q)) {
4410 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4411 "xmlParseComment: invalid xmlChar value %d\n",
4412 q);
4413 xmlFree (buf);
4414 return;
4415 }
Owen Taylor3473f882001-02-23 17:55:21 +00004416 NEXTL(ql);
4417 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004418 if (r == 0)
4419 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004420 if (!IS_CHAR(r)) {
4421 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4422 "xmlParseComment: invalid xmlChar value %d\n",
4423 q);
4424 xmlFree (buf);
4425 return;
4426 }
Owen Taylor3473f882001-02-23 17:55:21 +00004427 NEXTL(rl);
4428 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004429 if (cur == 0)
4430 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004431 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004432 ((cur != '>') ||
4433 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004434 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004435 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004436 }
4437 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004438 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004439 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004440 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4441 if (new_buf == NULL) {
4442 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004443 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004444 return;
4445 }
William M. Bracka3215c72004-07-31 16:24:01 +00004446 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004447 }
4448 COPY_BUF(ql,buf,len,q);
4449 q = r;
4450 ql = rl;
4451 r = cur;
4452 rl = l;
4453
4454 count++;
4455 if (count > 50) {
4456 GROW;
4457 count = 0;
4458 }
4459 NEXTL(l);
4460 cur = CUR_CHAR(l);
4461 if (cur == 0) {
4462 SHRINK;
4463 GROW;
4464 cur = CUR_CHAR(l);
4465 }
4466 }
4467 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004468 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004469 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004470 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004471 } else if (!IS_CHAR(cur)) {
4472 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4473 "xmlParseComment: invalid xmlChar value %d\n",
4474 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004475 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004476 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004477 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4478 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004479 }
4480 NEXT;
4481 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4482 (!ctxt->disableSAX))
4483 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004484 }
Daniel Veillardda629342007-08-01 07:49:06 +00004485 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004486 return;
4487not_terminated:
4488 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4489 "Comment not terminated\n", NULL);
4490 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004491 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004492}
Daniel Veillardda629342007-08-01 07:49:06 +00004493
Daniel Veillard4c778d82005-01-23 17:37:44 +00004494/**
4495 * xmlParseComment:
4496 * @ctxt: an XML parser context
4497 *
4498 * Skip an XML (SGML) comment <!-- .... -->
4499 * The spec says that "For compatibility, the string "--" (double-hyphen)
4500 * must not occur within comments. "
4501 *
4502 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4503 */
4504void
4505xmlParseComment(xmlParserCtxtPtr ctxt) {
4506 xmlChar *buf = NULL;
4507 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004508 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004509 xmlParserInputState state;
4510 const xmlChar *in;
4511 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004512 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004513
4514 /*
4515 * Check that there is a comment right here.
4516 */
4517 if ((RAW != '<') || (NXT(1) != '!') ||
4518 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004519 state = ctxt->instate;
4520 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004521 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004522 SKIP(4);
4523 SHRINK;
4524 GROW;
4525
4526 /*
4527 * Accelerated common case where input don't need to be
4528 * modified before passing it to the handler.
4529 */
4530 in = ctxt->input->cur;
4531 do {
4532 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004533 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004534 ctxt->input->line++; ctxt->input->col = 1;
4535 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004536 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004537 }
4538get_more:
4539 ccol = ctxt->input->col;
4540 while (((*in > '-') && (*in <= 0x7F)) ||
4541 ((*in >= 0x20) && (*in < '-')) ||
4542 (*in == 0x09)) {
4543 in++;
4544 ccol++;
4545 }
4546 ctxt->input->col = ccol;
4547 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004548 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004549 ctxt->input->line++; ctxt->input->col = 1;
4550 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004551 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004552 goto get_more;
4553 }
4554 nbchar = in - ctxt->input->cur;
4555 /*
4556 * save current set of data
4557 */
4558 if (nbchar > 0) {
4559 if ((ctxt->sax != NULL) &&
4560 (ctxt->sax->comment != NULL)) {
4561 if (buf == NULL) {
4562 if ((*in == '-') && (in[1] == '-'))
4563 size = nbchar + 1;
4564 else
4565 size = XML_PARSER_BUFFER_SIZE + nbchar;
4566 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4567 if (buf == NULL) {
4568 xmlErrMemory(ctxt, NULL);
4569 ctxt->instate = state;
4570 return;
4571 }
4572 len = 0;
4573 } else if (len + nbchar + 1 >= size) {
4574 xmlChar *new_buf;
4575 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4576 new_buf = (xmlChar *) xmlRealloc(buf,
4577 size * sizeof(xmlChar));
4578 if (new_buf == NULL) {
4579 xmlFree (buf);
4580 xmlErrMemory(ctxt, NULL);
4581 ctxt->instate = state;
4582 return;
4583 }
4584 buf = new_buf;
4585 }
4586 memcpy(&buf[len], ctxt->input->cur, nbchar);
4587 len += nbchar;
4588 buf[len] = 0;
4589 }
4590 }
4591 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004592 if (*in == 0xA) {
4593 in++;
4594 ctxt->input->line++; ctxt->input->col = 1;
4595 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004596 if (*in == 0xD) {
4597 in++;
4598 if (*in == 0xA) {
4599 ctxt->input->cur = in;
4600 in++;
4601 ctxt->input->line++; ctxt->input->col = 1;
4602 continue; /* while */
4603 }
4604 in--;
4605 }
4606 SHRINK;
4607 GROW;
4608 in = ctxt->input->cur;
4609 if (*in == '-') {
4610 if (in[1] == '-') {
4611 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004612 if (ctxt->input->id != inputid) {
4613 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4614 "comment doesn't start and stop in the same entity\n");
4615 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004616 SKIP(3);
4617 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4618 (!ctxt->disableSAX)) {
4619 if (buf != NULL)
4620 ctxt->sax->comment(ctxt->userData, buf);
4621 else
4622 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4623 }
4624 if (buf != NULL)
4625 xmlFree(buf);
4626 ctxt->instate = state;
4627 return;
4628 }
4629 if (buf != NULL)
4630 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4631 "Comment not terminated \n<!--%.50s\n",
4632 buf);
4633 else
4634 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4635 "Comment not terminated \n", NULL);
4636 in++;
4637 ctxt->input->col++;
4638 }
4639 in++;
4640 ctxt->input->col++;
4641 goto get_more;
4642 }
4643 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4644 xmlParseCommentComplex(ctxt, buf, len, size);
4645 ctxt->instate = state;
4646 return;
4647}
4648
Owen Taylor3473f882001-02-23 17:55:21 +00004649
4650/**
4651 * xmlParsePITarget:
4652 * @ctxt: an XML parser context
4653 *
4654 * parse the name of a PI
4655 *
4656 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4657 *
4658 * Returns the PITarget name or NULL
4659 */
4660
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004661const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004662xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004663 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004664
4665 name = xmlParseName(ctxt);
4666 if ((name != NULL) &&
4667 ((name[0] == 'x') || (name[0] == 'X')) &&
4668 ((name[1] == 'm') || (name[1] == 'M')) &&
4669 ((name[2] == 'l') || (name[2] == 'L'))) {
4670 int i;
4671 if ((name[0] == 'x') && (name[1] == 'm') &&
4672 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004673 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004674 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004675 return(name);
4676 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004677 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004678 return(name);
4679 }
4680 for (i = 0;;i++) {
4681 if (xmlW3CPIs[i] == NULL) break;
4682 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4683 return(name);
4684 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004685 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4686 "xmlParsePITarget: invalid name prefix 'xml'\n",
4687 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004688 }
Daniel Veillard37334572008-07-31 08:20:02 +00004689 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4690 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4691 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4692 }
Owen Taylor3473f882001-02-23 17:55:21 +00004693 return(name);
4694}
4695
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction, e.g.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected syntax is the keyword "catalog", an '=' sign and a
 * single or double quoted URL, each optionally surrounded by blanks,
 * with nothing but blanks after the closing quote.
 * On success the URL is added to the catalogs of the parsing context
 * through xmlCatalogAddLocal(). A syntax error raises an
 * XML_WAR_CATALOG_PI warning, except for a missing '=' after the
 * keyword which is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;

    /* the "catalog" keyword */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    /* the '=' sign, its absence is not reported */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* the quoted URL */
    for (; IS_BLANK_CH(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    url = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK_CH(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
4757
Owen Taylor3473f882001-02-23 17:55:21 +00004758/**
4759 * xmlParsePI:
4760 * @ctxt: an XML parser context
4761 *
4762 * parse an XML Processing Instruction.
4763 *
4764 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4765 *
4766 * The processing is transfered to SAX once parsed.
4767 */
4768
4769void
4770xmlParsePI(xmlParserCtxtPtr ctxt) {
4771 xmlChar *buf = NULL;
4772 int len = 0;
4773 int size = XML_PARSER_BUFFER_SIZE;
4774 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004775 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004776 xmlParserInputState state;
4777 int count = 0;
4778
4779 if ((RAW == '<') && (NXT(1) == '?')) {
4780 xmlParserInputPtr input = ctxt->input;
4781 state = ctxt->instate;
4782 ctxt->instate = XML_PARSER_PI;
4783 /*
4784 * this is a Processing Instruction.
4785 */
4786 SKIP(2);
4787 SHRINK;
4788
4789 /*
4790 * Parse the target name and check for special support like
4791 * namespace.
4792 */
4793 target = xmlParsePITarget(ctxt);
4794 if (target != NULL) {
4795 if ((RAW == '?') && (NXT(1) == '>')) {
4796 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004797 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4798 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004799 }
4800 SKIP(2);
4801
4802 /*
4803 * SAX: PI detected.
4804 */
4805 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4806 (ctxt->sax->processingInstruction != NULL))
4807 ctxt->sax->processingInstruction(ctxt->userData,
4808 target, NULL);
4809 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004810 return;
4811 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004812 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004813 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004814 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004815 ctxt->instate = state;
4816 return;
4817 }
4818 cur = CUR;
4819 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004820 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4821 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004822 }
4823 SKIP_BLANKS;
4824 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004825 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004826 ((cur != '?') || (NXT(1) != '>'))) {
4827 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004828 xmlChar *tmp;
4829
Owen Taylor3473f882001-02-23 17:55:21 +00004830 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004831 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4832 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004833 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004834 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004835 ctxt->instate = state;
4836 return;
4837 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004838 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004839 }
4840 count++;
4841 if (count > 50) {
4842 GROW;
4843 count = 0;
4844 }
4845 COPY_BUF(l,buf,len,cur);
4846 NEXTL(l);
4847 cur = CUR_CHAR(l);
4848 if (cur == 0) {
4849 SHRINK;
4850 GROW;
4851 cur = CUR_CHAR(l);
4852 }
4853 }
4854 buf[len] = 0;
4855 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004856 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4857 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004858 } else {
4859 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004860 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4861 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004862 }
4863 SKIP(2);
4864
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004865#ifdef LIBXML_CATALOG_ENABLED
4866 if (((state == XML_PARSER_MISC) ||
4867 (state == XML_PARSER_START)) &&
4868 (xmlStrEqual(target, XML_CATALOG_PI))) {
4869 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4870 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4871 (allow == XML_CATA_ALLOW_ALL))
4872 xmlParseCatalogPI(ctxt, buf);
4873 }
4874#endif
4875
4876
Owen Taylor3473f882001-02-23 17:55:21 +00004877 /*
4878 * SAX: PI detected.
4879 */
4880 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4881 (ctxt->sax->processingInstruction != NULL))
4882 ctxt->sax->processingInstruction(ctxt->userData,
4883 target, buf);
4884 }
4885 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004886 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004887 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004888 }
4889 ctxt->instate = state;
4890 }
4891}
4892
4893/**
4894 * xmlParseNotationDecl:
4895 * @ctxt: an XML parser context
4896 *
4897 * parse a notation declaration
4898 *
4899 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4900 *
4901 * Hence there is actually 3 choices:
4902 * 'PUBLIC' S PubidLiteral
4903 * 'PUBLIC' S PubidLiteral S SystemLiteral
4904 * and 'SYSTEM' S SystemLiteral
4905 *
4906 * See the NOTE on xmlParseExternalID().
4907 */
4908
4909void
4910xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004911 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004912 xmlChar *Pubid;
4913 xmlChar *Systemid;
4914
Daniel Veillarda07050d2003-10-19 14:46:32 +00004915 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004916 xmlParserInputPtr input = ctxt->input;
4917 SHRINK;
4918 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004919 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004920 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4921 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004922 return;
4923 }
4924 SKIP_BLANKS;
4925
Daniel Veillard76d66f42001-05-16 21:05:17 +00004926 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004927 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004928 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004929 return;
4930 }
William M. Brack76e95df2003-10-18 16:20:14 +00004931 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004932 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004933 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004934 return;
4935 }
Daniel Veillard37334572008-07-31 08:20:02 +00004936 if (xmlStrchr(name, ':') != NULL) {
4937 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4938 "colon are forbidden from notation names '%s'\n",
4939 name, NULL, NULL);
4940 }
Owen Taylor3473f882001-02-23 17:55:21 +00004941 SKIP_BLANKS;
4942
4943 /*
4944 * Parse the IDs.
4945 */
4946 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4947 SKIP_BLANKS;
4948
4949 if (RAW == '>') {
4950 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004951 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4952 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004953 }
4954 NEXT;
4955 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4956 (ctxt->sax->notationDecl != NULL))
4957 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4958 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004959 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004960 }
Owen Taylor3473f882001-02-23 17:55:21 +00004961 if (Systemid != NULL) xmlFree(Systemid);
4962 if (Pubid != NULL) xmlFree(Pubid);
4963 }
4964}
4965
4966/**
4967 * xmlParseEntityDecl:
4968 * @ctxt: an XML parser context
4969 *
4970 * parse <!ENTITY declarations
4971 *
4972 * [70] EntityDecl ::= GEDecl | PEDecl
4973 *
4974 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4975 *
4976 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4977 *
4978 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4979 *
4980 * [74] PEDef ::= EntityValue | ExternalID
4981 *
4982 * [76] NDataDecl ::= S 'NDATA' S Name
4983 *
4984 * [ VC: Notation Declared ]
4985 * The Name must match the declared name of a notation.
4986 */
4987
4988void
4989xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004990 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004991 xmlChar *value = NULL;
4992 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004993 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004994 int isParameter = 0;
4995 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004996 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004997
Daniel Veillard4c778d82005-01-23 17:37:44 +00004998 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004999 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005000 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005001 SHRINK;
5002 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005003 skipped = SKIP_BLANKS;
5004 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005005 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5006 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005007 }
Owen Taylor3473f882001-02-23 17:55:21 +00005008
5009 if (RAW == '%') {
5010 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005011 skipped = SKIP_BLANKS;
5012 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005013 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5014 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005015 }
Owen Taylor3473f882001-02-23 17:55:21 +00005016 isParameter = 1;
5017 }
5018
Daniel Veillard76d66f42001-05-16 21:05:17 +00005019 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005020 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005021 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5022 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005023 return;
5024 }
Daniel Veillard37334572008-07-31 08:20:02 +00005025 if (xmlStrchr(name, ':') != NULL) {
5026 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5027 "colon are forbidden from entities names '%s'\n",
5028 name, NULL, NULL);
5029 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005030 skipped = SKIP_BLANKS;
5031 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005032 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5033 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005034 }
Owen Taylor3473f882001-02-23 17:55:21 +00005035
Daniel Veillardf5582f12002-06-11 10:08:16 +00005036 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005037 /*
5038 * handle the various case of definitions...
5039 */
5040 if (isParameter) {
5041 if ((RAW == '"') || (RAW == '\'')) {
5042 value = xmlParseEntityValue(ctxt, &orig);
5043 if (value) {
5044 if ((ctxt->sax != NULL) &&
5045 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5046 ctxt->sax->entityDecl(ctxt->userData, name,
5047 XML_INTERNAL_PARAMETER_ENTITY,
5048 NULL, NULL, value);
5049 }
5050 } else {
5051 URI = xmlParseExternalID(ctxt, &literal, 1);
5052 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005053 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005054 }
5055 if (URI) {
5056 xmlURIPtr uri;
5057
5058 uri = xmlParseURI((const char *) URI);
5059 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005060 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5061 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005062 /*
5063 * This really ought to be a well formedness error
5064 * but the XML Core WG decided otherwise c.f. issue
5065 * E26 of the XML erratas.
5066 */
Owen Taylor3473f882001-02-23 17:55:21 +00005067 } else {
5068 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005069 /*
5070 * Okay this is foolish to block those but not
5071 * invalid URIs.
5072 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005073 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005074 } else {
5075 if ((ctxt->sax != NULL) &&
5076 (!ctxt->disableSAX) &&
5077 (ctxt->sax->entityDecl != NULL))
5078 ctxt->sax->entityDecl(ctxt->userData, name,
5079 XML_EXTERNAL_PARAMETER_ENTITY,
5080 literal, URI, NULL);
5081 }
5082 xmlFreeURI(uri);
5083 }
5084 }
5085 }
5086 } else {
5087 if ((RAW == '"') || (RAW == '\'')) {
5088 value = xmlParseEntityValue(ctxt, &orig);
5089 if ((ctxt->sax != NULL) &&
5090 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5091 ctxt->sax->entityDecl(ctxt->userData, name,
5092 XML_INTERNAL_GENERAL_ENTITY,
5093 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005094 /*
5095 * For expat compatibility in SAX mode.
5096 */
5097 if ((ctxt->myDoc == NULL) ||
5098 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5099 if (ctxt->myDoc == NULL) {
5100 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005101 if (ctxt->myDoc == NULL) {
5102 xmlErrMemory(ctxt, "New Doc failed");
5103 return;
5104 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005105 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005106 }
5107 if (ctxt->myDoc->intSubset == NULL)
5108 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5109 BAD_CAST "fake", NULL, NULL);
5110
Daniel Veillard1af9a412003-08-20 22:54:39 +00005111 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5112 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005113 }
Owen Taylor3473f882001-02-23 17:55:21 +00005114 } else {
5115 URI = xmlParseExternalID(ctxt, &literal, 1);
5116 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005117 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005118 }
5119 if (URI) {
5120 xmlURIPtr uri;
5121
5122 uri = xmlParseURI((const char *)URI);
5123 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005124 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5125 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005126 /*
5127 * This really ought to be a well formedness error
5128 * but the XML Core WG decided otherwise c.f. issue
5129 * E26 of the XML erratas.
5130 */
Owen Taylor3473f882001-02-23 17:55:21 +00005131 } else {
5132 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005133 /*
5134 * Okay this is foolish to block those but not
5135 * invalid URIs.
5136 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005137 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005138 }
5139 xmlFreeURI(uri);
5140 }
5141 }
William M. Brack76e95df2003-10-18 16:20:14 +00005142 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005143 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5144 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005145 }
5146 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005147 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005148 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005149 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005150 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5151 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005152 }
5153 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005154 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005155 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5156 (ctxt->sax->unparsedEntityDecl != NULL))
5157 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5158 literal, URI, ndata);
5159 } else {
5160 if ((ctxt->sax != NULL) &&
5161 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5162 ctxt->sax->entityDecl(ctxt->userData, name,
5163 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5164 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005165 /*
5166 * For expat compatibility in SAX mode.
5167 * assuming the entity repalcement was asked for
5168 */
5169 if ((ctxt->replaceEntities != 0) &&
5170 ((ctxt->myDoc == NULL) ||
5171 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5172 if (ctxt->myDoc == NULL) {
5173 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005174 if (ctxt->myDoc == NULL) {
5175 xmlErrMemory(ctxt, "New Doc failed");
5176 return;
5177 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005178 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005179 }
5180
5181 if (ctxt->myDoc->intSubset == NULL)
5182 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5183 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005184 xmlSAX2EntityDecl(ctxt, name,
5185 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5186 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005187 }
Owen Taylor3473f882001-02-23 17:55:21 +00005188 }
5189 }
5190 }
5191 SKIP_BLANKS;
5192 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005193 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005194 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005195 } else {
5196 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005197 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5198 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005199 }
5200 NEXT;
5201 }
5202 if (orig != NULL) {
5203 /*
5204 * Ugly mechanism to save the raw entity value.
5205 */
5206 xmlEntityPtr cur = NULL;
5207
5208 if (isParameter) {
5209 if ((ctxt->sax != NULL) &&
5210 (ctxt->sax->getParameterEntity != NULL))
5211 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5212 } else {
5213 if ((ctxt->sax != NULL) &&
5214 (ctxt->sax->getEntity != NULL))
5215 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005216 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005217 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005218 }
Owen Taylor3473f882001-02-23 17:55:21 +00005219 }
5220 if (cur != NULL) {
5221 if (cur->orig != NULL)
5222 xmlFree(orig);
5223 else
5224 cur->orig = orig;
5225 } else
5226 xmlFree(orig);
5227 }
Owen Taylor3473f882001-02-23 17:55:21 +00005228 if (value != NULL) xmlFree(value);
5229 if (URI != NULL) xmlFree(URI);
5230 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005231 }
5232}
5233
5234/**
5235 * xmlParseDefaultDecl:
5236 * @ctxt: an XML parser context
5237 * @value: Receive a possible fixed default value for the attribute
5238 *
5239 * Parse an attribute default declaration
5240 *
5241 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5242 *
5243 * [ VC: Required Attribute ]
5244 * if the default declaration is the keyword #REQUIRED, then the
5245 * attribute must be specified for all elements of the type in the
5246 * attribute-list declaration.
5247 *
5248 * [ VC: Attribute Default Legal ]
5249 * The declared default value must meet the lexical constraints of
5250 * the declared attribute type c.f. xmlValidateAttributeDecl()
5251 *
5252 * [ VC: Fixed Attribute Default ]
5253 * if an attribute has a default value declared with the #FIXED
5254 * keyword, instances of that attribute must match the default value.
5255 *
5256 * [ WFC: No < in Attribute Values ]
5257 * handled in xmlParseAttValue()
5258 *
5259 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5260 * or XML_ATTRIBUTE_FIXED.
5261 */
5262
5263int
5264xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5265 int val;
5266 xmlChar *ret;
5267
5268 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005269 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005270 SKIP(9);
5271 return(XML_ATTRIBUTE_REQUIRED);
5272 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005273 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005274 SKIP(8);
5275 return(XML_ATTRIBUTE_IMPLIED);
5276 }
5277 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005278 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005279 SKIP(6);
5280 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005281 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005282 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5283 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005284 }
5285 SKIP_BLANKS;
5286 }
5287 ret = xmlParseAttValue(ctxt);
5288 ctxt->instate = XML_PARSER_DTD;
5289 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005290 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005291 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005292 } else
5293 *value = ret;
5294 return(val);
5295}
5296
5297/**
5298 * xmlParseNotationType:
5299 * @ctxt: an XML parser context
5300 *
5301 * parse an Notation attribute type.
5302 *
5303 * Note: the leading 'NOTATION' S part has already being parsed...
5304 *
5305 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5306 *
5307 * [ VC: Notation Attributes ]
5308 * Values of this type must match one of the notation names included
5309 * in the declaration; all notation names in the declaration must be declared.
5310 *
5311 * Returns: the notation attribute tree built while parsing
5312 */
5313
5314xmlEnumerationPtr
5315xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005316 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005317 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005318
5319 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005320 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005321 return(NULL);
5322 }
5323 SHRINK;
5324 do {
5325 NEXT;
5326 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005327 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005328 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005329 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5330 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005331 xmlFreeEnumeration(ret);
5332 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005333 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005334 tmp = ret;
5335 while (tmp != NULL) {
5336 if (xmlStrEqual(name, tmp->name)) {
5337 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5338 "standalone: attribute notation value token %s duplicated\n",
5339 name, NULL);
5340 if (!xmlDictOwns(ctxt->dict, name))
5341 xmlFree((xmlChar *) name);
5342 break;
5343 }
5344 tmp = tmp->next;
5345 }
5346 if (tmp == NULL) {
5347 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005348 if (cur == NULL) {
5349 xmlFreeEnumeration(ret);
5350 return(NULL);
5351 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005352 if (last == NULL) ret = last = cur;
5353 else {
5354 last->next = cur;
5355 last = cur;
5356 }
Owen Taylor3473f882001-02-23 17:55:21 +00005357 }
5358 SKIP_BLANKS;
5359 } while (RAW == '|');
5360 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005361 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005362 xmlFreeEnumeration(ret);
5363 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005364 }
5365 NEXT;
5366 return(ret);
5367}
5368
5369/**
5370 * xmlParseEnumerationType:
5371 * @ctxt: an XML parser context
5372 *
5373 * parse an Enumeration attribute type.
5374 *
5375 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5376 *
5377 * [ VC: Enumeration ]
5378 * Values of this type must match one of the Nmtoken tokens in
5379 * the declaration
5380 *
5381 * Returns: the enumeration attribute tree built while parsing
5382 */
5383
5384xmlEnumerationPtr
5385xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5386 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005387 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005388
5389 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005390 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005391 return(NULL);
5392 }
5393 SHRINK;
5394 do {
5395 NEXT;
5396 SKIP_BLANKS;
5397 name = xmlParseNmtoken(ctxt);
5398 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005399 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005400 return(ret);
5401 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005402 tmp = ret;
5403 while (tmp != NULL) {
5404 if (xmlStrEqual(name, tmp->name)) {
5405 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5406 "standalone: attribute enumeration value token %s duplicated\n",
5407 name, NULL);
5408 if (!xmlDictOwns(ctxt->dict, name))
5409 xmlFree(name);
5410 break;
5411 }
5412 tmp = tmp->next;
5413 }
5414 if (tmp == NULL) {
5415 cur = xmlCreateEnumeration(name);
5416 if (!xmlDictOwns(ctxt->dict, name))
5417 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005418 if (cur == NULL) {
5419 xmlFreeEnumeration(ret);
5420 return(NULL);
5421 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005422 if (last == NULL) ret = last = cur;
5423 else {
5424 last->next = cur;
5425 last = cur;
5426 }
Owen Taylor3473f882001-02-23 17:55:21 +00005427 }
5428 SKIP_BLANKS;
5429 } while (RAW == '|');
5430 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005431 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005432 return(ret);
5433 }
5434 NEXT;
5435 return(ret);
5436}
5437
5438/**
5439 * xmlParseEnumeratedType:
5440 * @ctxt: an XML parser context
5441 * @tree: the enumeration tree built while parsing
5442 *
5443 * parse an Enumerated attribute type.
5444 *
5445 * [57] EnumeratedType ::= NotationType | Enumeration
5446 *
5447 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5448 *
5449 *
5450 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5451 */
5452
5453int
5454xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005455 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005456 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005457 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005458 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5459 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005460 return(0);
5461 }
5462 SKIP_BLANKS;
5463 *tree = xmlParseNotationType(ctxt);
5464 if (*tree == NULL) return(0);
5465 return(XML_ATTRIBUTE_NOTATION);
5466 }
5467 *tree = xmlParseEnumerationType(ctxt);
5468 if (*tree == NULL) return(0);
5469 return(XML_ATTRIBUTE_ENUMERATION);
5470}
5471
5472/**
5473 * xmlParseAttributeType:
5474 * @ctxt: an XML parser context
5475 * @tree: the enumeration tree built while parsing
5476 *
5477 * parse the Attribute list def for an element
5478 *
5479 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5480 *
5481 * [55] StringType ::= 'CDATA'
5482 *
5483 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5484 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5485 *
5486 * Validity constraints for attribute values syntax are checked in
5487 * xmlValidateAttributeValue()
5488 *
5489 * [ VC: ID ]
5490 * Values of type ID must match the Name production. A name must not
5491 * appear more than once in an XML document as a value of this type;
5492 * i.e., ID values must uniquely identify the elements which bear them.
5493 *
5494 * [ VC: One ID per Element Type ]
5495 * No element type may have more than one ID attribute specified.
5496 *
5497 * [ VC: ID Attribute Default ]
5498 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5499 *
5500 * [ VC: IDREF ]
5501 * Values of type IDREF must match the Name production, and values
5502 * of type IDREFS must match Names; each IDREF Name must match the value
5503 * of an ID attribute on some element in the XML document; i.e. IDREF
5504 * values must match the value of some ID attribute.
5505 *
5506 * [ VC: Entity Name ]
5507 * Values of type ENTITY must match the Name production, values
5508 * of type ENTITIES must match Names; each Entity Name must match the
5509 * name of an unparsed entity declared in the DTD.
5510 *
5511 * [ VC: Name Token ]
5512 * Values of type NMTOKEN must match the Nmtoken production; values
5513 * of type NMTOKENS must match Nmtokens.
5514 *
5515 * Returns the attribute type
5516 */
5517int
5518xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5519 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005520 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005521 SKIP(5);
5522 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005523 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005524 SKIP(6);
5525 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005526 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005527 SKIP(5);
5528 return(XML_ATTRIBUTE_IDREF);
5529 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5530 SKIP(2);
5531 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005532 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005533 SKIP(6);
5534 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005535 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005536 SKIP(8);
5537 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005538 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005539 SKIP(8);
5540 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005541 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005542 SKIP(7);
5543 return(XML_ATTRIBUTE_NMTOKEN);
5544 }
5545 return(xmlParseEnumeratedType(ctxt, tree));
5546}
5547
5548/**
5549 * xmlParseAttributeListDecl:
5550 * @ctxt: an XML parser context
5551 *
5552 * : parse the Attribute list def for an element
5553 *
5554 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5555 *
5556 * [53] AttDef ::= S Name S AttType S DefaultDecl
5557 *
5558 */
5559void
5560xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005561 const xmlChar *elemName;
5562 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005563 xmlEnumerationPtr tree;
5564
Daniel Veillarda07050d2003-10-19 14:46:32 +00005565 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005566 xmlParserInputPtr input = ctxt->input;
5567
5568 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005569 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005570 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005571 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005572 }
5573 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005574 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005575 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005576 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5577 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005578 return;
5579 }
5580 SKIP_BLANKS;
5581 GROW;
5582 while (RAW != '>') {
5583 const xmlChar *check = CUR_PTR;
5584 int type;
5585 int def;
5586 xmlChar *defaultValue = NULL;
5587
5588 GROW;
5589 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005590 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005591 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005592 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5593 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005594 break;
5595 }
5596 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005597 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005598 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005599 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005600 break;
5601 }
5602 SKIP_BLANKS;
5603
5604 type = xmlParseAttributeType(ctxt, &tree);
5605 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005606 break;
5607 }
5608
5609 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005610 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005611 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5612 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005613 if (tree != NULL)
5614 xmlFreeEnumeration(tree);
5615 break;
5616 }
5617 SKIP_BLANKS;
5618
5619 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5620 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005621 if (defaultValue != NULL)
5622 xmlFree(defaultValue);
5623 if (tree != NULL)
5624 xmlFreeEnumeration(tree);
5625 break;
5626 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005627 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5628 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005629
5630 GROW;
5631 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005632 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005633 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005634 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005635 if (defaultValue != NULL)
5636 xmlFree(defaultValue);
5637 if (tree != NULL)
5638 xmlFreeEnumeration(tree);
5639 break;
5640 }
5641 SKIP_BLANKS;
5642 }
5643 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005644 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5645 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005646 if (defaultValue != NULL)
5647 xmlFree(defaultValue);
5648 if (tree != NULL)
5649 xmlFreeEnumeration(tree);
5650 break;
5651 }
5652 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5653 (ctxt->sax->attributeDecl != NULL))
5654 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5655 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005656 else if (tree != NULL)
5657 xmlFreeEnumeration(tree);
5658
5659 if ((ctxt->sax2) && (defaultValue != NULL) &&
5660 (def != XML_ATTRIBUTE_IMPLIED) &&
5661 (def != XML_ATTRIBUTE_REQUIRED)) {
5662 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5663 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005664 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005665 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5666 }
Owen Taylor3473f882001-02-23 17:55:21 +00005667 if (defaultValue != NULL)
5668 xmlFree(defaultValue);
5669 GROW;
5670 }
5671 if (RAW == '>') {
5672 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005673 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5674 "Attribute list declaration doesn't start and stop in the same entity\n",
5675 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005676 }
5677 NEXT;
5678 }
Owen Taylor3473f882001-02-23 17:55:21 +00005679 }
5680}
5681
5682/**
5683 * xmlParseElementMixedContentDecl:
5684 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005685 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005686 *
5687 * parse the declaration for a Mixed Element content
5688 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5689 *
5690 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5691 * '(' S? '#PCDATA' S? ')'
5692 *
5693 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5694 *
5695 * [ VC: No Duplicate Types ]
5696 * The same name must not appear more than once in a single
5697 * mixed-content declaration.
5698 *
5699 * returns: the list of the xmlElementContentPtr describing the element choices
5700 */
5701xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005702xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005703 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005704 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005705
5706 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005707 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005708 SKIP(7);
5709 SKIP_BLANKS;
5710 SHRINK;
5711 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005712 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005713 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5714"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005715 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005716 }
Owen Taylor3473f882001-02-23 17:55:21 +00005717 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005718 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005719 if (ret == NULL)
5720 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005721 if (RAW == '*') {
5722 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5723 NEXT;
5724 }
5725 return(ret);
5726 }
5727 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005728 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005729 if (ret == NULL) return(NULL);
5730 }
5731 while (RAW == '|') {
5732 NEXT;
5733 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005734 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005735 if (ret == NULL) return(NULL);
5736 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005737 if (cur != NULL)
5738 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005739 cur = ret;
5740 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005741 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005742 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005743 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005744 if (n->c1 != NULL)
5745 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005746 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005747 if (n != NULL)
5748 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005749 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005750 }
5751 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005752 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005753 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005754 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005755 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005756 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005757 return(NULL);
5758 }
5759 SKIP_BLANKS;
5760 GROW;
5761 }
5762 if ((RAW == ')') && (NXT(1) == '*')) {
5763 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005764 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005765 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005766 if (cur->c2 != NULL)
5767 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005768 }
5769 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005770 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005771 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5772"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005773 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005774 }
Owen Taylor3473f882001-02-23 17:55:21 +00005775 SKIP(2);
5776 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005777 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005778 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005779 return(NULL);
5780 }
5781
5782 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005783 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005784 }
5785 return(ret);
5786}
5787
5788/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005789 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005790 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005791 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005792 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005793 *
5794 * parse the declaration for a Mixed Element content
5795 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5796 *
5797 *
5798 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5799 *
5800 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5801 *
5802 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5803 *
5804 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5805 *
5806 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5807 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005808 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005809 * opening or closing parentheses in a choice, seq, or Mixed
5810 * construct is contained in the replacement text for a parameter
5811 * entity, both must be contained in the same replacement text. For
5812 * interoperability, if a parameter-entity reference appears in a
5813 * choice, seq, or Mixed construct, its replacement text should not
5814 * be empty, and neither the first nor last non-blank character of
5815 * the replacement text should be a connector (| or ,).
5816 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005817 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005818 * hierarchy.
5819 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005820static xmlElementContentPtr
5821xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5822 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005823 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005824 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005825 xmlChar type = 0;
5826
Daniel Veillard489f9672009-08-10 16:49:30 +02005827 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5828 (depth > 2048)) {
5829 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5830"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5831 depth);
5832 return(NULL);
5833 }
Owen Taylor3473f882001-02-23 17:55:21 +00005834 SKIP_BLANKS;
5835 GROW;
5836 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005837 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005838
Owen Taylor3473f882001-02-23 17:55:21 +00005839 /* Recurse on first child */
5840 NEXT;
5841 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005842 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5843 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005844 SKIP_BLANKS;
5845 GROW;
5846 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005847 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005848 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005849 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005850 return(NULL);
5851 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005852 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005853 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005854 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005855 return(NULL);
5856 }
Owen Taylor3473f882001-02-23 17:55:21 +00005857 GROW;
5858 if (RAW == '?') {
5859 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5860 NEXT;
5861 } else if (RAW == '*') {
5862 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5863 NEXT;
5864 } else if (RAW == '+') {
5865 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5866 NEXT;
5867 } else {
5868 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5869 }
Owen Taylor3473f882001-02-23 17:55:21 +00005870 GROW;
5871 }
5872 SKIP_BLANKS;
5873 SHRINK;
5874 while (RAW != ')') {
5875 /*
5876 * Each loop we parse one separator and one element.
5877 */
5878 if (RAW == ',') {
5879 if (type == 0) type = CUR;
5880
5881 /*
5882 * Detect "Name | Name , Name" error
5883 */
5884 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005885 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005886 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005887 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005888 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005889 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005890 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005891 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005892 return(NULL);
5893 }
5894 NEXT;
5895
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005896 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005897 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005898 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005899 xmlFreeDocElementContent(ctxt->myDoc, last);
5900 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005901 return(NULL);
5902 }
5903 if (last == NULL) {
5904 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005905 if (ret != NULL)
5906 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005907 ret = cur = op;
5908 } else {
5909 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005910 if (op != NULL)
5911 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005912 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005913 if (last != NULL)
5914 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005915 cur =op;
5916 last = NULL;
5917 }
5918 } else if (RAW == '|') {
5919 if (type == 0) type = CUR;
5920
5921 /*
5922 * Detect "Name , Name | Name" error
5923 */
5924 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005925 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005926 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005927 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005928 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005929 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005930 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005931 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005932 return(NULL);
5933 }
5934 NEXT;
5935
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005936 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005937 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005938 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005939 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005940 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005941 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005942 return(NULL);
5943 }
5944 if (last == NULL) {
5945 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005946 if (ret != NULL)
5947 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005948 ret = cur = op;
5949 } else {
5950 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005951 if (op != NULL)
5952 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005953 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005954 if (last != NULL)
5955 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005956 cur =op;
5957 last = NULL;
5958 }
5959 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005960 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005961 if ((last != NULL) && (last != ret))
5962 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005963 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005964 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005965 return(NULL);
5966 }
5967 GROW;
5968 SKIP_BLANKS;
5969 GROW;
5970 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005971 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005972 /* Recurse on second child */
5973 NEXT;
5974 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005975 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5976 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005977 SKIP_BLANKS;
5978 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005979 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005980 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005981 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005982 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005983 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005984 return(NULL);
5985 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005986 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005987 if (last == NULL) {
5988 if (ret != NULL)
5989 xmlFreeDocElementContent(ctxt->myDoc, ret);
5990 return(NULL);
5991 }
Owen Taylor3473f882001-02-23 17:55:21 +00005992 if (RAW == '?') {
5993 last->ocur = XML_ELEMENT_CONTENT_OPT;
5994 NEXT;
5995 } else if (RAW == '*') {
5996 last->ocur = XML_ELEMENT_CONTENT_MULT;
5997 NEXT;
5998 } else if (RAW == '+') {
5999 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6000 NEXT;
6001 } else {
6002 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6003 }
6004 }
6005 SKIP_BLANKS;
6006 GROW;
6007 }
6008 if ((cur != NULL) && (last != NULL)) {
6009 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006010 if (last != NULL)
6011 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006012 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006013 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006014 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6015"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006016 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006017 }
Owen Taylor3473f882001-02-23 17:55:21 +00006018 NEXT;
6019 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006020 if (ret != NULL) {
6021 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6022 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6023 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6024 else
6025 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6026 }
Owen Taylor3473f882001-02-23 17:55:21 +00006027 NEXT;
6028 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006029 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006030 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006031 cur = ret;
6032 /*
6033 * Some normalization:
6034 * (a | b* | c?)* == (a | b | c)*
6035 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006036 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006037 if ((cur->c1 != NULL) &&
6038 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6039 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6040 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6041 if ((cur->c2 != NULL) &&
6042 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6043 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6044 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6045 cur = cur->c2;
6046 }
6047 }
Owen Taylor3473f882001-02-23 17:55:21 +00006048 NEXT;
6049 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006050 if (ret != NULL) {
6051 int found = 0;
6052
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006053 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6054 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6055 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006056 else
6057 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006058 /*
6059 * Some normalization:
6060 * (a | b*)+ == (a | b)*
6061 * (a | b?)+ == (a | b)*
6062 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006063 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006064 if ((cur->c1 != NULL) &&
6065 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6066 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6067 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6068 found = 1;
6069 }
6070 if ((cur->c2 != NULL) &&
6071 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6072 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6073 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6074 found = 1;
6075 }
6076 cur = cur->c2;
6077 }
6078 if (found)
6079 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6080 }
Owen Taylor3473f882001-02-23 17:55:21 +00006081 NEXT;
6082 }
6083 return(ret);
6084}
6085
6086/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006087 * xmlParseElementChildrenContentDecl:
6088 * @ctxt: an XML parser context
6089 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006090 *
6091 * parse the declaration for a Mixed Element content
6092 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6093 *
6094 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6095 *
6096 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6097 *
6098 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6099 *
6100 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6101 *
6102 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6103 * TODO Parameter-entity replacement text must be properly nested
6104 * with parenthesized groups. That is to say, if either of the
6105 * opening or closing parentheses in a choice, seq, or Mixed
6106 * construct is contained in the replacement text for a parameter
6107 * entity, both must be contained in the same replacement text. For
6108 * interoperability, if a parameter-entity reference appears in a
6109 * choice, seq, or Mixed construct, its replacement text should not
6110 * be empty, and neither the first nor last non-blank character of
6111 * the replacement text should be a connector (| or ,).
6112 *
6113 * Returns the tree of xmlElementContentPtr describing the element
6114 * hierarchy.
6115 */
6116xmlElementContentPtr
6117xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6118 /* stub left for API/ABI compat */
6119 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6120}
6121
6122/**
Owen Taylor3473f882001-02-23 17:55:21 +00006123 * xmlParseElementContentDecl:
6124 * @ctxt: an XML parser context
6125 * @name: the name of the element being defined.
6126 * @result: the Element Content pointer will be stored here if any
6127 *
6128 * parse the declaration for an Element content either Mixed or Children,
6129 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6130 *
6131 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6132 *
6133 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6134 */
6135
6136int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006137xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006138 xmlElementContentPtr *result) {
6139
6140 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006141 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006142 int res;
6143
6144 *result = NULL;
6145
6146 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006147 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006148 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006149 return(-1);
6150 }
6151 NEXT;
6152 GROW;
6153 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006154 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006155 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006156 res = XML_ELEMENT_TYPE_MIXED;
6157 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006158 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006159 res = XML_ELEMENT_TYPE_ELEMENT;
6160 }
Owen Taylor3473f882001-02-23 17:55:21 +00006161 SKIP_BLANKS;
6162 *result = tree;
6163 return(res);
6164}
6165
6166/**
6167 * xmlParseElementDecl:
6168 * @ctxt: an XML parser context
6169 *
6170 * parse an Element declaration.
6171 *
6172 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6173 *
6174 * [ VC: Unique Element Type Declaration ]
6175 * No element type may be declared more than once
6176 *
6177 * Returns the type of the element, or -1 in case of error
6178 */
6179int
6180xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006181 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006182 int ret = -1;
6183 xmlElementContentPtr content = NULL;
6184
Daniel Veillard4c778d82005-01-23 17:37:44 +00006185 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006186 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006187 xmlParserInputPtr input = ctxt->input;
6188
6189 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006190 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006191 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6192 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006193 }
6194 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006195 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006196 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006197 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6198 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006199 return(-1);
6200 }
6201 while ((RAW == 0) && (ctxt->inputNr > 1))
6202 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006203 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006204 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6205 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006206 }
6207 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006208 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006209 SKIP(5);
6210 /*
6211 * Element must always be empty.
6212 */
6213 ret = XML_ELEMENT_TYPE_EMPTY;
6214 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6215 (NXT(2) == 'Y')) {
6216 SKIP(3);
6217 /*
6218 * Element is a generic container.
6219 */
6220 ret = XML_ELEMENT_TYPE_ANY;
6221 } else if (RAW == '(') {
6222 ret = xmlParseElementContentDecl(ctxt, name, &content);
6223 } else {
6224 /*
6225 * [ WFC: PEs in Internal Subset ] error handling.
6226 */
6227 if ((RAW == '%') && (ctxt->external == 0) &&
6228 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006229 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006230 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006231 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006232 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006233 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6234 }
Owen Taylor3473f882001-02-23 17:55:21 +00006235 return(-1);
6236 }
6237
6238 SKIP_BLANKS;
6239 /*
6240 * Pop-up of finished entities.
6241 */
6242 while ((RAW == 0) && (ctxt->inputNr > 1))
6243 xmlPopInput(ctxt);
6244 SKIP_BLANKS;
6245
6246 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006247 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006248 if (content != NULL) {
6249 xmlFreeDocElementContent(ctxt->myDoc, content);
6250 }
Owen Taylor3473f882001-02-23 17:55:21 +00006251 } else {
6252 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006253 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6254 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006255 }
6256
6257 NEXT;
6258 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006259 (ctxt->sax->elementDecl != NULL)) {
6260 if (content != NULL)
6261 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006262 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6263 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006264 if ((content != NULL) && (content->parent == NULL)) {
6265 /*
6266 * this is a trick: if xmlAddElementDecl is called,
6267 * instead of copying the full tree it is plugged directly
6268 * if called from the parser. Avoid duplicating the
6269 * interfaces or change the API/ABI
6270 */
6271 xmlFreeDocElementContent(ctxt->myDoc, content);
6272 }
6273 } else if (content != NULL) {
6274 xmlFreeDocElementContent(ctxt->myDoc, content);
6275 }
Owen Taylor3473f882001-02-23 17:55:21 +00006276 }
Owen Taylor3473f882001-02-23 17:55:21 +00006277 }
6278 return(ret);
6279}
6280
6281/**
Owen Taylor3473f882001-02-23 17:55:21 +00006282 * xmlParseConditionalSections
6283 * @ctxt: an XML parser context
6284 *
6285 * [61] conditionalSect ::= includeSect | ignoreSect
6286 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6287 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6288 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6289 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6290 */
6291
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006292static void
Owen Taylor3473f882001-02-23 17:55:21 +00006293xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006294 int id = ctxt->input->id;
6295
Owen Taylor3473f882001-02-23 17:55:21 +00006296 SKIP(3);
6297 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006298 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006299 SKIP(7);
6300 SKIP_BLANKS;
6301 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006302 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006303 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006304 if (ctxt->input->id != id) {
6305 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6306 "All markup of the conditional section is not in the same entity\n",
6307 NULL, NULL);
6308 }
Owen Taylor3473f882001-02-23 17:55:21 +00006309 NEXT;
6310 }
6311 if (xmlParserDebugEntities) {
6312 if ((ctxt->input != NULL) && (ctxt->input->filename))
6313 xmlGenericError(xmlGenericErrorContext,
6314 "%s(%d): ", ctxt->input->filename,
6315 ctxt->input->line);
6316 xmlGenericError(xmlGenericErrorContext,
6317 "Entering INCLUDE Conditional Section\n");
6318 }
6319
6320 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6321 (NXT(2) != '>'))) {
6322 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006323 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006324
6325 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6326 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006327 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006328 NEXT;
6329 } else if (RAW == '%') {
6330 xmlParsePEReference(ctxt);
6331 } else
6332 xmlParseMarkupDecl(ctxt);
6333
6334 /*
6335 * Pop-up of finished entities.
6336 */
6337 while ((RAW == 0) && (ctxt->inputNr > 1))
6338 xmlPopInput(ctxt);
6339
Daniel Veillardfdc91562002-07-01 21:52:03 +00006340 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006341 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006342 break;
6343 }
6344 }
6345 if (xmlParserDebugEntities) {
6346 if ((ctxt->input != NULL) && (ctxt->input->filename))
6347 xmlGenericError(xmlGenericErrorContext,
6348 "%s(%d): ", ctxt->input->filename,
6349 ctxt->input->line);
6350 xmlGenericError(xmlGenericErrorContext,
6351 "Leaving INCLUDE Conditional Section\n");
6352 }
6353
Daniel Veillarda07050d2003-10-19 14:46:32 +00006354 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006355 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006356 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006357 int depth = 0;
6358
6359 SKIP(6);
6360 SKIP_BLANKS;
6361 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006362 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006363 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006364 if (ctxt->input->id != id) {
6365 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6366 "All markup of the conditional section is not in the same entity\n",
6367 NULL, NULL);
6368 }
Owen Taylor3473f882001-02-23 17:55:21 +00006369 NEXT;
6370 }
6371 if (xmlParserDebugEntities) {
6372 if ((ctxt->input != NULL) && (ctxt->input->filename))
6373 xmlGenericError(xmlGenericErrorContext,
6374 "%s(%d): ", ctxt->input->filename,
6375 ctxt->input->line);
6376 xmlGenericError(xmlGenericErrorContext,
6377 "Entering IGNORE Conditional Section\n");
6378 }
6379
6380 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006381 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006382 * But disable SAX event generating DTD building in the meantime
6383 */
6384 state = ctxt->disableSAX;
6385 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006386 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006387 ctxt->instate = XML_PARSER_IGNORE;
6388
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006389 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006390 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6391 depth++;
6392 SKIP(3);
6393 continue;
6394 }
6395 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6396 if (--depth >= 0) SKIP(3);
6397 continue;
6398 }
6399 NEXT;
6400 continue;
6401 }
6402
6403 ctxt->disableSAX = state;
6404 ctxt->instate = instate;
6405
6406 if (xmlParserDebugEntities) {
6407 if ((ctxt->input != NULL) && (ctxt->input->filename))
6408 xmlGenericError(xmlGenericErrorContext,
6409 "%s(%d): ", ctxt->input->filename,
6410 ctxt->input->line);
6411 xmlGenericError(xmlGenericErrorContext,
6412 "Leaving IGNORE Conditional Section\n");
6413 }
6414
6415 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006416 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006417 }
6418
6419 if (RAW == 0)
6420 SHRINK;
6421
6422 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006423 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006424 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006425 if (ctxt->input->id != id) {
6426 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6427 "All markup of the conditional section is not in the same entity\n",
6428 NULL, NULL);
6429 }
Owen Taylor3473f882001-02-23 17:55:21 +00006430 SKIP(3);
6431 }
6432}
6433
6434/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006435 * xmlParseMarkupDecl:
6436 * @ctxt: an XML parser context
6437 *
6438 * parse Markup declarations
6439 *
6440 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6441 * NotationDecl | PI | Comment
6442 *
6443 * [ VC: Proper Declaration/PE Nesting ]
6444 * Parameter-entity replacement text must be properly nested with
6445 * markup declarations. That is to say, if either the first character
6446 * or the last character of a markup declaration (markupdecl above) is
6447 * contained in the replacement text for a parameter-entity reference,
6448 * both must be contained in the same replacement text.
6449 *
6450 * [ WFC: PEs in Internal Subset ]
6451 * In the internal DTD subset, parameter-entity references can occur
6452 * only where markup declarations can occur, not within markup declarations.
6453 * (This does not apply to references that occur in external parameter
6454 * entities or to the external subset.)
6455 */
6456void
6457xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6458 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006459 if (CUR == '<') {
6460 if (NXT(1) == '!') {
6461 switch (NXT(2)) {
6462 case 'E':
6463 if (NXT(3) == 'L')
6464 xmlParseElementDecl(ctxt);
6465 else if (NXT(3) == 'N')
6466 xmlParseEntityDecl(ctxt);
6467 break;
6468 case 'A':
6469 xmlParseAttributeListDecl(ctxt);
6470 break;
6471 case 'N':
6472 xmlParseNotationDecl(ctxt);
6473 break;
6474 case '-':
6475 xmlParseComment(ctxt);
6476 break;
6477 default:
6478 /* there is an error but it will be detected later */
6479 break;
6480 }
6481 } else if (NXT(1) == '?') {
6482 xmlParsePI(ctxt);
6483 }
6484 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006485 /*
6486 * This is only for internal subset. On external entities,
6487 * the replacement is done before parsing stage
6488 */
6489 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6490 xmlParsePEReference(ctxt);
6491
6492 /*
6493 * Conditional sections are allowed from entities included
6494 * by PE References in the internal subset.
6495 */
6496 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6497 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6498 xmlParseConditionalSections(ctxt);
6499 }
6500 }
6501
6502 ctxt->instate = XML_PARSER_DTD;
6503}
6504
6505/**
6506 * xmlParseTextDecl:
6507 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006508 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006509 * parse an XML declaration header for external entities
6510 *
6511 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006512 */
6513
6514void
6515xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6516 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006517 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006518
6519 /*
6520 * We know that '<?xml' is here.
6521 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006522 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006523 SKIP(5);
6524 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006525 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006526 return;
6527 }
6528
William M. Brack76e95df2003-10-18 16:20:14 +00006529 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006530 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6531 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006532 }
6533 SKIP_BLANKS;
6534
6535 /*
6536 * We may have the VersionInfo here.
6537 */
6538 version = xmlParseVersionInfo(ctxt);
6539 if (version == NULL)
6540 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006541 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006542 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006543 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6544 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006545 }
6546 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006547 ctxt->input->version = version;
6548
6549 /*
6550 * We must have the encoding declaration
6551 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006552 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006553 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6554 /*
6555 * The XML REC instructs us to stop parsing right here
6556 */
6557 return;
6558 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006559 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6560 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6561 "Missing encoding in text declaration\n");
6562 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006563
6564 SKIP_BLANKS;
6565 if ((RAW == '?') && (NXT(1) == '>')) {
6566 SKIP(2);
6567 } else if (RAW == '>') {
6568 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006569 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006570 NEXT;
6571 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006572 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006573 MOVETO_ENDTAG(CUR_PTR);
6574 NEXT;
6575 }
6576}
6577
6578/**
Owen Taylor3473f882001-02-23 17:55:21 +00006579 * xmlParseExternalSubset:
6580 * @ctxt: an XML parser context
6581 * @ExternalID: the external identifier
6582 * @SystemID: the system identifier (or URL)
6583 *
6584 * parse Markup declarations from an external subset
6585 *
6586 * [30] extSubset ::= textDecl? extSubsetDecl
6587 *
6588 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6589 */
6590void
6591xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6592 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006593 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006594 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006595
6596 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6597 (ctxt->input->end - ctxt->input->cur >= 4)) {
6598 xmlChar start[4];
6599 xmlCharEncoding enc;
6600
6601 start[0] = RAW;
6602 start[1] = NXT(1);
6603 start[2] = NXT(2);
6604 start[3] = NXT(3);
6605 enc = xmlDetectCharEncoding(start, 4);
6606 if (enc != XML_CHAR_ENCODING_NONE)
6607 xmlSwitchEncoding(ctxt, enc);
6608 }
6609
Daniel Veillarda07050d2003-10-19 14:46:32 +00006610 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006611 xmlParseTextDecl(ctxt);
6612 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6613 /*
6614 * The XML REC instructs us to stop parsing right here
6615 */
6616 ctxt->instate = XML_PARSER_EOF;
6617 return;
6618 }
6619 }
6620 if (ctxt->myDoc == NULL) {
6621 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006622 if (ctxt->myDoc == NULL) {
6623 xmlErrMemory(ctxt, "New Doc failed");
6624 return;
6625 }
6626 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006627 }
6628 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6629 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6630
6631 ctxt->instate = XML_PARSER_DTD;
6632 ctxt->external = 1;
6633 while (((RAW == '<') && (NXT(1) == '?')) ||
6634 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006635 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006636 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006637 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006638
6639 GROW;
6640 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6641 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006642 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006643 NEXT;
6644 } else if (RAW == '%') {
6645 xmlParsePEReference(ctxt);
6646 } else
6647 xmlParseMarkupDecl(ctxt);
6648
6649 /*
6650 * Pop-up of finished entities.
6651 */
6652 while ((RAW == 0) && (ctxt->inputNr > 1))
6653 xmlPopInput(ctxt);
6654
Daniel Veillardfdc91562002-07-01 21:52:03 +00006655 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006656 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006657 break;
6658 }
6659 }
6660
6661 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006662 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006663 }
6664
6665}
6666
6667/**
6668 * xmlParseReference:
6669 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006670 *
Owen Taylor3473f882001-02-23 17:55:21 +00006671 * parse and handle entity references in content, depending on the SAX
6672 * interface, this may end-up in a call to character() if this is a
6673 * CharRef, a predefined entity, if there is no reference() callback.
6674 * or if the parser was asked to switch to that mode.
6675 *
6676 * [67] Reference ::= EntityRef | CharRef
6677 */
6678void
6679xmlParseReference(xmlParserCtxtPtr ctxt) {
6680 xmlEntityPtr ent;
6681 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006682 int was_checked;
6683 xmlNodePtr list = NULL;
6684 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006685
Daniel Veillard0161e632008-08-28 15:36:32 +00006686
6687 if (RAW != '&')
6688 return;
6689
6690 /*
6691 * Simple case of a CharRef
6692 */
Owen Taylor3473f882001-02-23 17:55:21 +00006693 if (NXT(1) == '#') {
6694 int i = 0;
6695 xmlChar out[10];
6696 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006697 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006698
Daniel Veillarddc171602008-03-26 17:41:38 +00006699 if (value == 0)
6700 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006701 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6702 /*
6703 * So we are using non-UTF-8 buffers
6704 * Check that the char fit on 8bits, if not
6705 * generate a CharRef.
6706 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006707 if (value <= 0xFF) {
6708 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006709 out[1] = 0;
6710 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6711 (!ctxt->disableSAX))
6712 ctxt->sax->characters(ctxt->userData, out, 1);
6713 } else {
6714 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006715 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006716 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006717 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006718 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6719 (!ctxt->disableSAX))
6720 ctxt->sax->reference(ctxt->userData, out);
6721 }
6722 } else {
6723 /*
6724 * Just encode the value in UTF-8
6725 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006726 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006727 out[i] = 0;
6728 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6729 (!ctxt->disableSAX))
6730 ctxt->sax->characters(ctxt->userData, out, i);
6731 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006732 return;
6733 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006734
Daniel Veillard0161e632008-08-28 15:36:32 +00006735 /*
6736 * We are seeing an entity reference
6737 */
6738 ent = xmlParseEntityRef(ctxt);
6739 if (ent == NULL) return;
6740 if (!ctxt->wellFormed)
6741 return;
6742 was_checked = ent->checked;
6743
6744 /* special case of predefined entities */
6745 if ((ent->name == NULL) ||
6746 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6747 val = ent->content;
6748 if (val == NULL) return;
6749 /*
6750 * inline the entity.
6751 */
6752 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6753 (!ctxt->disableSAX))
6754 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6755 return;
6756 }
6757
6758 /*
6759 * The first reference to the entity trigger a parsing phase
6760 * where the ent->children is filled with the result from
6761 * the parsing.
6762 */
6763 if (ent->checked == 0) {
6764 unsigned long oldnbent = ctxt->nbentities;
6765
6766 /*
6767 * This is a bit hackish but this seems the best
6768 * way to make sure both SAX and DOM entity support
6769 * behaves okay.
6770 */
6771 void *user_data;
6772 if (ctxt->userData == ctxt)
6773 user_data = NULL;
6774 else
6775 user_data = ctxt->userData;
6776
6777 /*
6778 * Check that this entity is well formed
6779 * 4.3.2: An internal general parsed entity is well-formed
6780 * if its replacement text matches the production labeled
6781 * content.
6782 */
6783 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6784 ctxt->depth++;
6785 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6786 user_data, &list);
6787 ctxt->depth--;
6788
6789 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6790 ctxt->depth++;
6791 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6792 user_data, ctxt->depth, ent->URI,
6793 ent->ExternalID, &list);
6794 ctxt->depth--;
6795 } else {
6796 ret = XML_ERR_ENTITY_PE_INTERNAL;
6797 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6798 "invalid entity type found\n", NULL);
6799 }
6800
6801 /*
6802 * Store the number of entities needing parsing for this entity
6803 * content and do checkings
6804 */
6805 ent->checked = ctxt->nbentities - oldnbent;
6806 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006807 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006808 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006809 return;
6810 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006811 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6812 xmlFreeNodeList(list);
6813 return;
6814 }
Owen Taylor3473f882001-02-23 17:55:21 +00006815
Daniel Veillard0161e632008-08-28 15:36:32 +00006816 if ((ret == XML_ERR_OK) && (list != NULL)) {
6817 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6818 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6819 (ent->children == NULL)) {
6820 ent->children = list;
6821 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006822 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006823 * Prune it directly in the generated document
6824 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006825 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006826 if (((list->type == XML_TEXT_NODE) &&
6827 (list->next == NULL)) ||
6828 (ctxt->parseMode == XML_PARSE_READER)) {
6829 list->parent = (xmlNodePtr) ent;
6830 list = NULL;
6831 ent->owner = 1;
6832 } else {
6833 ent->owner = 0;
6834 while (list != NULL) {
6835 list->parent = (xmlNodePtr) ctxt->node;
6836 list->doc = ctxt->myDoc;
6837 if (list->next == NULL)
6838 ent->last = list;
6839 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006840 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006841 list = ent->children;
6842#ifdef LIBXML_LEGACY_ENABLED
6843 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6844 xmlAddEntityReference(ent, list, NULL);
6845#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006846 }
6847 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006848 ent->owner = 1;
6849 while (list != NULL) {
6850 list->parent = (xmlNodePtr) ent;
6851 if (list->next == NULL)
6852 ent->last = list;
6853 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006854 }
6855 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006856 } else {
6857 xmlFreeNodeList(list);
6858 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006859 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006860 } else if ((ret != XML_ERR_OK) &&
6861 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6862 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6863 "Entity '%s' failed to parse\n", ent->name);
6864 } else if (list != NULL) {
6865 xmlFreeNodeList(list);
6866 list = NULL;
6867 }
6868 if (ent->checked == 0)
6869 ent->checked = 1;
6870 } else if (ent->checked != 1) {
6871 ctxt->nbentities += ent->checked;
6872 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006873
Daniel Veillard0161e632008-08-28 15:36:32 +00006874 /*
6875 * Now that the entity content has been gathered
6876 * provide it to the application, this can take different forms based
6877 * on the parsing modes.
6878 */
6879 if (ent->children == NULL) {
6880 /*
6881 * Probably running in SAX mode and the callbacks don't
6882 * build the entity content. So unless we already went
6883 * though parsing for first checking go though the entity
6884 * content to generate callbacks associated to the entity
6885 */
6886 if (was_checked != 0) {
6887 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00006888 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006889 * This is a bit hackish but this seems the best
6890 * way to make sure both SAX and DOM entity support
6891 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00006892 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006893 if (ctxt->userData == ctxt)
6894 user_data = NULL;
6895 else
6896 user_data = ctxt->userData;
6897
6898 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6899 ctxt->depth++;
6900 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6901 ent->content, user_data, NULL);
6902 ctxt->depth--;
6903 } else if (ent->etype ==
6904 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6905 ctxt->depth++;
6906 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6907 ctxt->sax, user_data, ctxt->depth,
6908 ent->URI, ent->ExternalID, NULL);
6909 ctxt->depth--;
6910 } else {
6911 ret = XML_ERR_ENTITY_PE_INTERNAL;
6912 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6913 "invalid entity type found\n", NULL);
6914 }
6915 if (ret == XML_ERR_ENTITY_LOOP) {
6916 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6917 return;
6918 }
6919 }
6920 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6921 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6922 /*
6923 * Entity reference callback comes second, it's somewhat
6924 * superfluous but a compatibility to historical behaviour
6925 */
6926 ctxt->sax->reference(ctxt->userData, ent->name);
6927 }
6928 return;
6929 }
6930
6931 /*
6932 * If we didn't get any children for the entity being built
6933 */
6934 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6935 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6936 /*
6937 * Create a node.
6938 */
6939 ctxt->sax->reference(ctxt->userData, ent->name);
6940 return;
6941 }
6942
6943 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6944 /*
6945 * There is a problem on the handling of _private for entities
6946 * (bug 155816): Should we copy the content of the field from
6947 * the entity (possibly overwriting some value set by the user
6948 * when a copy is created), should we leave it alone, or should
6949 * we try to take care of different situations? The problem
6950 * is exacerbated by the usage of this field by the xmlReader.
6951 * To fix this bug, we look at _private on the created node
6952 * and, if it's NULL, we copy in whatever was in the entity.
6953 * If it's not NULL we leave it alone. This is somewhat of a
6954 * hack - maybe we should have further tests to determine
6955 * what to do.
6956 */
6957 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6958 /*
6959 * Seems we are generating the DOM content, do
6960 * a simple tree copy for all references except the first
6961 * In the first occurrence list contains the replacement.
6962 * progressive == 2 means we are operating on the Reader
6963 * and since nodes are discarded we must copy all the time.
6964 */
6965 if (((list == NULL) && (ent->owner == 0)) ||
6966 (ctxt->parseMode == XML_PARSE_READER)) {
6967 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6968
6969 /*
6970 * when operating on a reader, the entities definitions
6971 * are always owning the entities subtree.
6972 if (ctxt->parseMode == XML_PARSE_READER)
6973 ent->owner = 1;
6974 */
6975
6976 cur = ent->children;
6977 while (cur != NULL) {
6978 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6979 if (nw != NULL) {
6980 if (nw->_private == NULL)
6981 nw->_private = cur->_private;
6982 if (firstChild == NULL){
6983 firstChild = nw;
6984 }
6985 nw = xmlAddChild(ctxt->node, nw);
6986 }
6987 if (cur == ent->last) {
6988 /*
6989 * needed to detect some strange empty
6990 * node cases in the reader tests
6991 */
6992 if ((ctxt->parseMode == XML_PARSE_READER) &&
6993 (nw != NULL) &&
6994 (nw->type == XML_ELEMENT_NODE) &&
6995 (nw->children == NULL))
6996 nw->extra = 1;
6997
6998 break;
6999 }
7000 cur = cur->next;
7001 }
7002#ifdef LIBXML_LEGACY_ENABLED
7003 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7004 xmlAddEntityReference(ent, firstChild, nw);
7005#endif /* LIBXML_LEGACY_ENABLED */
7006 } else if (list == NULL) {
7007 xmlNodePtr nw = NULL, cur, next, last,
7008 firstChild = NULL;
7009 /*
7010 * Copy the entity child list and make it the new
7011 * entity child list. The goal is to make sure any
7012 * ID or REF referenced will be the one from the
7013 * document content and not the entity copy.
7014 */
7015 cur = ent->children;
7016 ent->children = NULL;
7017 last = ent->last;
7018 ent->last = NULL;
7019 while (cur != NULL) {
7020 next = cur->next;
7021 cur->next = NULL;
7022 cur->parent = NULL;
7023 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7024 if (nw != NULL) {
7025 if (nw->_private == NULL)
7026 nw->_private = cur->_private;
7027 if (firstChild == NULL){
7028 firstChild = cur;
7029 }
7030 xmlAddChild((xmlNodePtr) ent, nw);
7031 xmlAddChild(ctxt->node, cur);
7032 }
7033 if (cur == last)
7034 break;
7035 cur = next;
7036 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007037 if (ent->owner == 0)
7038 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007039#ifdef LIBXML_LEGACY_ENABLED
7040 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7041 xmlAddEntityReference(ent, firstChild, nw);
7042#endif /* LIBXML_LEGACY_ENABLED */
7043 } else {
7044 const xmlChar *nbktext;
7045
7046 /*
7047 * the name change is to avoid coalescing of the
7048 * node with a possible previous text one which
7049 * would make ent->children a dangling pointer
7050 */
7051 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7052 -1);
7053 if (ent->children->type == XML_TEXT_NODE)
7054 ent->children->name = nbktext;
7055 if ((ent->last != ent->children) &&
7056 (ent->last->type == XML_TEXT_NODE))
7057 ent->last->name = nbktext;
7058 xmlAddChildList(ctxt->node, ent->children);
7059 }
7060
7061 /*
7062 * This is to avoid a nasty side effect, see
7063 * characters() in SAX.c
7064 */
7065 ctxt->nodemem = 0;
7066 ctxt->nodelen = 0;
7067 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007068 }
7069 }
7070}
7071
7072/**
7073 * xmlParseEntityRef:
7074 * @ctxt: an XML parser context
7075 *
7076 * parse ENTITY references declarations
7077 *
7078 * [68] EntityRef ::= '&' Name ';'
7079 *
7080 * [ WFC: Entity Declared ]
7081 * In a document without any DTD, a document with only an internal DTD
7082 * subset which contains no parameter entity references, or a document
7083 * with "standalone='yes'", the Name given in the entity reference
7084 * must match that in an entity declaration, except that well-formed
7085 * documents need not declare any of the following entities: amp, lt,
7086 * gt, apos, quot. The declaration of a parameter entity must precede
7087 * any reference to it. Similarly, the declaration of a general entity
7088 * must precede any reference to it which appears in a default value in an
7089 * attribute-list declaration. Note that if entities are declared in the
7090 * external subset or in external parameter entities, a non-validating
7091 * processor is not obligated to read and process their declarations;
7092 * for such documents, the rule that an entity must be declared is a
7093 * well-formedness constraint only if standalone='yes'.
7094 *
7095 * [ WFC: Parsed Entity ]
7096 * An entity reference must not contain the name of an unparsed entity
7097 *
7098 * Returns the xmlEntityPtr if found, or NULL otherwise.
7099 */
7100xmlEntityPtr
7101xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007102 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007103 xmlEntityPtr ent = NULL;
7104
7105 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007106
Daniel Veillard0161e632008-08-28 15:36:32 +00007107 if (RAW != '&')
7108 return(NULL);
7109 NEXT;
7110 name = xmlParseName(ctxt);
7111 if (name == NULL) {
7112 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7113 "xmlParseEntityRef: no name\n");
7114 return(NULL);
7115 }
7116 if (RAW != ';') {
7117 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7118 return(NULL);
7119 }
7120 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007121
Daniel Veillard0161e632008-08-28 15:36:32 +00007122 /*
7123 * Predefined entites override any extra definition
7124 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007125 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7126 ent = xmlGetPredefinedEntity(name);
7127 if (ent != NULL)
7128 return(ent);
7129 }
Owen Taylor3473f882001-02-23 17:55:21 +00007130
Daniel Veillard0161e632008-08-28 15:36:32 +00007131 /*
7132 * Increate the number of entity references parsed
7133 */
7134 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007135
Daniel Veillard0161e632008-08-28 15:36:32 +00007136 /*
7137 * Ask first SAX for entity resolution, otherwise try the
7138 * entities which may have stored in the parser context.
7139 */
7140 if (ctxt->sax != NULL) {
7141 if (ctxt->sax->getEntity != NULL)
7142 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007143 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7144 (ctxt->options & XML_PARSE_OLDSAX))
7145 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007146 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7147 (ctxt->userData==ctxt)) {
7148 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007149 }
7150 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007151 /*
7152 * [ WFC: Entity Declared ]
7153 * In a document without any DTD, a document with only an
7154 * internal DTD subset which contains no parameter entity
7155 * references, or a document with "standalone='yes'", the
7156 * Name given in the entity reference must match that in an
7157 * entity declaration, except that well-formed documents
7158 * need not declare any of the following entities: amp, lt,
7159 * gt, apos, quot.
7160 * The declaration of a parameter entity must precede any
7161 * reference to it.
7162 * Similarly, the declaration of a general entity must
7163 * precede any reference to it which appears in a default
7164 * value in an attribute-list declaration. Note that if
7165 * entities are declared in the external subset or in
7166 * external parameter entities, a non-validating processor
7167 * is not obligated to read and process their declarations;
7168 * for such documents, the rule that an entity must be
7169 * declared is a well-formedness constraint only if
7170 * standalone='yes'.
7171 */
7172 if (ent == NULL) {
7173 if ((ctxt->standalone == 1) ||
7174 ((ctxt->hasExternalSubset == 0) &&
7175 (ctxt->hasPErefs == 0))) {
7176 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7177 "Entity '%s' not defined\n", name);
7178 } else {
7179 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7180 "Entity '%s' not defined\n", name);
7181 if ((ctxt->inSubset == 0) &&
7182 (ctxt->sax != NULL) &&
7183 (ctxt->sax->reference != NULL)) {
7184 ctxt->sax->reference(ctxt->userData, name);
7185 }
7186 }
7187 ctxt->valid = 0;
7188 }
7189
7190 /*
7191 * [ WFC: Parsed Entity ]
7192 * An entity reference must not contain the name of an
7193 * unparsed entity
7194 */
7195 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7196 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7197 "Entity reference to unparsed entity %s\n", name);
7198 }
7199
7200 /*
7201 * [ WFC: No External Entity References ]
7202 * Attribute values cannot contain direct or indirect
7203 * entity references to external entities.
7204 */
7205 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7206 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7207 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7208 "Attribute references external entity '%s'\n", name);
7209 }
7210 /*
7211 * [ WFC: No < in Attribute Values ]
7212 * The replacement text of any entity referred to directly or
7213 * indirectly in an attribute value (other than "&lt;") must
7214 * not contain a <.
7215 */
7216 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7217 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007218 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007219 (xmlStrchr(ent->content, '<'))) {
7220 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7221 "'<' in entity '%s' is not allowed in attributes values\n", name);
7222 }
7223
7224 /*
7225 * Internal check, no parameter entities here ...
7226 */
7227 else {
7228 switch (ent->etype) {
7229 case XML_INTERNAL_PARAMETER_ENTITY:
7230 case XML_EXTERNAL_PARAMETER_ENTITY:
7231 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7232 "Attempt to reference the parameter entity '%s'\n",
7233 name);
7234 break;
7235 default:
7236 break;
7237 }
7238 }
7239
7240 /*
7241 * [ WFC: No Recursion ]
7242 * A parsed entity must not contain a recursive reference
7243 * to itself, either directly or indirectly.
7244 * Done somewhere else
7245 */
Owen Taylor3473f882001-02-23 17:55:21 +00007246 return(ent);
7247}
7248
7249/**
7250 * xmlParseStringEntityRef:
7251 * @ctxt: an XML parser context
7252 * @str: a pointer to an index in the string
7253 *
7254 * parse ENTITY references declarations, but this version parses it from
7255 * a string value.
7256 *
7257 * [68] EntityRef ::= '&' Name ';'
7258 *
7259 * [ WFC: Entity Declared ]
7260 * In a document without any DTD, a document with only an internal DTD
7261 * subset which contains no parameter entity references, or a document
7262 * with "standalone='yes'", the Name given in the entity reference
7263 * must match that in an entity declaration, except that well-formed
7264 * documents need not declare any of the following entities: amp, lt,
7265 * gt, apos, quot. The declaration of a parameter entity must precede
7266 * any reference to it. Similarly, the declaration of a general entity
7267 * must precede any reference to it which appears in a default value in an
7268 * attribute-list declaration. Note that if entities are declared in the
7269 * external subset or in external parameter entities, a non-validating
7270 * processor is not obligated to read and process their declarations;
7271 * for such documents, the rule that an entity must be declared is a
7272 * well-formedness constraint only if standalone='yes'.
7273 *
7274 * [ WFC: Parsed Entity ]
7275 * An entity reference must not contain the name of an unparsed entity
7276 *
7277 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7278 * is updated to the current location in the string.
7279 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007280static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007281xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7282 xmlChar *name;
7283 const xmlChar *ptr;
7284 xmlChar cur;
7285 xmlEntityPtr ent = NULL;
7286
7287 if ((str == NULL) || (*str == NULL))
7288 return(NULL);
7289 ptr = *str;
7290 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007291 if (cur != '&')
7292 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007293
Daniel Veillard0161e632008-08-28 15:36:32 +00007294 ptr++;
7295 cur = *ptr;
7296 name = xmlParseStringName(ctxt, &ptr);
7297 if (name == NULL) {
7298 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7299 "xmlParseStringEntityRef: no name\n");
7300 *str = ptr;
7301 return(NULL);
7302 }
7303 if (*ptr != ';') {
7304 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007305 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007306 *str = ptr;
7307 return(NULL);
7308 }
7309 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007310
Owen Taylor3473f882001-02-23 17:55:21 +00007311
Daniel Veillard0161e632008-08-28 15:36:32 +00007312 /*
7313 * Predefined entites override any extra definition
7314 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007315 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7316 ent = xmlGetPredefinedEntity(name);
7317 if (ent != NULL) {
7318 xmlFree(name);
7319 *str = ptr;
7320 return(ent);
7321 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007322 }
Owen Taylor3473f882001-02-23 17:55:21 +00007323
Daniel Veillard0161e632008-08-28 15:36:32 +00007324 /*
7325 * Increate the number of entity references parsed
7326 */
7327 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007328
Daniel Veillard0161e632008-08-28 15:36:32 +00007329 /*
7330 * Ask first SAX for entity resolution, otherwise try the
7331 * entities which may have stored in the parser context.
7332 */
7333 if (ctxt->sax != NULL) {
7334 if (ctxt->sax->getEntity != NULL)
7335 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007336 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7337 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007338 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7339 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007340 }
7341 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007342
7343 /*
7344 * [ WFC: Entity Declared ]
7345 * In a document without any DTD, a document with only an
7346 * internal DTD subset which contains no parameter entity
7347 * references, or a document with "standalone='yes'", the
7348 * Name given in the entity reference must match that in an
7349 * entity declaration, except that well-formed documents
7350 * need not declare any of the following entities: amp, lt,
7351 * gt, apos, quot.
7352 * The declaration of a parameter entity must precede any
7353 * reference to it.
7354 * Similarly, the declaration of a general entity must
7355 * precede any reference to it which appears in a default
7356 * value in an attribute-list declaration. Note that if
7357 * entities are declared in the external subset or in
7358 * external parameter entities, a non-validating processor
7359 * is not obligated to read and process their declarations;
7360 * for such documents, the rule that an entity must be
7361 * declared is a well-formedness constraint only if
7362 * standalone='yes'.
7363 */
7364 if (ent == NULL) {
7365 if ((ctxt->standalone == 1) ||
7366 ((ctxt->hasExternalSubset == 0) &&
7367 (ctxt->hasPErefs == 0))) {
7368 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7369 "Entity '%s' not defined\n", name);
7370 } else {
7371 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7372 "Entity '%s' not defined\n",
7373 name);
7374 }
7375 /* TODO ? check regressions ctxt->valid = 0; */
7376 }
7377
7378 /*
7379 * [ WFC: Parsed Entity ]
7380 * An entity reference must not contain the name of an
7381 * unparsed entity
7382 */
7383 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7384 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7385 "Entity reference to unparsed entity %s\n", name);
7386 }
7387
7388 /*
7389 * [ WFC: No External Entity References ]
7390 * Attribute values cannot contain direct or indirect
7391 * entity references to external entities.
7392 */
7393 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7394 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7395 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7396 "Attribute references external entity '%s'\n", name);
7397 }
7398 /*
7399 * [ WFC: No < in Attribute Values ]
7400 * The replacement text of any entity referred to directly or
7401 * indirectly in an attribute value (other than "&lt;") must
7402 * not contain a <.
7403 */
7404 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7405 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007406 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007407 (xmlStrchr(ent->content, '<'))) {
7408 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7409 "'<' in entity '%s' is not allowed in attributes values\n",
7410 name);
7411 }
7412
7413 /*
7414 * Internal check, no parameter entities here ...
7415 */
7416 else {
7417 switch (ent->etype) {
7418 case XML_INTERNAL_PARAMETER_ENTITY:
7419 case XML_EXTERNAL_PARAMETER_ENTITY:
7420 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7421 "Attempt to reference the parameter entity '%s'\n",
7422 name);
7423 break;
7424 default:
7425 break;
7426 }
7427 }
7428
7429 /*
7430 * [ WFC: No Recursion ]
7431 * A parsed entity must not contain a recursive reference
7432 * to itself, either directly or indirectly.
7433 * Done somewhere else
7434 */
7435
7436 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007437 *str = ptr;
7438 return(ent);
7439}
7440
7441/**
7442 * xmlParsePEReference:
7443 * @ctxt: an XML parser context
7444 *
7445 * parse PEReference declarations
7446 * The entity content is handled directly by pushing it's content as
7447 * a new input stream.
7448 *
7449 * [69] PEReference ::= '%' Name ';'
7450 *
7451 * [ WFC: No Recursion ]
7452 * A parsed entity must not contain a recursive
7453 * reference to itself, either directly or indirectly.
7454 *
7455 * [ WFC: Entity Declared ]
7456 * In a document without any DTD, a document with only an internal DTD
7457 * subset which contains no parameter entity references, or a document
7458 * with "standalone='yes'", ... ... The declaration of a parameter
7459 * entity must precede any reference to it...
7460 *
7461 * [ VC: Entity Declared ]
7462 * In a document with an external subset or external parameter entities
7463 * with "standalone='no'", ... ... The declaration of a parameter entity
7464 * must precede any reference to it...
7465 *
7466 * [ WFC: In DTD ]
7467 * Parameter-entity references may only appear in the DTD.
7468 * NOTE: misleading but this is handled.
7469 */
7470void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007471xmlParsePEReference(xmlParserCtxtPtr ctxt)
7472{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007473 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007474 xmlEntityPtr entity = NULL;
7475 xmlParserInputPtr input;
7476
Daniel Veillard0161e632008-08-28 15:36:32 +00007477 if (RAW != '%')
7478 return;
7479 NEXT;
7480 name = xmlParseName(ctxt);
7481 if (name == NULL) {
7482 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7483 "xmlParsePEReference: no name\n");
7484 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007485 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007486 if (RAW != ';') {
7487 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7488 return;
7489 }
7490
7491 NEXT;
7492
7493 /*
7494 * Increate the number of entity references parsed
7495 */
7496 ctxt->nbentities++;
7497
7498 /*
7499 * Request the entity from SAX
7500 */
7501 if ((ctxt->sax != NULL) &&
7502 (ctxt->sax->getParameterEntity != NULL))
7503 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7504 name);
7505 if (entity == NULL) {
7506 /*
7507 * [ WFC: Entity Declared ]
7508 * In a document without any DTD, a document with only an
7509 * internal DTD subset which contains no parameter entity
7510 * references, or a document with "standalone='yes'", ...
7511 * ... The declaration of a parameter entity must precede
7512 * any reference to it...
7513 */
7514 if ((ctxt->standalone == 1) ||
7515 ((ctxt->hasExternalSubset == 0) &&
7516 (ctxt->hasPErefs == 0))) {
7517 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7518 "PEReference: %%%s; not found\n",
7519 name);
7520 } else {
7521 /*
7522 * [ VC: Entity Declared ]
7523 * In a document with an external subset or external
7524 * parameter entities with "standalone='no'", ...
7525 * ... The declaration of a parameter entity must
7526 * precede any reference to it...
7527 */
7528 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7529 "PEReference: %%%s; not found\n",
7530 name, NULL);
7531 ctxt->valid = 0;
7532 }
7533 } else {
7534 /*
7535 * Internal checking in case the entity quest barfed
7536 */
7537 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7538 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7539 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7540 "Internal: %%%s; is not a parameter entity\n",
7541 name, NULL);
7542 } else if (ctxt->input->free != deallocblankswrapper) {
7543 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7544 if (xmlPushInput(ctxt, input) < 0)
7545 return;
7546 } else {
7547 /*
7548 * TODO !!!
7549 * handle the extra spaces added before and after
7550 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7551 */
7552 input = xmlNewEntityInputStream(ctxt, entity);
7553 if (xmlPushInput(ctxt, input) < 0)
7554 return;
7555 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7556 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7557 (IS_BLANK_CH(NXT(5)))) {
7558 xmlParseTextDecl(ctxt);
7559 if (ctxt->errNo ==
7560 XML_ERR_UNSUPPORTED_ENCODING) {
7561 /*
7562 * The XML REC instructs us to stop parsing
7563 * right here
7564 */
7565 ctxt->instate = XML_PARSER_EOF;
7566 return;
7567 }
7568 }
7569 }
7570 }
7571 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007572}
7573
7574/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007575 * xmlLoadEntityContent:
7576 * @ctxt: an XML parser context
7577 * @entity: an unloaded system entity
7578 *
7579 * Load the original content of the given system entity from the
7580 * ExternalID/SystemID given. This is to be used for Included in Literal
7581 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7582 *
7583 * Returns 0 in case of success and -1 in case of failure
7584 */
7585static int
7586xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7587 xmlParserInputPtr input;
7588 xmlBufferPtr buf;
7589 int l, c;
7590 int count = 0;
7591
7592 if ((ctxt == NULL) || (entity == NULL) ||
7593 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7594 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7595 (entity->content != NULL)) {
7596 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7597 "xmlLoadEntityContent parameter error");
7598 return(-1);
7599 }
7600
7601 if (xmlParserDebugEntities)
7602 xmlGenericError(xmlGenericErrorContext,
7603 "Reading %s entity content input\n", entity->name);
7604
7605 buf = xmlBufferCreate();
7606 if (buf == NULL) {
7607 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7608 "xmlLoadEntityContent parameter error");
7609 return(-1);
7610 }
7611
7612 input = xmlNewEntityInputStream(ctxt, entity);
7613 if (input == NULL) {
7614 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7615 "xmlLoadEntityContent input error");
7616 xmlBufferFree(buf);
7617 return(-1);
7618 }
7619
7620 /*
7621 * Push the entity as the current input, read char by char
7622 * saving to the buffer until the end of the entity or an error
7623 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007624 if (xmlPushInput(ctxt, input) < 0) {
7625 xmlBufferFree(buf);
7626 return(-1);
7627 }
7628
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007629 GROW;
7630 c = CUR_CHAR(l);
7631 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7632 (IS_CHAR(c))) {
7633 xmlBufferAdd(buf, ctxt->input->cur, l);
7634 if (count++ > 100) {
7635 count = 0;
7636 GROW;
7637 }
7638 NEXTL(l);
7639 c = CUR_CHAR(l);
7640 }
7641
7642 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7643 xmlPopInput(ctxt);
7644 } else if (!IS_CHAR(c)) {
7645 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7646 "xmlLoadEntityContent: invalid char value %d\n",
7647 c);
7648 xmlBufferFree(buf);
7649 return(-1);
7650 }
7651 entity->content = buf->content;
7652 buf->content = NULL;
7653 xmlBufferFree(buf);
7654
7655 return(0);
7656}
7657
7658/**
Owen Taylor3473f882001-02-23 17:55:21 +00007659 * xmlParseStringPEReference:
7660 * @ctxt: an XML parser context
7661 * @str: a pointer to an index in the string
7662 *
7663 * parse PEReference declarations
7664 *
7665 * [69] PEReference ::= '%' Name ';'
7666 *
7667 * [ WFC: No Recursion ]
7668 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007669 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007670 *
7671 * [ WFC: Entity Declared ]
7672 * In a document without any DTD, a document with only an internal DTD
7673 * subset which contains no parameter entity references, or a document
7674 * with "standalone='yes'", ... ... The declaration of a parameter
7675 * entity must precede any reference to it...
7676 *
7677 * [ VC: Entity Declared ]
7678 * In a document with an external subset or external parameter entities
7679 * with "standalone='no'", ... ... The declaration of a parameter entity
7680 * must precede any reference to it...
7681 *
7682 * [ WFC: In DTD ]
7683 * Parameter-entity references may only appear in the DTD.
7684 * NOTE: misleading but this is handled.
7685 *
7686 * Returns the string of the entity content.
7687 * str is updated to the current value of the index
7688 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007689static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007690xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7691 const xmlChar *ptr;
7692 xmlChar cur;
7693 xmlChar *name;
7694 xmlEntityPtr entity = NULL;
7695
7696 if ((str == NULL) || (*str == NULL)) return(NULL);
7697 ptr = *str;
7698 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007699 if (cur != '%')
7700 return(NULL);
7701 ptr++;
7702 cur = *ptr;
7703 name = xmlParseStringName(ctxt, &ptr);
7704 if (name == NULL) {
7705 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7706 "xmlParseStringPEReference: no name\n");
7707 *str = ptr;
7708 return(NULL);
7709 }
7710 cur = *ptr;
7711 if (cur != ';') {
7712 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7713 xmlFree(name);
7714 *str = ptr;
7715 return(NULL);
7716 }
7717 ptr++;
7718
7719 /*
7720 * Increate the number of entity references parsed
7721 */
7722 ctxt->nbentities++;
7723
7724 /*
7725 * Request the entity from SAX
7726 */
7727 if ((ctxt->sax != NULL) &&
7728 (ctxt->sax->getParameterEntity != NULL))
7729 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7730 name);
7731 if (entity == NULL) {
7732 /*
7733 * [ WFC: Entity Declared ]
7734 * In a document without any DTD, a document with only an
7735 * internal DTD subset which contains no parameter entity
7736 * references, or a document with "standalone='yes'", ...
7737 * ... The declaration of a parameter entity must precede
7738 * any reference to it...
7739 */
7740 if ((ctxt->standalone == 1) ||
7741 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7742 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7743 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007744 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007745 /*
7746 * [ VC: Entity Declared ]
7747 * In a document with an external subset or external
7748 * parameter entities with "standalone='no'", ...
7749 * ... The declaration of a parameter entity must
7750 * precede any reference to it...
7751 */
7752 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7753 "PEReference: %%%s; not found\n",
7754 name, NULL);
7755 ctxt->valid = 0;
7756 }
7757 } else {
7758 /*
7759 * Internal checking in case the entity quest barfed
7760 */
7761 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7762 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7763 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7764 "%%%s; is not a parameter entity\n",
7765 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007766 }
7767 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007768 ctxt->hasPErefs = 1;
7769 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007770 *str = ptr;
7771 return(entity);
7772}
7773
7774/**
7775 * xmlParseDocTypeDecl:
7776 * @ctxt: an XML parser context
7777 *
7778 * parse a DOCTYPE declaration
7779 *
7780 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7781 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7782 *
7783 * [ VC: Root Element Type ]
7784 * The Name in the document type declaration must match the element
7785 * type of the root element.
7786 */
7787
7788void
7789xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007790 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007791 xmlChar *ExternalID = NULL;
7792 xmlChar *URI = NULL;
7793
7794 /*
7795 * We know that '<!DOCTYPE' has been detected.
7796 */
7797 SKIP(9);
7798
7799 SKIP_BLANKS;
7800
7801 /*
7802 * Parse the DOCTYPE name.
7803 */
7804 name = xmlParseName(ctxt);
7805 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007806 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7807 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007808 }
7809 ctxt->intSubName = name;
7810
7811 SKIP_BLANKS;
7812
7813 /*
7814 * Check for SystemID and ExternalID
7815 */
7816 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7817
7818 if ((URI != NULL) || (ExternalID != NULL)) {
7819 ctxt->hasExternalSubset = 1;
7820 }
7821 ctxt->extSubURI = URI;
7822 ctxt->extSubSystem = ExternalID;
7823
7824 SKIP_BLANKS;
7825
7826 /*
7827 * Create and update the internal subset.
7828 */
7829 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7830 (!ctxt->disableSAX))
7831 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7832
7833 /*
7834 * Is there any internal subset declarations ?
7835 * they are handled separately in xmlParseInternalSubset()
7836 */
7837 if (RAW == '[')
7838 return;
7839
7840 /*
7841 * We should be at the end of the DOCTYPE declaration.
7842 */
7843 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007844 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007845 }
7846 NEXT;
7847}
7848
7849/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007850 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007851 * @ctxt: an XML parser context
7852 *
7853 * parse the internal subset declaration
7854 *
7855 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7856 */
7857
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007858static void
Owen Taylor3473f882001-02-23 17:55:21 +00007859xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7860 /*
7861 * Is there any DTD definition ?
7862 */
7863 if (RAW == '[') {
7864 ctxt->instate = XML_PARSER_DTD;
7865 NEXT;
7866 /*
7867 * Parse the succession of Markup declarations and
7868 * PEReferences.
7869 * Subsequence (markupdecl | PEReference | S)*
7870 */
7871 while (RAW != ']') {
7872 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007873 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007874
7875 SKIP_BLANKS;
7876 xmlParseMarkupDecl(ctxt);
7877 xmlParsePEReference(ctxt);
7878
7879 /*
7880 * Pop-up of finished entities.
7881 */
7882 while ((RAW == 0) && (ctxt->inputNr > 1))
7883 xmlPopInput(ctxt);
7884
7885 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007886 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007887 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007888 break;
7889 }
7890 }
7891 if (RAW == ']') {
7892 NEXT;
7893 SKIP_BLANKS;
7894 }
7895 }
7896
7897 /*
7898 * We should be at the end of the DOCTYPE declaration.
7899 */
7900 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007901 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007902 }
7903 NEXT;
7904}
7905
Daniel Veillard81273902003-09-30 00:43:48 +00007906#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007907/**
7908 * xmlParseAttribute:
7909 * @ctxt: an XML parser context
7910 * @value: a xmlChar ** used to store the value of the attribute
7911 *
7912 * parse an attribute
7913 *
7914 * [41] Attribute ::= Name Eq AttValue
7915 *
7916 * [ WFC: No External Entity References ]
7917 * Attribute values cannot contain direct or indirect entity references
7918 * to external entities.
7919 *
7920 * [ WFC: No < in Attribute Values ]
7921 * The replacement text of any entity referred to directly or indirectly in
7922 * an attribute value (other than "&lt;") must not contain a <.
7923 *
7924 * [ VC: Attribute Value Type ]
7925 * The attribute must have been declared; the value must be of the type
7926 * declared for it.
7927 *
7928 * [25] Eq ::= S? '=' S?
7929 *
7930 * With namespace:
7931 *
7932 * [NS 11] Attribute ::= QName Eq AttValue
7933 *
7934 * Also the case QName == xmlns:??? is handled independently as a namespace
7935 * definition.
7936 *
7937 * Returns the attribute name, and the value in *value.
7938 */
7939
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007940const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007941xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007942 const xmlChar *name;
7943 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007944
7945 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007946 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007947 name = xmlParseName(ctxt);
7948 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007949 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007950 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007951 return(NULL);
7952 }
7953
7954 /*
7955 * read the value
7956 */
7957 SKIP_BLANKS;
7958 if (RAW == '=') {
7959 NEXT;
7960 SKIP_BLANKS;
7961 val = xmlParseAttValue(ctxt);
7962 ctxt->instate = XML_PARSER_CONTENT;
7963 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007964 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007965 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007966 return(NULL);
7967 }
7968
7969 /*
7970 * Check that xml:lang conforms to the specification
7971 * No more registered as an error, just generate a warning now
7972 * since this was deprecated in XML second edition
7973 */
7974 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7975 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007976 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7977 "Malformed value for xml:lang : %s\n",
7978 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007979 }
7980 }
7981
7982 /*
7983 * Check that xml:space conforms to the specification
7984 */
7985 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7986 if (xmlStrEqual(val, BAD_CAST "default"))
7987 *(ctxt->space) = 0;
7988 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7989 *(ctxt->space) = 1;
7990 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007991 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007992"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007993 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007994 }
7995 }
7996
7997 *value = val;
7998 return(name);
7999}
8000
8001/**
8002 * xmlParseStartTag:
8003 * @ctxt: an XML parser context
8004 *
8005 * parse a start of tag either for rule element or
8006 * EmptyElement. In both case we don't parse the tag closing chars.
8007 *
8008 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8009 *
8010 * [ WFC: Unique Att Spec ]
8011 * No attribute name may appear more than once in the same start-tag or
8012 * empty-element tag.
8013 *
8014 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8015 *
8016 * [ WFC: Unique Att Spec ]
8017 * No attribute name may appear more than once in the same start-tag or
8018 * empty-element tag.
8019 *
8020 * With namespace:
8021 *
8022 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8023 *
8024 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8025 *
8026 * Returns the element name parsed
8027 */
8028
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008029const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008030xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008031 const xmlChar *name;
8032 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008033 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008034 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008035 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008036 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008037 int i;
8038
8039 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008040 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008041
8042 name = xmlParseName(ctxt);
8043 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008044 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008045 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008046 return(NULL);
8047 }
8048
8049 /*
8050 * Now parse the attributes, it ends up with the ending
8051 *
8052 * (S Attribute)* S?
8053 */
8054 SKIP_BLANKS;
8055 GROW;
8056
Daniel Veillard21a0f912001-02-25 19:54:14 +00008057 while ((RAW != '>') &&
8058 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008059 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008060 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008061 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008062
8063 attname = xmlParseAttribute(ctxt, &attvalue);
8064 if ((attname != NULL) && (attvalue != NULL)) {
8065 /*
8066 * [ WFC: Unique Att Spec ]
8067 * No attribute name may appear more than once in the same
8068 * start-tag or empty-element tag.
8069 */
8070 for (i = 0; i < nbatts;i += 2) {
8071 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008072 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008073 xmlFree(attvalue);
8074 goto failed;
8075 }
8076 }
Owen Taylor3473f882001-02-23 17:55:21 +00008077 /*
8078 * Add the pair to atts
8079 */
8080 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008081 maxatts = 22; /* allow for 10 attrs by default */
8082 atts = (const xmlChar **)
8083 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008084 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008085 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008086 if (attvalue != NULL)
8087 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008088 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008089 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008090 ctxt->atts = atts;
8091 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008092 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008093 const xmlChar **n;
8094
Owen Taylor3473f882001-02-23 17:55:21 +00008095 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008096 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008097 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008098 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008099 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008100 if (attvalue != NULL)
8101 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008102 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008103 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008104 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008105 ctxt->atts = atts;
8106 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008107 }
8108 atts[nbatts++] = attname;
8109 atts[nbatts++] = attvalue;
8110 atts[nbatts] = NULL;
8111 atts[nbatts + 1] = NULL;
8112 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008113 if (attvalue != NULL)
8114 xmlFree(attvalue);
8115 }
8116
8117failed:
8118
Daniel Veillard3772de32002-12-17 10:31:45 +00008119 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008120 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8121 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008122 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008123 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8124 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008125 }
8126 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008127 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8128 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008129 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8130 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008131 break;
8132 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008133 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008134 GROW;
8135 }
8136
8137 /*
8138 * SAX: Start of Element !
8139 */
8140 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008141 (!ctxt->disableSAX)) {
8142 if (nbatts > 0)
8143 ctxt->sax->startElement(ctxt->userData, name, atts);
8144 else
8145 ctxt->sax->startElement(ctxt->userData, name, NULL);
8146 }
Owen Taylor3473f882001-02-23 17:55:21 +00008147
8148 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008149 /* Free only the content strings */
8150 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008151 if (atts[i] != NULL)
8152 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008153 }
8154 return(name);
8155}
8156
8157/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008158 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008159 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008160 * @line: line of the start tag
8161 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008162 *
8163 * parse an end of tag
8164 *
8165 * [42] ETag ::= '</' Name S? '>'
8166 *
8167 * With namespace
8168 *
8169 * [NS 9] ETag ::= '</' QName S? '>'
8170 */
8171
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008172static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008173xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008174 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008175
8176 GROW;
8177 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008178 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008179 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008180 return;
8181 }
8182 SKIP(2);
8183
Daniel Veillard46de64e2002-05-29 08:21:33 +00008184 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008185
8186 /*
8187 * We should definitely be at the ending "S? '>'" part
8188 */
8189 GROW;
8190 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008191 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008192 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008193 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008194 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008195
8196 /*
8197 * [ WFC: Element Type Match ]
8198 * The Name in an element's end-tag must match the element type in the
8199 * start-tag.
8200 *
8201 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008202 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008203 if (name == NULL) name = BAD_CAST "unparseable";
8204 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008205 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008206 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008207 }
8208
8209 /*
8210 * SAX: End of Tag
8211 */
8212 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8213 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008214 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008215
Daniel Veillarde57ec792003-09-10 10:50:59 +00008216 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008217 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008218 return;
8219}
8220
8221/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008222 * xmlParseEndTag:
8223 * @ctxt: an XML parser context
8224 *
8225 * parse an end of tag
8226 *
8227 * [42] ETag ::= '</' Name S? '>'
8228 *
8229 * With namespace
8230 *
8231 * [NS 9] ETag ::= '</' QName S? '>'
8232 */
8233
8234void
8235xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008236 xmlParseEndTag1(ctxt, 0);
8237}
Daniel Veillard81273902003-09-30 00:43:48 +00008238#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008239
8240/************************************************************************
8241 * *
8242 * SAX 2 specific operations *
8243 * *
8244 ************************************************************************/
8245
Daniel Veillard0fb18932003-09-07 09:14:37 +00008246/*
8247 * xmlGetNamespace:
8248 * @ctxt: an XML parser context
8249 * @prefix: the prefix to lookup
8250 *
8251 * Lookup the namespace name for the @prefix (which ca be NULL)
8252 * The prefix must come from the @ctxt->dict dictionnary
8253 *
8254 * Returns the namespace name or NULL if not bound
8255 */
8256static const xmlChar *
8257xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8258 int i;
8259
Daniel Veillarde57ec792003-09-10 10:50:59 +00008260 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008261 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008262 if (ctxt->nsTab[i] == prefix) {
8263 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8264 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008265 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008266 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008267 return(NULL);
8268}
8269
8270/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008271 * xmlParseQName:
8272 * @ctxt: an XML parser context
8273 * @prefix: pointer to store the prefix part
8274 *
8275 * parse an XML Namespace QName
8276 *
8277 * [6] QName ::= (Prefix ':')? LocalPart
8278 * [7] Prefix ::= NCName
8279 * [8] LocalPart ::= NCName
8280 *
8281 * Returns the Name parsed or NULL
8282 */
8283
8284static const xmlChar *
8285xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8286 const xmlChar *l, *p;
8287
8288 GROW;
8289
8290 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008291 if (l == NULL) {
8292 if (CUR == ':') {
8293 l = xmlParseName(ctxt);
8294 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008295 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8296 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008297 *prefix = NULL;
8298 return(l);
8299 }
8300 }
8301 return(NULL);
8302 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008303 if (CUR == ':') {
8304 NEXT;
8305 p = l;
8306 l = xmlParseNCName(ctxt);
8307 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008308 xmlChar *tmp;
8309
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008310 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8311 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008312 l = xmlParseNmtoken(ctxt);
8313 if (l == NULL)
8314 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8315 else {
8316 tmp = xmlBuildQName(l, p, NULL, 0);
8317 xmlFree((char *)l);
8318 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008319 p = xmlDictLookup(ctxt->dict, tmp, -1);
8320 if (tmp != NULL) xmlFree(tmp);
8321 *prefix = NULL;
8322 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008323 }
8324 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008325 xmlChar *tmp;
8326
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008327 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8328 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008329 NEXT;
8330 tmp = (xmlChar *) xmlParseName(ctxt);
8331 if (tmp != NULL) {
8332 tmp = xmlBuildQName(tmp, l, NULL, 0);
8333 l = xmlDictLookup(ctxt->dict, tmp, -1);
8334 if (tmp != NULL) xmlFree(tmp);
8335 *prefix = p;
8336 return(l);
8337 }
8338 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8339 l = xmlDictLookup(ctxt->dict, tmp, -1);
8340 if (tmp != NULL) xmlFree(tmp);
8341 *prefix = p;
8342 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008343 }
8344 *prefix = p;
8345 } else
8346 *prefix = NULL;
8347 return(l);
8348}
8349
8350/**
8351 * xmlParseQNameAndCompare:
8352 * @ctxt: an XML parser context
8353 * @name: the localname
8354 * @prefix: the prefix, if any.
8355 *
8356 * parse an XML name and compares for match
8357 * (specialized for endtag parsing)
8358 *
8359 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8360 * and the name for mismatch
8361 */
8362
8363static const xmlChar *
8364xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8365 xmlChar const *prefix) {
8366 const xmlChar *cmp = name;
8367 const xmlChar *in;
8368 const xmlChar *ret;
8369 const xmlChar *prefix2;
8370
8371 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8372
8373 GROW;
8374 in = ctxt->input->cur;
8375
8376 cmp = prefix;
8377 while (*in != 0 && *in == *cmp) {
8378 ++in;
8379 ++cmp;
8380 }
8381 if ((*cmp == 0) && (*in == ':')) {
8382 in++;
8383 cmp = name;
8384 while (*in != 0 && *in == *cmp) {
8385 ++in;
8386 ++cmp;
8387 }
William M. Brack76e95df2003-10-18 16:20:14 +00008388 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008389 /* success */
8390 ctxt->input->cur = in;
8391 return((const xmlChar*) 1);
8392 }
8393 }
8394 /*
8395 * all strings coms from the dictionary, equality can be done directly
8396 */
8397 ret = xmlParseQName (ctxt, &prefix2);
8398 if ((ret == name) && (prefix == prefix2))
8399 return((const xmlChar*) 1);
8400 return ret;
8401}
8402
8403/**
8404 * xmlParseAttValueInternal:
8405 * @ctxt: an XML parser context
8406 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008407 * @alloc: whether the attribute was reallocated as a new string
8408 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008409 *
8410 * parse a value for an attribute.
8411 * NOTE: if no normalization is needed, the routine will return pointers
8412 * directly from the data buffer.
8413 *
8414 * 3.3.3 Attribute-Value Normalization:
8415 * Before the value of an attribute is passed to the application or
8416 * checked for validity, the XML processor must normalize it as follows:
8417 * - a character reference is processed by appending the referenced
8418 * character to the attribute value
8419 * - an entity reference is processed by recursively processing the
8420 * replacement text of the entity
8421 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8422 * appending #x20 to the normalized value, except that only a single
8423 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8424 * parsed entity or the literal entity value of an internal parsed entity
8425 * - other characters are processed by appending them to the normalized value
8426 * If the declared value is not CDATA, then the XML processor must further
8427 * process the normalized attribute value by discarding any leading and
8428 * trailing space (#x20) characters, and by replacing sequences of space
8429 * (#x20) characters by a single space (#x20) character.
8430 * All attributes for which no declaration has been read should be treated
8431 * by a non-validating parser as if declared CDATA.
8432 *
8433 * Returns the AttValue parsed or NULL. The value has to be freed by the
8434 * caller if it was copied, this can be detected by val[*len] == 0.
8435 */
8436
8437static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008438xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8439 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008440{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008441 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008442 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008443 xmlChar *ret = NULL;
8444
8445 GROW;
8446 in = (xmlChar *) CUR_PTR;
8447 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008448 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008449 return (NULL);
8450 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008451 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008452
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008453 /*
8454 * try to handle in this routine the most common case where no
8455 * allocation of a new string is required and where content is
8456 * pure ASCII.
8457 */
8458 limit = *in++;
8459 end = ctxt->input->end;
8460 start = in;
8461 if (in >= end) {
8462 const xmlChar *oldbase = ctxt->input->base;
8463 GROW;
8464 if (oldbase != ctxt->input->base) {
8465 long delta = ctxt->input->base - oldbase;
8466 start = start + delta;
8467 in = in + delta;
8468 }
8469 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008470 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008471 if (normalize) {
8472 /*
8473 * Skip any leading spaces
8474 */
8475 while ((in < end) && (*in != limit) &&
8476 ((*in == 0x20) || (*in == 0x9) ||
8477 (*in == 0xA) || (*in == 0xD))) {
8478 in++;
8479 start = in;
8480 if (in >= end) {
8481 const xmlChar *oldbase = ctxt->input->base;
8482 GROW;
8483 if (oldbase != ctxt->input->base) {
8484 long delta = ctxt->input->base - oldbase;
8485 start = start + delta;
8486 in = in + delta;
8487 }
8488 end = ctxt->input->end;
8489 }
8490 }
8491 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8492 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8493 if ((*in++ == 0x20) && (*in == 0x20)) break;
8494 if (in >= end) {
8495 const xmlChar *oldbase = ctxt->input->base;
8496 GROW;
8497 if (oldbase != ctxt->input->base) {
8498 long delta = ctxt->input->base - oldbase;
8499 start = start + delta;
8500 in = in + delta;
8501 }
8502 end = ctxt->input->end;
8503 }
8504 }
8505 last = in;
8506 /*
8507 * skip the trailing blanks
8508 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008509 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008510 while ((in < end) && (*in != limit) &&
8511 ((*in == 0x20) || (*in == 0x9) ||
8512 (*in == 0xA) || (*in == 0xD))) {
8513 in++;
8514 if (in >= end) {
8515 const xmlChar *oldbase = ctxt->input->base;
8516 GROW;
8517 if (oldbase != ctxt->input->base) {
8518 long delta = ctxt->input->base - oldbase;
8519 start = start + delta;
8520 in = in + delta;
8521 last = last + delta;
8522 }
8523 end = ctxt->input->end;
8524 }
8525 }
8526 if (*in != limit) goto need_complex;
8527 } else {
8528 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8529 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8530 in++;
8531 if (in >= end) {
8532 const xmlChar *oldbase = ctxt->input->base;
8533 GROW;
8534 if (oldbase != ctxt->input->base) {
8535 long delta = ctxt->input->base - oldbase;
8536 start = start + delta;
8537 in = in + delta;
8538 }
8539 end = ctxt->input->end;
8540 }
8541 }
8542 last = in;
8543 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008544 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008545 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008546 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008547 *len = last - start;
8548 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008549 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008550 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008551 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008552 }
8553 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008554 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008555 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008556need_complex:
8557 if (alloc) *alloc = 1;
8558 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008559}
8560
8561/**
8562 * xmlParseAttribute2:
8563 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008564 * @pref: the element prefix
8565 * @elem: the element name
8566 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008567 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008568 * @len: an int * to save the length of the attribute
8569 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008570 *
8571 * parse an attribute in the new SAX2 framework.
8572 *
8573 * Returns the attribute name, and the value in *value, .
8574 */
8575
8576static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008577xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008578 const xmlChar * pref, const xmlChar * elem,
8579 const xmlChar ** prefix, xmlChar ** value,
8580 int *len, int *alloc)
8581{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008582 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008583 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008584 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008585
8586 *value = NULL;
8587 GROW;
8588 name = xmlParseQName(ctxt, prefix);
8589 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008590 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8591 "error parsing attribute name\n");
8592 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008593 }
8594
8595 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008596 * get the type if needed
8597 */
8598 if (ctxt->attsSpecial != NULL) {
8599 int type;
8600
8601 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008602 pref, elem, *prefix, name);
8603 if (type != 0)
8604 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008605 }
8606
8607 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008608 * read the value
8609 */
8610 SKIP_BLANKS;
8611 if (RAW == '=') {
8612 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008613 SKIP_BLANKS;
8614 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8615 if (normalize) {
8616 /*
8617 * Sometimes a second normalisation pass for spaces is needed
8618 * but that only happens if charrefs or entities refernces
8619 * have been used in the attribute value, i.e. the attribute
8620 * value have been extracted in an allocated string already.
8621 */
8622 if (*alloc) {
8623 const xmlChar *val2;
8624
8625 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008626 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008627 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008628 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008629 }
8630 }
8631 }
8632 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008633 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008634 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8635 "Specification mandate value for attribute %s\n",
8636 name);
8637 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008638 }
8639
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008640 if (*prefix == ctxt->str_xml) {
8641 /*
8642 * Check that xml:lang conforms to the specification
8643 * No more registered as an error, just generate a warning now
8644 * since this was deprecated in XML second edition
8645 */
8646 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8647 internal_val = xmlStrndup(val, *len);
8648 if (!xmlCheckLanguageID(internal_val)) {
8649 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8650 "Malformed value for xml:lang : %s\n",
8651 internal_val, NULL);
8652 }
8653 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008654
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008655 /*
8656 * Check that xml:space conforms to the specification
8657 */
8658 if (xmlStrEqual(name, BAD_CAST "space")) {
8659 internal_val = xmlStrndup(val, *len);
8660 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8661 *(ctxt->space) = 0;
8662 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8663 *(ctxt->space) = 1;
8664 else {
8665 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8666 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8667 internal_val, NULL);
8668 }
8669 }
8670 if (internal_val) {
8671 xmlFree(internal_val);
8672 }
8673 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008674
8675 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008676 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008677}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008678/**
8679 * xmlParseStartTag2:
8680 * @ctxt: an XML parser context
8681 *
8682 * parse a start of tag either for rule element or
8683 * EmptyElement. In both case we don't parse the tag closing chars.
8684 * This routine is called when running SAX2 parsing
8685 *
8686 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8687 *
8688 * [ WFC: Unique Att Spec ]
8689 * No attribute name may appear more than once in the same start-tag or
8690 * empty-element tag.
8691 *
8692 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8693 *
8694 * [ WFC: Unique Att Spec ]
8695 * No attribute name may appear more than once in the same start-tag or
8696 * empty-element tag.
8697 *
8698 * With namespace:
8699 *
8700 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8701 *
8702 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8703 *
8704 * Returns the element name parsed
8705 */
8706
8707static const xmlChar *
8708xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008709 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008710 const xmlChar *localname;
8711 const xmlChar *prefix;
8712 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008713 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008714 const xmlChar *nsname;
8715 xmlChar *attvalue;
8716 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008717 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008718 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008719 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008720 const xmlChar *base;
8721 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008722 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008723
8724 if (RAW != '<') return(NULL);
8725 NEXT1;
8726
8727 /*
8728 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8729 * point since the attribute values may be stored as pointers to
8730 * the buffer and calling SHRINK would destroy them !
8731 * The Shrinking is only possible once the full set of attribute
8732 * callbacks have been done.
8733 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008734reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008735 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008736 base = ctxt->input->base;
8737 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008738 oldline = ctxt->input->line;
8739 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008740 nbatts = 0;
8741 nratts = 0;
8742 nbdef = 0;
8743 nbNs = 0;
8744 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008745 /* Forget any namespaces added during an earlier parse of this element. */
8746 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008747
8748 localname = xmlParseQName(ctxt, &prefix);
8749 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008750 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8751 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008752 return(NULL);
8753 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008754 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008755
8756 /*
8757 * Now parse the attributes, it ends up with the ending
8758 *
8759 * (S Attribute)* S?
8760 */
8761 SKIP_BLANKS;
8762 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008763 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008764
8765 while ((RAW != '>') &&
8766 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008767 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008768 const xmlChar *q = CUR_PTR;
8769 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008770 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008771
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008772 attname = xmlParseAttribute2(ctxt, prefix, localname,
8773 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008774 if (ctxt->input->base != base) {
8775 if ((attvalue != NULL) && (alloc != 0))
8776 xmlFree(attvalue);
8777 attvalue = NULL;
8778 goto base_changed;
8779 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008780 if ((attname != NULL) && (attvalue != NULL)) {
8781 if (len < 0) len = xmlStrlen(attvalue);
8782 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008783 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8784 xmlURIPtr uri;
8785
8786 if (*URL != 0) {
8787 uri = xmlParseURI((const char *) URL);
8788 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008789 xmlNsErr(ctxt, XML_WAR_NS_URI,
8790 "xmlns: '%s' is not a valid URI\n",
8791 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008792 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008793 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008794 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8795 "xmlns: URI %s is not absolute\n",
8796 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008797 }
8798 xmlFreeURI(uri);
8799 }
Daniel Veillard37334572008-07-31 08:20:02 +00008800 if (URL == ctxt->str_xml_ns) {
8801 if (attname != ctxt->str_xml) {
8802 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8803 "xml namespace URI cannot be the default namespace\n",
8804 NULL, NULL, NULL);
8805 }
8806 goto skip_default_ns;
8807 }
8808 if ((len == 29) &&
8809 (xmlStrEqual(URL,
8810 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8811 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8812 "reuse of the xmlns namespace name is forbidden\n",
8813 NULL, NULL, NULL);
8814 goto skip_default_ns;
8815 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008816 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008817 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008818 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008819 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008820 for (j = 1;j <= nbNs;j++)
8821 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8822 break;
8823 if (j <= nbNs)
8824 xmlErrAttributeDup(ctxt, NULL, attname);
8825 else
8826 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008827skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008828 if (alloc != 0) xmlFree(attvalue);
8829 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008830 continue;
8831 }
8832 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008833 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8834 xmlURIPtr uri;
8835
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008836 if (attname == ctxt->str_xml) {
8837 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008838 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8839 "xml namespace prefix mapped to wrong URI\n",
8840 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008841 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008842 /*
8843 * Do not keep a namespace definition node
8844 */
Daniel Veillard37334572008-07-31 08:20:02 +00008845 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008846 }
Daniel Veillard37334572008-07-31 08:20:02 +00008847 if (URL == ctxt->str_xml_ns) {
8848 if (attname != ctxt->str_xml) {
8849 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8850 "xml namespace URI mapped to wrong prefix\n",
8851 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008852 }
Daniel Veillard37334572008-07-31 08:20:02 +00008853 goto skip_ns;
8854 }
8855 if (attname == ctxt->str_xmlns) {
8856 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8857 "redefinition of the xmlns prefix is forbidden\n",
8858 NULL, NULL, NULL);
8859 goto skip_ns;
8860 }
8861 if ((len == 29) &&
8862 (xmlStrEqual(URL,
8863 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8864 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8865 "reuse of the xmlns namespace name is forbidden\n",
8866 NULL, NULL, NULL);
8867 goto skip_ns;
8868 }
8869 if ((URL == NULL) || (URL[0] == 0)) {
8870 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8871 "xmlns:%s: Empty XML namespace is not allowed\n",
8872 attname, NULL, NULL);
8873 goto skip_ns;
8874 } else {
8875 uri = xmlParseURI((const char *) URL);
8876 if (uri == NULL) {
8877 xmlNsErr(ctxt, XML_WAR_NS_URI,
8878 "xmlns:%s: '%s' is not a valid URI\n",
8879 attname, URL, NULL);
8880 } else {
8881 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8882 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8883 "xmlns:%s: URI %s is not absolute\n",
8884 attname, URL, NULL);
8885 }
8886 xmlFreeURI(uri);
8887 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008888 }
8889
Daniel Veillard0fb18932003-09-07 09:14:37 +00008890 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008891 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008892 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008893 for (j = 1;j <= nbNs;j++)
8894 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8895 break;
8896 if (j <= nbNs)
8897 xmlErrAttributeDup(ctxt, aprefix, attname);
8898 else
8899 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008900skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008901 if (alloc != 0) xmlFree(attvalue);
8902 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008903 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008904 continue;
8905 }
8906
8907 /*
8908 * Add the pair to atts
8909 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008910 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8911 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008912 if (attvalue[len] == 0)
8913 xmlFree(attvalue);
8914 goto failed;
8915 }
8916 maxatts = ctxt->maxatts;
8917 atts = ctxt->atts;
8918 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008919 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008920 atts[nbatts++] = attname;
8921 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008922 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008923 atts[nbatts++] = attvalue;
8924 attvalue += len;
8925 atts[nbatts++] = attvalue;
8926 /*
8927 * tag if some deallocation is needed
8928 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008929 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008930 } else {
8931 if ((attvalue != NULL) && (attvalue[len] == 0))
8932 xmlFree(attvalue);
8933 }
8934
Daniel Veillard37334572008-07-31 08:20:02 +00008935failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008936
8937 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008938 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008939 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8940 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008941 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008942 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8943 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008944 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008945 }
8946 SKIP_BLANKS;
8947 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8948 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008949 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008950 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008951 break;
8952 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008953 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008954 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008955 }
8956
Daniel Veillard0fb18932003-09-07 09:14:37 +00008957 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008958 * The attributes defaulting
8959 */
8960 if (ctxt->attsDefault != NULL) {
8961 xmlDefAttrsPtr defaults;
8962
8963 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8964 if (defaults != NULL) {
8965 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008966 attname = defaults->values[5 * i];
8967 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008968
8969 /*
8970 * special work for namespaces defaulted defs
8971 */
8972 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8973 /*
8974 * check that it's not a defined namespace
8975 */
8976 for (j = 1;j <= nbNs;j++)
8977 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8978 break;
8979 if (j <= nbNs) continue;
8980
8981 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008982 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008983 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008984 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008985 nbNs++;
8986 }
8987 } else if (aprefix == ctxt->str_xmlns) {
8988 /*
8989 * check that it's not a defined namespace
8990 */
8991 for (j = 1;j <= nbNs;j++)
8992 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8993 break;
8994 if (j <= nbNs) continue;
8995
8996 nsname = xmlGetNamespace(ctxt, attname);
8997 if (nsname != defaults->values[2]) {
8998 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008999 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009000 nbNs++;
9001 }
9002 } else {
9003 /*
9004 * check that it's not a defined attribute
9005 */
9006 for (j = 0;j < nbatts;j+=5) {
9007 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9008 break;
9009 }
9010 if (j < nbatts) continue;
9011
9012 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9013 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009014 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009015 }
9016 maxatts = ctxt->maxatts;
9017 atts = ctxt->atts;
9018 }
9019 atts[nbatts++] = attname;
9020 atts[nbatts++] = aprefix;
9021 if (aprefix == NULL)
9022 atts[nbatts++] = NULL;
9023 else
9024 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009025 atts[nbatts++] = defaults->values[5 * i + 2];
9026 atts[nbatts++] = defaults->values[5 * i + 3];
9027 if ((ctxt->standalone == 1) &&
9028 (defaults->values[5 * i + 4] != NULL)) {
9029 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9030 "standalone: attribute %s on %s defaulted from external subset\n",
9031 attname, localname);
9032 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009033 nbdef++;
9034 }
9035 }
9036 }
9037 }
9038
Daniel Veillarde70c8772003-11-25 07:21:18 +00009039 /*
9040 * The attributes checkings
9041 */
9042 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009043 /*
9044 * The default namespace does not apply to attribute names.
9045 */
9046 if (atts[i + 1] != NULL) {
9047 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9048 if (nsname == NULL) {
9049 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9050 "Namespace prefix %s for %s on %s is not defined\n",
9051 atts[i + 1], atts[i], localname);
9052 }
9053 atts[i + 2] = nsname;
9054 } else
9055 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009056 /*
9057 * [ WFC: Unique Att Spec ]
9058 * No attribute name may appear more than once in the same
9059 * start-tag or empty-element tag.
9060 * As extended by the Namespace in XML REC.
9061 */
9062 for (j = 0; j < i;j += 5) {
9063 if (atts[i] == atts[j]) {
9064 if (atts[i+1] == atts[j+1]) {
9065 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9066 break;
9067 }
9068 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9069 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9070 "Namespaced Attribute %s in '%s' redefined\n",
9071 atts[i], nsname, NULL);
9072 break;
9073 }
9074 }
9075 }
9076 }
9077
Daniel Veillarde57ec792003-09-10 10:50:59 +00009078 nsname = xmlGetNamespace(ctxt, prefix);
9079 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009080 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9081 "Namespace prefix %s on %s is not defined\n",
9082 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009083 }
9084 *pref = prefix;
9085 *URI = nsname;
9086
9087 /*
9088 * SAX: Start of Element !
9089 */
9090 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9091 (!ctxt->disableSAX)) {
9092 if (nbNs > 0)
9093 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9094 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9095 nbatts / 5, nbdef, atts);
9096 else
9097 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9098 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9099 }
9100
9101 /*
9102 * Free up attribute allocated strings if needed
9103 */
9104 if (attval != 0) {
9105 for (i = 3,j = 0; j < nratts;i += 5,j++)
9106 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9107 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009108 }
9109
9110 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009111
9112base_changed:
9113 /*
9114 * the attribute strings are valid iif the base didn't changed
9115 */
9116 if (attval != 0) {
9117 for (i = 3,j = 0; j < nratts;i += 5,j++)
9118 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9119 xmlFree((xmlChar *) atts[i]);
9120 }
9121 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009122 ctxt->input->line = oldline;
9123 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009124 if (ctxt->wellFormed == 1) {
9125 goto reparse;
9126 }
9127 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009128}
9129
9130/**
9131 * xmlParseEndTag2:
9132 * @ctxt: an XML parser context
9133 * @line: line of the start tag
9134 * @nsNr: number of namespaces on the start tag
9135 *
9136 * parse an end of tag
9137 *
9138 * [42] ETag ::= '</' Name S? '>'
9139 *
9140 * With namespace
9141 *
9142 * [NS 9] ETag ::= '</' QName S? '>'
9143 */
9144
9145static void
9146xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009147 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009148 const xmlChar *name;
9149
9150 GROW;
9151 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009152 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009153 return;
9154 }
9155 SKIP(2);
9156
William M. Brack13dfa872004-09-18 04:52:08 +00009157 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009158 if (ctxt->input->cur[tlen] == '>') {
9159 ctxt->input->cur += tlen + 1;
9160 goto done;
9161 }
9162 ctxt->input->cur += tlen;
9163 name = (xmlChar*)1;
9164 } else {
9165 if (prefix == NULL)
9166 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9167 else
9168 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9169 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009170
9171 /*
9172 * We should definitely be at the ending "S? '>'" part
9173 */
9174 GROW;
9175 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009176 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009177 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009178 } else
9179 NEXT1;
9180
9181 /*
9182 * [ WFC: Element Type Match ]
9183 * The Name in an element's end-tag must match the element type in the
9184 * start-tag.
9185 *
9186 */
9187 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009188 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009189 if ((line == 0) && (ctxt->node != NULL))
9190 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009191 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009192 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009193 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009194 }
9195
9196 /*
9197 * SAX: End of Tag
9198 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009199done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009200 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9201 (!ctxt->disableSAX))
9202 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9203
Daniel Veillard0fb18932003-09-07 09:14:37 +00009204 spacePop(ctxt);
9205 if (nsNr != 0)
9206 nsPop(ctxt, nsNr);
9207 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009208}
9209
9210/**
Owen Taylor3473f882001-02-23 17:55:21 +00009211 * xmlParseCDSect:
9212 * @ctxt: an XML parser context
9213 *
9214 * Parse escaped pure raw content.
9215 *
9216 * [18] CDSect ::= CDStart CData CDEnd
9217 *
9218 * [19] CDStart ::= '<![CDATA['
9219 *
9220 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9221 *
9222 * [21] CDEnd ::= ']]>'
9223 */
9224void
9225xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9226 xmlChar *buf = NULL;
9227 int len = 0;
9228 int size = XML_PARSER_BUFFER_SIZE;
9229 int r, rl;
9230 int s, sl;
9231 int cur, l;
9232 int count = 0;
9233
Daniel Veillard8f597c32003-10-06 08:19:27 +00009234 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009235 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009236 SKIP(9);
9237 } else
9238 return;
9239
9240 ctxt->instate = XML_PARSER_CDATA_SECTION;
9241 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009242 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009243 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009244 ctxt->instate = XML_PARSER_CONTENT;
9245 return;
9246 }
9247 NEXTL(rl);
9248 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009249 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009250 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009251 ctxt->instate = XML_PARSER_CONTENT;
9252 return;
9253 }
9254 NEXTL(sl);
9255 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009256 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009257 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009258 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009259 return;
9260 }
William M. Brack871611b2003-10-18 04:53:14 +00009261 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009262 ((r != ']') || (s != ']') || (cur != '>'))) {
9263 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009264 xmlChar *tmp;
9265
Owen Taylor3473f882001-02-23 17:55:21 +00009266 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009267 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9268 if (tmp == NULL) {
9269 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009270 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009271 return;
9272 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009273 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009274 }
9275 COPY_BUF(rl,buf,len,r);
9276 r = s;
9277 rl = sl;
9278 s = cur;
9279 sl = l;
9280 count++;
9281 if (count > 50) {
9282 GROW;
9283 count = 0;
9284 }
9285 NEXTL(l);
9286 cur = CUR_CHAR(l);
9287 }
9288 buf[len] = 0;
9289 ctxt->instate = XML_PARSER_CONTENT;
9290 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009291 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009292 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009293 xmlFree(buf);
9294 return;
9295 }
9296 NEXTL(l);
9297
9298 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009299 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009300 */
9301 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9302 if (ctxt->sax->cdataBlock != NULL)
9303 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009304 else if (ctxt->sax->characters != NULL)
9305 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009306 }
9307 xmlFree(buf);
9308}
9309
9310/**
9311 * xmlParseContent:
9312 * @ctxt: an XML parser context
9313 *
9314 * Parse a content:
9315 *
9316 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9317 */
9318
9319void
9320xmlParseContent(xmlParserCtxtPtr ctxt) {
9321 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009322 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009323 ((RAW != '<') || (NXT(1) != '/')) &&
9324 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009325 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009326 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009327 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009328
9329 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009330 * First case : a Processing Instruction.
9331 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009332 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009333 xmlParsePI(ctxt);
9334 }
9335
9336 /*
9337 * Second case : a CDSection
9338 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009339 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009340 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009341 xmlParseCDSect(ctxt);
9342 }
9343
9344 /*
9345 * Third case : a comment
9346 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009347 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009348 (NXT(2) == '-') && (NXT(3) == '-')) {
9349 xmlParseComment(ctxt);
9350 ctxt->instate = XML_PARSER_CONTENT;
9351 }
9352
9353 /*
9354 * Fourth case : a sub-element.
9355 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009356 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009357 xmlParseElement(ctxt);
9358 }
9359
9360 /*
9361 * Fifth case : a reference. If if has not been resolved,
9362 * parsing returns it's Name, create the node
9363 */
9364
Daniel Veillard21a0f912001-02-25 19:54:14 +00009365 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009366 xmlParseReference(ctxt);
9367 }
9368
9369 /*
9370 * Last case, text. Note that References are handled directly.
9371 */
9372 else {
9373 xmlParseCharData(ctxt, 0);
9374 }
9375
9376 GROW;
9377 /*
9378 * Pop-up of finished entities.
9379 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009380 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009381 xmlPopInput(ctxt);
9382 SHRINK;
9383
Daniel Veillardfdc91562002-07-01 21:52:03 +00009384 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009385 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9386 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009387 ctxt->instate = XML_PARSER_EOF;
9388 break;
9389 }
9390 }
9391}
9392
9393/**
9394 * xmlParseElement:
9395 * @ctxt: an XML parser context
9396 *
9397 * parse an XML element, this is highly recursive
9398 *
9399 * [39] element ::= EmptyElemTag | STag content ETag
9400 *
9401 * [ WFC: Element Type Match ]
9402 * The Name in an element's end-tag must match the element type in the
9403 * start-tag.
9404 *
Owen Taylor3473f882001-02-23 17:55:21 +00009405 */
9406
9407void
9408xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009409 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009410 const xmlChar *prefix;
9411 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00009412 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009413 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009414 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009415 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009416
Daniel Veillard8915c152008-08-26 13:05:34 +00009417 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9418 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9419 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9420 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9421 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009422 ctxt->instate = XML_PARSER_EOF;
9423 return;
9424 }
9425
Owen Taylor3473f882001-02-23 17:55:21 +00009426 /* Capture start position */
9427 if (ctxt->record_info) {
9428 node_info.begin_pos = ctxt->input->consumed +
9429 (CUR_PTR - ctxt->input->base);
9430 node_info.begin_line = ctxt->input->line;
9431 }
9432
9433 if (ctxt->spaceNr == 0)
9434 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009435 else if (*ctxt->space == -2)
9436 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009437 else
9438 spacePush(ctxt, *ctxt->space);
9439
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009440 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009441#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009442 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009443#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009444 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009445#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009446 else
9447 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009448#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009449 if (name == NULL) {
9450 spacePop(ctxt);
9451 return;
9452 }
9453 namePush(ctxt, name);
9454 ret = ctxt->node;
9455
Daniel Veillard4432df22003-09-28 18:58:27 +00009456#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009457 /*
9458 * [ VC: Root Element Type ]
9459 * The Name in the document type declaration must match the element
9460 * type of the root element.
9461 */
9462 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9463 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9464 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009465#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009466
9467 /*
9468 * Check for an Empty Element.
9469 */
9470 if ((RAW == '/') && (NXT(1) == '>')) {
9471 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009472 if (ctxt->sax2) {
9473 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9474 (!ctxt->disableSAX))
9475 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009476#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009477 } else {
9478 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9479 (!ctxt->disableSAX))
9480 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009481#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009482 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009483 namePop(ctxt);
9484 spacePop(ctxt);
9485 if (nsNr != ctxt->nsNr)
9486 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009487 if ( ret != NULL && ctxt->record_info ) {
9488 node_info.end_pos = ctxt->input->consumed +
9489 (CUR_PTR - ctxt->input->base);
9490 node_info.end_line = ctxt->input->line;
9491 node_info.node = ret;
9492 xmlParserAddNodeInfo(ctxt, &node_info);
9493 }
9494 return;
9495 }
9496 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009497 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009498 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009499 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9500 "Couldn't find end of Start Tag %s line %d\n",
9501 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009502
9503 /*
9504 * end of parsing of this node.
9505 */
9506 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009507 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009508 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009509 if (nsNr != ctxt->nsNr)
9510 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009511
9512 /*
9513 * Capture end position and add node
9514 */
9515 if ( ret != NULL && ctxt->record_info ) {
9516 node_info.end_pos = ctxt->input->consumed +
9517 (CUR_PTR - ctxt->input->base);
9518 node_info.end_line = ctxt->input->line;
9519 node_info.node = ret;
9520 xmlParserAddNodeInfo(ctxt, &node_info);
9521 }
9522 return;
9523 }
9524
9525 /*
9526 * Parse the content of the element:
9527 */
9528 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009529 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009530 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009531 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009532 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009533
9534 /*
9535 * end of parsing of this node.
9536 */
9537 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009538 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009539 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009540 if (nsNr != ctxt->nsNr)
9541 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009542 return;
9543 }
9544
9545 /*
9546 * parse the end of tag: '</' should be here.
9547 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009548 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009549 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009550 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009551 }
9552#ifdef LIBXML_SAX1_ENABLED
9553 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009554 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009555#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009556
9557 /*
9558 * Capture end position and add node
9559 */
9560 if ( ret != NULL && ctxt->record_info ) {
9561 node_info.end_pos = ctxt->input->consumed +
9562 (CUR_PTR - ctxt->input->base);
9563 node_info.end_line = ctxt->input->line;
9564 node_info.node = ret;
9565 xmlParserAddNodeInfo(ctxt, &node_info);
9566 }
9567}
9568
9569/**
9570 * xmlParseVersionNum:
9571 * @ctxt: an XML parser context
9572 *
9573 * parse the XML version value.
9574 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009575 * [26] VersionNum ::= '1.' [0-9]+
9576 *
9577 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009578 *
9579 * Returns the string giving the XML version number, or NULL
9580 */
9581xmlChar *
9582xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9583 xmlChar *buf = NULL;
9584 int len = 0;
9585 int size = 10;
9586 xmlChar cur;
9587
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009588 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009589 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009590 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009591 return(NULL);
9592 }
9593 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009594 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009595 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009596 return(NULL);
9597 }
9598 buf[len++] = cur;
9599 NEXT;
9600 cur=CUR;
9601 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009602 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009603 return(NULL);
9604 }
9605 buf[len++] = cur;
9606 NEXT;
9607 cur=CUR;
9608 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009609 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009610 xmlChar *tmp;
9611
Owen Taylor3473f882001-02-23 17:55:21 +00009612 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009613 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9614 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009615 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009616 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009617 return(NULL);
9618 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009619 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009620 }
9621 buf[len++] = cur;
9622 NEXT;
9623 cur=CUR;
9624 }
9625 buf[len] = 0;
9626 return(buf);
9627}
9628
9629/**
9630 * xmlParseVersionInfo:
9631 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009632 *
Owen Taylor3473f882001-02-23 17:55:21 +00009633 * parse the XML version.
9634 *
9635 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009636 *
Owen Taylor3473f882001-02-23 17:55:21 +00009637 * [25] Eq ::= S? '=' S?
9638 *
9639 * Returns the version string, e.g. "1.0"
9640 */
9641
9642xmlChar *
9643xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9644 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009645
Daniel Veillarda07050d2003-10-19 14:46:32 +00009646 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009647 SKIP(7);
9648 SKIP_BLANKS;
9649 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009650 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009651 return(NULL);
9652 }
9653 NEXT;
9654 SKIP_BLANKS;
9655 if (RAW == '"') {
9656 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009657 version = xmlParseVersionNum(ctxt);
9658 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009659 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009660 } else
9661 NEXT;
9662 } else if (RAW == '\''){
9663 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009664 version = xmlParseVersionNum(ctxt);
9665 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009666 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009667 } else
9668 NEXT;
9669 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009670 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009671 }
9672 }
9673 return(version);
9674}
9675
9676/**
9677 * xmlParseEncName:
9678 * @ctxt: an XML parser context
9679 *
9680 * parse the XML encoding name
9681 *
9682 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9683 *
9684 * Returns the encoding name value or NULL
9685 */
9686xmlChar *
9687xmlParseEncName(xmlParserCtxtPtr ctxt) {
9688 xmlChar *buf = NULL;
9689 int len = 0;
9690 int size = 10;
9691 xmlChar cur;
9692
9693 cur = CUR;
9694 if (((cur >= 'a') && (cur <= 'z')) ||
9695 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009696 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009697 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009698 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009699 return(NULL);
9700 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009701
Owen Taylor3473f882001-02-23 17:55:21 +00009702 buf[len++] = cur;
9703 NEXT;
9704 cur = CUR;
9705 while (((cur >= 'a') && (cur <= 'z')) ||
9706 ((cur >= 'A') && (cur <= 'Z')) ||
9707 ((cur >= '0') && (cur <= '9')) ||
9708 (cur == '.') || (cur == '_') ||
9709 (cur == '-')) {
9710 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009711 xmlChar *tmp;
9712
Owen Taylor3473f882001-02-23 17:55:21 +00009713 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009714 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9715 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009716 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009717 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009718 return(NULL);
9719 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009720 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009721 }
9722 buf[len++] = cur;
9723 NEXT;
9724 cur = CUR;
9725 if (cur == 0) {
9726 SHRINK;
9727 GROW;
9728 cur = CUR;
9729 }
9730 }
9731 buf[len] = 0;
9732 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009733 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009734 }
9735 return(buf);
9736}
9737
9738/**
9739 * xmlParseEncodingDecl:
9740 * @ctxt: an XML parser context
9741 *
9742 * parse the XML encoding declaration
9743 *
9744 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9745 *
9746 * this setups the conversion filters.
9747 *
9748 * Returns the encoding value or NULL
9749 */
9750
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009751const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009752xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9753 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009754
9755 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009756 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009757 SKIP(8);
9758 SKIP_BLANKS;
9759 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009760 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009761 return(NULL);
9762 }
9763 NEXT;
9764 SKIP_BLANKS;
9765 if (RAW == '"') {
9766 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009767 encoding = xmlParseEncName(ctxt);
9768 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009769 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009770 } else
9771 NEXT;
9772 } else if (RAW == '\''){
9773 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009774 encoding = xmlParseEncName(ctxt);
9775 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009776 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009777 } else
9778 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009779 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009780 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009781 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009782 /*
9783 * UTF-16 encoding stwich has already taken place at this stage,
9784 * more over the little-endian/big-endian selection is already done
9785 */
9786 if ((encoding != NULL) &&
9787 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9788 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009789 /*
9790 * If no encoding was passed to the parser, that we are
9791 * using UTF-16 and no decoder is present i.e. the
9792 * document is apparently UTF-8 compatible, then raise an
9793 * encoding mismatch fatal error
9794 */
9795 if ((ctxt->encoding == NULL) &&
9796 (ctxt->input->buf != NULL) &&
9797 (ctxt->input->buf->encoder == NULL)) {
9798 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9799 "Document labelled UTF-16 but has UTF-8 content\n");
9800 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009801 if (ctxt->encoding != NULL)
9802 xmlFree((xmlChar *) ctxt->encoding);
9803 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009804 }
9805 /*
9806 * UTF-8 encoding is handled natively
9807 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009808 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009809 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9810 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009811 if (ctxt->encoding != NULL)
9812 xmlFree((xmlChar *) ctxt->encoding);
9813 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009814 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009815 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009816 xmlCharEncodingHandlerPtr handler;
9817
9818 if (ctxt->input->encoding != NULL)
9819 xmlFree((xmlChar *) ctxt->input->encoding);
9820 ctxt->input->encoding = encoding;
9821
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009822 handler = xmlFindCharEncodingHandler((const char *) encoding);
9823 if (handler != NULL) {
9824 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009825 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009826 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009827 "Unsupported encoding %s\n", encoding);
9828 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009829 }
9830 }
9831 }
9832 return(encoding);
9833}
9834
9835/**
9836 * xmlParseSDDecl:
9837 * @ctxt: an XML parser context
9838 *
9839 * parse the XML standalone declaration
9840 *
9841 * [32] SDDecl ::= S 'standalone' Eq
9842 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9843 *
9844 * [ VC: Standalone Document Declaration ]
9845 * TODO The standalone document declaration must have the value "no"
9846 * if any external markup declarations contain declarations of:
9847 * - attributes with default values, if elements to which these
9848 * attributes apply appear in the document without specifications
9849 * of values for these attributes, or
9850 * - entities (other than amp, lt, gt, apos, quot), if references
9851 * to those entities appear in the document, or
9852 * - attributes with values subject to normalization, where the
9853 * attribute appears in the document with a value which will change
9854 * as a result of normalization, or
9855 * - element types with element content, if white space occurs directly
9856 * within any instance of those types.
9857 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009858 * Returns:
9859 * 1 if standalone="yes"
9860 * 0 if standalone="no"
9861 * -2 if standalone attribute is missing or invalid
9862 * (A standalone value of -2 means that the XML declaration was found,
9863 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009864 */
9865
9866int
9867xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009868 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009869
9870 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009871 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009872 SKIP(10);
9873 SKIP_BLANKS;
9874 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009875 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009876 return(standalone);
9877 }
9878 NEXT;
9879 SKIP_BLANKS;
9880 if (RAW == '\''){
9881 NEXT;
9882 if ((RAW == 'n') && (NXT(1) == 'o')) {
9883 standalone = 0;
9884 SKIP(2);
9885 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9886 (NXT(2) == 's')) {
9887 standalone = 1;
9888 SKIP(3);
9889 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009890 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009891 }
9892 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009893 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009894 } else
9895 NEXT;
9896 } else if (RAW == '"'){
9897 NEXT;
9898 if ((RAW == 'n') && (NXT(1) == 'o')) {
9899 standalone = 0;
9900 SKIP(2);
9901 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9902 (NXT(2) == 's')) {
9903 standalone = 1;
9904 SKIP(3);
9905 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009906 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009907 }
9908 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009909 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009910 } else
9911 NEXT;
9912 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009913 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009914 }
9915 }
9916 return(standalone);
9917}
9918
9919/**
9920 * xmlParseXMLDecl:
9921 * @ctxt: an XML parser context
9922 *
9923 * parse an XML declaration header
9924 *
9925 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9926 */
9927
9928void
9929xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9930 xmlChar *version;
9931
9932 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009933 * This value for standalone indicates that the document has an
9934 * XML declaration but it does not have a standalone attribute.
9935 * It will be overwritten later if a standalone attribute is found.
9936 */
9937 ctxt->input->standalone = -2;
9938
9939 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009940 * We know that '<?xml' is here.
9941 */
9942 SKIP(5);
9943
William M. Brack76e95df2003-10-18 16:20:14 +00009944 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009945 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9946 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009947 }
9948 SKIP_BLANKS;
9949
9950 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009951 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009952 */
9953 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009954 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009955 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009956 } else {
9957 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9958 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009959 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009960 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009961 if (ctxt->options & XML_PARSE_OLD10) {
9962 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9963 "Unsupported version '%s'\n",
9964 version);
9965 } else {
9966 if ((version[0] == '1') && ((version[1] == '.'))) {
9967 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9968 "Unsupported version '%s'\n",
9969 version, NULL);
9970 } else {
9971 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9972 "Unsupported version '%s'\n",
9973 version);
9974 }
9975 }
Daniel Veillard19840942001-11-29 16:11:38 +00009976 }
9977 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009978 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009979 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009980 }
Owen Taylor3473f882001-02-23 17:55:21 +00009981
9982 /*
9983 * We may have the encoding declaration
9984 */
William M. Brack76e95df2003-10-18 16:20:14 +00009985 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009986 if ((RAW == '?') && (NXT(1) == '>')) {
9987 SKIP(2);
9988 return;
9989 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009990 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009991 }
9992 xmlParseEncodingDecl(ctxt);
9993 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9994 /*
9995 * The XML REC instructs us to stop parsing right here
9996 */
9997 return;
9998 }
9999
10000 /*
10001 * We may have the standalone status.
10002 */
William M. Brack76e95df2003-10-18 16:20:14 +000010003 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010004 if ((RAW == '?') && (NXT(1) == '>')) {
10005 SKIP(2);
10006 return;
10007 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010008 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010009 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010010
10011 /*
10012 * We can grow the input buffer freely at that point
10013 */
10014 GROW;
10015
Owen Taylor3473f882001-02-23 17:55:21 +000010016 SKIP_BLANKS;
10017 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10018
10019 SKIP_BLANKS;
10020 if ((RAW == '?') && (NXT(1) == '>')) {
10021 SKIP(2);
10022 } else if (RAW == '>') {
10023 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010024 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010025 NEXT;
10026 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010027 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010028 MOVETO_ENDTAG(CUR_PTR);
10029 NEXT;
10030 }
10031}
10032
10033/**
10034 * xmlParseMisc:
10035 * @ctxt: an XML parser context
10036 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010037 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010038 *
10039 * [27] Misc ::= Comment | PI | S
10040 */
10041
10042void
10043xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010044 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010045 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010046 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010047 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010048 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010049 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010050 NEXT;
10051 } else
10052 xmlParseComment(ctxt);
10053 }
10054}
10055
10056/**
10057 * xmlParseDocument:
10058 * @ctxt: an XML parser context
10059 *
10060 * parse an XML document (and build a tree if using the standard SAX
10061 * interface).
10062 *
10063 * [1] document ::= prolog element Misc*
10064 *
10065 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10066 *
10067 * Returns 0, -1 in case of error. the parser context is augmented
10068 * as a result of the parsing.
10069 */
10070
10071int
10072xmlParseDocument(xmlParserCtxtPtr ctxt) {
10073 xmlChar start[4];
10074 xmlCharEncoding enc;
10075
10076 xmlInitParser();
10077
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010078 if ((ctxt == NULL) || (ctxt->input == NULL))
10079 return(-1);
10080
Owen Taylor3473f882001-02-23 17:55:21 +000010081 GROW;
10082
10083 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010084 * SAX: detecting the level.
10085 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010086 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010087
10088 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010089 * SAX: beginning of the document processing.
10090 */
10091 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10092 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10093
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010094 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10095 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010096 /*
10097 * Get the 4 first bytes and decode the charset
10098 * if enc != XML_CHAR_ENCODING_NONE
10099 * plug some encoding conversion routines.
10100 */
10101 start[0] = RAW;
10102 start[1] = NXT(1);
10103 start[2] = NXT(2);
10104 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010105 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010106 if (enc != XML_CHAR_ENCODING_NONE) {
10107 xmlSwitchEncoding(ctxt, enc);
10108 }
Owen Taylor3473f882001-02-23 17:55:21 +000010109 }
10110
10111
10112 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010113 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010114 }
10115
10116 /*
10117 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010118 * do not GROW here to avoid the detected encoder to decode more
10119 * than just the first line
Owen Taylor3473f882001-02-23 17:55:21 +000010120 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000010121 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010122
10123 /*
10124 * Note that we will switch encoding on the fly.
10125 */
10126 xmlParseXMLDecl(ctxt);
10127 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10128 /*
10129 * The XML REC instructs us to stop parsing right here
10130 */
10131 return(-1);
10132 }
10133 ctxt->standalone = ctxt->input->standalone;
10134 SKIP_BLANKS;
10135 } else {
10136 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10137 }
10138 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10139 ctxt->sax->startDocument(ctxt->userData);
10140
10141 /*
10142 * The Misc part of the Prolog
10143 */
10144 GROW;
10145 xmlParseMisc(ctxt);
10146
10147 /*
10148 * Then possibly doc type declaration(s) and more Misc
10149 * (doctypedecl Misc*)?
10150 */
10151 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010152 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010153
10154 ctxt->inSubset = 1;
10155 xmlParseDocTypeDecl(ctxt);
10156 if (RAW == '[') {
10157 ctxt->instate = XML_PARSER_DTD;
10158 xmlParseInternalSubset(ctxt);
10159 }
10160
10161 /*
10162 * Create and update the external subset.
10163 */
10164 ctxt->inSubset = 2;
10165 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10166 (!ctxt->disableSAX))
10167 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10168 ctxt->extSubSystem, ctxt->extSubURI);
10169 ctxt->inSubset = 0;
10170
Daniel Veillardac4118d2008-01-11 05:27:32 +000010171 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010172
10173 ctxt->instate = XML_PARSER_PROLOG;
10174 xmlParseMisc(ctxt);
10175 }
10176
10177 /*
10178 * Time to start parsing the tree itself
10179 */
10180 GROW;
10181 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010182 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10183 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010184 } else {
10185 ctxt->instate = XML_PARSER_CONTENT;
10186 xmlParseElement(ctxt);
10187 ctxt->instate = XML_PARSER_EPILOG;
10188
10189
10190 /*
10191 * The Misc part at the end
10192 */
10193 xmlParseMisc(ctxt);
10194
Daniel Veillard561b7f82002-03-20 21:55:57 +000010195 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010196 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010197 }
10198 ctxt->instate = XML_PARSER_EOF;
10199 }
10200
10201 /*
10202 * SAX: end of the document processing.
10203 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010204 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010205 ctxt->sax->endDocument(ctxt->userData);
10206
Daniel Veillard5997aca2002-03-18 18:36:20 +000010207 /*
10208 * Remove locally kept entity definitions if the tree was not built
10209 */
10210 if ((ctxt->myDoc != NULL) &&
10211 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10212 xmlFreeDoc(ctxt->myDoc);
10213 ctxt->myDoc = NULL;
10214 }
10215
Daniel Veillardae0765b2008-07-31 19:54:59 +000010216 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10217 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10218 if (ctxt->valid)
10219 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10220 if (ctxt->nsWellFormed)
10221 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10222 if (ctxt->options & XML_PARSE_OLD10)
10223 ctxt->myDoc->properties |= XML_DOC_OLD10;
10224 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010225 if (! ctxt->wellFormed) {
10226 ctxt->valid = 0;
10227 return(-1);
10228 }
Owen Taylor3473f882001-02-23 17:55:21 +000010229 return(0);
10230}
10231
10232/**
10233 * xmlParseExtParsedEnt:
10234 * @ctxt: an XML parser context
10235 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010236 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010237 * An external general parsed entity is well-formed if it matches the
10238 * production labeled extParsedEnt.
10239 *
10240 * [78] extParsedEnt ::= TextDecl? content
10241 *
10242 * Returns 0, -1 in case of error. the parser context is augmented
10243 * as a result of the parsing.
10244 */
10245
10246int
10247xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10248 xmlChar start[4];
10249 xmlCharEncoding enc;
10250
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010251 if ((ctxt == NULL) || (ctxt->input == NULL))
10252 return(-1);
10253
Owen Taylor3473f882001-02-23 17:55:21 +000010254 xmlDefaultSAXHandlerInit();
10255
Daniel Veillard309f81d2003-09-23 09:02:53 +000010256 xmlDetectSAX2(ctxt);
10257
Owen Taylor3473f882001-02-23 17:55:21 +000010258 GROW;
10259
10260 /*
10261 * SAX: beginning of the document processing.
10262 */
10263 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10264 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10265
10266 /*
10267 * Get the 4 first bytes and decode the charset
10268 * if enc != XML_CHAR_ENCODING_NONE
10269 * plug some encoding conversion routines.
10270 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010271 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10272 start[0] = RAW;
10273 start[1] = NXT(1);
10274 start[2] = NXT(2);
10275 start[3] = NXT(3);
10276 enc = xmlDetectCharEncoding(start, 4);
10277 if (enc != XML_CHAR_ENCODING_NONE) {
10278 xmlSwitchEncoding(ctxt, enc);
10279 }
Owen Taylor3473f882001-02-23 17:55:21 +000010280 }
10281
10282
10283 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010284 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010285 }
10286
10287 /*
10288 * Check for the XMLDecl in the Prolog.
10289 */
10290 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010291 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010292
10293 /*
10294 * Note that we will switch encoding on the fly.
10295 */
10296 xmlParseXMLDecl(ctxt);
10297 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10298 /*
10299 * The XML REC instructs us to stop parsing right here
10300 */
10301 return(-1);
10302 }
10303 SKIP_BLANKS;
10304 } else {
10305 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10306 }
10307 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10308 ctxt->sax->startDocument(ctxt->userData);
10309
10310 /*
10311 * Doing validity checking on chunk doesn't make sense
10312 */
10313 ctxt->instate = XML_PARSER_CONTENT;
10314 ctxt->validate = 0;
10315 ctxt->loadsubset = 0;
10316 ctxt->depth = 0;
10317
10318 xmlParseContent(ctxt);
10319
10320 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010321 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010322 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010323 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010324 }
10325
10326 /*
10327 * SAX: end of the document processing.
10328 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010329 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010330 ctxt->sax->endDocument(ctxt->userData);
10331
10332 if (! ctxt->wellFormed) return(-1);
10333 return(0);
10334}
10335
Daniel Veillard73b013f2003-09-30 12:36:01 +000010336#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010337/************************************************************************
10338 * *
10339 * Progressive parsing interfaces *
10340 * *
10341 ************************************************************************/
10342
10343/**
10344 * xmlParseLookupSequence:
10345 * @ctxt: an XML parser context
10346 * @first: the first char to lookup
10347 * @next: the next char to lookup or zero
10348 * @third: the next char to lookup or zero
10349 *
10350 * Try to find if a sequence (first, next, third) or just (first next) or
10351 * (first) is available in the input stream.
10352 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10353 * to avoid rescanning sequences of bytes, it DOES change the state of the
10354 * parser, do not use liberally.
10355 *
10356 * Returns the index to the current parsing point if the full sequence
10357 * is available, -1 otherwise.
10358 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010359static int
Owen Taylor3473f882001-02-23 17:55:21 +000010360xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10361 xmlChar next, xmlChar third) {
10362 int base, len;
10363 xmlParserInputPtr in;
10364 const xmlChar *buf;
10365
10366 in = ctxt->input;
10367 if (in == NULL) return(-1);
10368 base = in->cur - in->base;
10369 if (base < 0) return(-1);
10370 if (ctxt->checkIndex > base)
10371 base = ctxt->checkIndex;
10372 if (in->buf == NULL) {
10373 buf = in->base;
10374 len = in->length;
10375 } else {
10376 buf = in->buf->buffer->content;
10377 len = in->buf->buffer->use;
10378 }
10379 /* take into account the sequence length */
10380 if (third) len -= 2;
10381 else if (next) len --;
10382 for (;base < len;base++) {
10383 if (buf[base] == first) {
10384 if (third != 0) {
10385 if ((buf[base + 1] != next) ||
10386 (buf[base + 2] != third)) continue;
10387 } else if (next != 0) {
10388 if (buf[base + 1] != next) continue;
10389 }
10390 ctxt->checkIndex = 0;
10391#ifdef DEBUG_PUSH
10392 if (next == 0)
10393 xmlGenericError(xmlGenericErrorContext,
10394 "PP: lookup '%c' found at %d\n",
10395 first, base);
10396 else if (third == 0)
10397 xmlGenericError(xmlGenericErrorContext,
10398 "PP: lookup '%c%c' found at %d\n",
10399 first, next, base);
10400 else
10401 xmlGenericError(xmlGenericErrorContext,
10402 "PP: lookup '%c%c%c' found at %d\n",
10403 first, next, third, base);
10404#endif
10405 return(base - (in->cur - in->base));
10406 }
10407 }
10408 ctxt->checkIndex = base;
10409#ifdef DEBUG_PUSH
10410 if (next == 0)
10411 xmlGenericError(xmlGenericErrorContext,
10412 "PP: lookup '%c' failed\n", first);
10413 else if (third == 0)
10414 xmlGenericError(xmlGenericErrorContext,
10415 "PP: lookup '%c%c' failed\n", first, next);
10416 else
10417 xmlGenericError(xmlGenericErrorContext,
10418 "PP: lookup '%c%c%c' failed\n", first, next, third);
10419#endif
10420 return(-1);
10421}
10422
10423/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010424 * xmlParseGetLasts:
10425 * @ctxt: an XML parser context
10426 * @lastlt: pointer to store the last '<' from the input
10427 * @lastgt: pointer to store the last '>' from the input
10428 *
10429 * Lookup the last < and > in the current chunk
10430 */
10431static void
10432xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10433 const xmlChar **lastgt) {
10434 const xmlChar *tmp;
10435
10436 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10437 xmlGenericError(xmlGenericErrorContext,
10438 "Internal error: xmlParseGetLasts\n");
10439 return;
10440 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010441 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010442 tmp = ctxt->input->end;
10443 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010444 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010445 if (tmp < ctxt->input->base) {
10446 *lastlt = NULL;
10447 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010448 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010449 *lastlt = tmp;
10450 tmp++;
10451 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10452 if (*tmp == '\'') {
10453 tmp++;
10454 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10455 if (tmp < ctxt->input->end) tmp++;
10456 } else if (*tmp == '"') {
10457 tmp++;
10458 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10459 if (tmp < ctxt->input->end) tmp++;
10460 } else
10461 tmp++;
10462 }
10463 if (tmp < ctxt->input->end)
10464 *lastgt = tmp;
10465 else {
10466 tmp = *lastlt;
10467 tmp--;
10468 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10469 if (tmp >= ctxt->input->base)
10470 *lastgt = tmp;
10471 else
10472 *lastgt = NULL;
10473 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010474 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010475 } else {
10476 *lastlt = NULL;
10477 *lastgt = NULL;
10478 }
10479}
10480/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010481 * xmlCheckCdataPush:
10482 * @cur: pointer to the bock of characters
10483 * @len: length of the block in bytes
10484 *
10485 * Check that the block of characters is okay as SCdata content [20]
10486 *
10487 * Returns the number of bytes to pass if okay, a negative index where an
10488 * UTF-8 error occured otherwise
10489 */
10490static int
10491xmlCheckCdataPush(const xmlChar *utf, int len) {
10492 int ix;
10493 unsigned char c;
10494 int codepoint;
10495
10496 if ((utf == NULL) || (len <= 0))
10497 return(0);
10498
10499 for (ix = 0; ix < len;) { /* string is 0-terminated */
10500 c = utf[ix];
10501 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10502 if (c >= 0x20)
10503 ix++;
10504 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10505 ix++;
10506 else
10507 return(-ix);
10508 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10509 if (ix + 2 > len) return(ix);
10510 if ((utf[ix+1] & 0xc0 ) != 0x80)
10511 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010512 codepoint = (utf[ix] & 0x1f) << 6;
10513 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010514 if (!xmlIsCharQ(codepoint))
10515 return(-ix);
10516 ix += 2;
10517 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10518 if (ix + 3 > len) return(ix);
10519 if (((utf[ix+1] & 0xc0) != 0x80) ||
10520 ((utf[ix+2] & 0xc0) != 0x80))
10521 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010522 codepoint = (utf[ix] & 0xf) << 12;
10523 codepoint |= (utf[ix+1] & 0x3f) << 6;
10524 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010525 if (!xmlIsCharQ(codepoint))
10526 return(-ix);
10527 ix += 3;
10528 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10529 if (ix + 4 > len) return(ix);
10530 if (((utf[ix+1] & 0xc0) != 0x80) ||
10531 ((utf[ix+2] & 0xc0) != 0x80) ||
10532 ((utf[ix+3] & 0xc0) != 0x80))
10533 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010534 codepoint = (utf[ix] & 0x7) << 18;
10535 codepoint |= (utf[ix+1] & 0x3f) << 12;
10536 codepoint |= (utf[ix+2] & 0x3f) << 6;
10537 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010538 if (!xmlIsCharQ(codepoint))
10539 return(-ix);
10540 ix += 4;
10541 } else /* unknown encoding */
10542 return(-ix);
10543 }
10544 return(ix);
10545}
10546
10547/**
Owen Taylor3473f882001-02-23 17:55:21 +000010548 * xmlParseTryOrFinish:
10549 * @ctxt: an XML parser context
10550 * @terminate: last chunk indicator
10551 *
10552 * Try to progress on parsing
10553 *
10554 * Returns zero if no parsing was possible
10555 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010556static int
Owen Taylor3473f882001-02-23 17:55:21 +000010557xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10558 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010559 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010560 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010561 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010562
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010563 if (ctxt->input == NULL)
10564 return(0);
10565
Owen Taylor3473f882001-02-23 17:55:21 +000010566#ifdef DEBUG_PUSH
10567 switch (ctxt->instate) {
10568 case XML_PARSER_EOF:
10569 xmlGenericError(xmlGenericErrorContext,
10570 "PP: try EOF\n"); break;
10571 case XML_PARSER_START:
10572 xmlGenericError(xmlGenericErrorContext,
10573 "PP: try START\n"); break;
10574 case XML_PARSER_MISC:
10575 xmlGenericError(xmlGenericErrorContext,
10576 "PP: try MISC\n");break;
10577 case XML_PARSER_COMMENT:
10578 xmlGenericError(xmlGenericErrorContext,
10579 "PP: try COMMENT\n");break;
10580 case XML_PARSER_PROLOG:
10581 xmlGenericError(xmlGenericErrorContext,
10582 "PP: try PROLOG\n");break;
10583 case XML_PARSER_START_TAG:
10584 xmlGenericError(xmlGenericErrorContext,
10585 "PP: try START_TAG\n");break;
10586 case XML_PARSER_CONTENT:
10587 xmlGenericError(xmlGenericErrorContext,
10588 "PP: try CONTENT\n");break;
10589 case XML_PARSER_CDATA_SECTION:
10590 xmlGenericError(xmlGenericErrorContext,
10591 "PP: try CDATA_SECTION\n");break;
10592 case XML_PARSER_END_TAG:
10593 xmlGenericError(xmlGenericErrorContext,
10594 "PP: try END_TAG\n");break;
10595 case XML_PARSER_ENTITY_DECL:
10596 xmlGenericError(xmlGenericErrorContext,
10597 "PP: try ENTITY_DECL\n");break;
10598 case XML_PARSER_ENTITY_VALUE:
10599 xmlGenericError(xmlGenericErrorContext,
10600 "PP: try ENTITY_VALUE\n");break;
10601 case XML_PARSER_ATTRIBUTE_VALUE:
10602 xmlGenericError(xmlGenericErrorContext,
10603 "PP: try ATTRIBUTE_VALUE\n");break;
10604 case XML_PARSER_DTD:
10605 xmlGenericError(xmlGenericErrorContext,
10606 "PP: try DTD\n");break;
10607 case XML_PARSER_EPILOG:
10608 xmlGenericError(xmlGenericErrorContext,
10609 "PP: try EPILOG\n");break;
10610 case XML_PARSER_PI:
10611 xmlGenericError(xmlGenericErrorContext,
10612 "PP: try PI\n");break;
10613 case XML_PARSER_IGNORE:
10614 xmlGenericError(xmlGenericErrorContext,
10615 "PP: try IGNORE\n");break;
10616 }
10617#endif
10618
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010619 if ((ctxt->input != NULL) &&
10620 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010621 xmlSHRINK(ctxt);
10622 ctxt->checkIndex = 0;
10623 }
10624 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010625
Daniel Veillarda880b122003-04-21 21:36:41 +000010626 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010627 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010628 return(0);
10629
10630
Owen Taylor3473f882001-02-23 17:55:21 +000010631 /*
10632 * Pop-up of finished entities.
10633 */
10634 while ((RAW == 0) && (ctxt->inputNr > 1))
10635 xmlPopInput(ctxt);
10636
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010637 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010638 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010639 avail = ctxt->input->length -
10640 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010641 else {
10642 /*
10643 * If we are operating on converted input, try to flush
10644 * remainng chars to avoid them stalling in the non-converted
10645 * buffer.
10646 */
10647 if ((ctxt->input->buf->raw != NULL) &&
10648 (ctxt->input->buf->raw->use > 0)) {
10649 int base = ctxt->input->base -
10650 ctxt->input->buf->buffer->content;
10651 int current = ctxt->input->cur - ctxt->input->base;
10652
10653 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10654 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10655 ctxt->input->cur = ctxt->input->base + current;
10656 ctxt->input->end =
10657 &ctxt->input->buf->buffer->content[
10658 ctxt->input->buf->buffer->use];
10659 }
10660 avail = ctxt->input->buf->buffer->use -
10661 (ctxt->input->cur - ctxt->input->base);
10662 }
Owen Taylor3473f882001-02-23 17:55:21 +000010663 if (avail < 1)
10664 goto done;
10665 switch (ctxt->instate) {
10666 case XML_PARSER_EOF:
10667 /*
10668 * Document parsing is done !
10669 */
10670 goto done;
10671 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010672 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10673 xmlChar start[4];
10674 xmlCharEncoding enc;
10675
10676 /*
10677 * Very first chars read from the document flow.
10678 */
10679 if (avail < 4)
10680 goto done;
10681
10682 /*
10683 * Get the 4 first bytes and decode the charset
10684 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010685 * plug some encoding conversion routines,
10686 * else xmlSwitchEncoding will set to (default)
10687 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010688 */
10689 start[0] = RAW;
10690 start[1] = NXT(1);
10691 start[2] = NXT(2);
10692 start[3] = NXT(3);
10693 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010694 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010695 break;
10696 }
Owen Taylor3473f882001-02-23 17:55:21 +000010697
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010698 if (avail < 2)
10699 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010700 cur = ctxt->input->cur[0];
10701 next = ctxt->input->cur[1];
10702 if (cur == 0) {
10703 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10704 ctxt->sax->setDocumentLocator(ctxt->userData,
10705 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010706 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010707 ctxt->instate = XML_PARSER_EOF;
10708#ifdef DEBUG_PUSH
10709 xmlGenericError(xmlGenericErrorContext,
10710 "PP: entering EOF\n");
10711#endif
10712 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10713 ctxt->sax->endDocument(ctxt->userData);
10714 goto done;
10715 }
10716 if ((cur == '<') && (next == '?')) {
10717 /* PI or XML decl */
10718 if (avail < 5) return(ret);
10719 if ((!terminate) &&
10720 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10721 return(ret);
10722 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10723 ctxt->sax->setDocumentLocator(ctxt->userData,
10724 &xmlDefaultSAXLocator);
10725 if ((ctxt->input->cur[2] == 'x') &&
10726 (ctxt->input->cur[3] == 'm') &&
10727 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010728 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010729 ret += 5;
10730#ifdef DEBUG_PUSH
10731 xmlGenericError(xmlGenericErrorContext,
10732 "PP: Parsing XML Decl\n");
10733#endif
10734 xmlParseXMLDecl(ctxt);
10735 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10736 /*
10737 * The XML REC instructs us to stop parsing right
10738 * here
10739 */
10740 ctxt->instate = XML_PARSER_EOF;
10741 return(0);
10742 }
10743 ctxt->standalone = ctxt->input->standalone;
10744 if ((ctxt->encoding == NULL) &&
10745 (ctxt->input->encoding != NULL))
10746 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10747 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10748 (!ctxt->disableSAX))
10749 ctxt->sax->startDocument(ctxt->userData);
10750 ctxt->instate = XML_PARSER_MISC;
10751#ifdef DEBUG_PUSH
10752 xmlGenericError(xmlGenericErrorContext,
10753 "PP: entering MISC\n");
10754#endif
10755 } else {
10756 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10757 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10758 (!ctxt->disableSAX))
10759 ctxt->sax->startDocument(ctxt->userData);
10760 ctxt->instate = XML_PARSER_MISC;
10761#ifdef DEBUG_PUSH
10762 xmlGenericError(xmlGenericErrorContext,
10763 "PP: entering MISC\n");
10764#endif
10765 }
10766 } else {
10767 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10768 ctxt->sax->setDocumentLocator(ctxt->userData,
10769 &xmlDefaultSAXLocator);
10770 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010771 if (ctxt->version == NULL) {
10772 xmlErrMemory(ctxt, NULL);
10773 break;
10774 }
Owen Taylor3473f882001-02-23 17:55:21 +000010775 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10776 (!ctxt->disableSAX))
10777 ctxt->sax->startDocument(ctxt->userData);
10778 ctxt->instate = XML_PARSER_MISC;
10779#ifdef DEBUG_PUSH
10780 xmlGenericError(xmlGenericErrorContext,
10781 "PP: entering MISC\n");
10782#endif
10783 }
10784 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010785 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010786 const xmlChar *name;
10787 const xmlChar *prefix;
10788 const xmlChar *URI;
10789 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010790
10791 if ((avail < 2) && (ctxt->inputNr == 1))
10792 goto done;
10793 cur = ctxt->input->cur[0];
10794 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010795 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010796 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010797 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10798 ctxt->sax->endDocument(ctxt->userData);
10799 goto done;
10800 }
10801 if (!terminate) {
10802 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010803 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010804 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010805 goto done;
10806 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10807 goto done;
10808 }
10809 }
10810 if (ctxt->spaceNr == 0)
10811 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010812 else if (*ctxt->space == -2)
10813 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010814 else
10815 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010816#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010817 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010818#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010819 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010820#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010821 else
10822 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010823#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010824 if (name == NULL) {
10825 spacePop(ctxt);
10826 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010827 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10828 ctxt->sax->endDocument(ctxt->userData);
10829 goto done;
10830 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010831#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010832 /*
10833 * [ VC: Root Element Type ]
10834 * The Name in the document type declaration must match
10835 * the element type of the root element.
10836 */
10837 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10838 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10839 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010840#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010841
10842 /*
10843 * Check for an Empty Element.
10844 */
10845 if ((RAW == '/') && (NXT(1) == '>')) {
10846 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010847
10848 if (ctxt->sax2) {
10849 if ((ctxt->sax != NULL) &&
10850 (ctxt->sax->endElementNs != NULL) &&
10851 (!ctxt->disableSAX))
10852 ctxt->sax->endElementNs(ctxt->userData, name,
10853 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010854 if (ctxt->nsNr - nsNr > 0)
10855 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010856#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010857 } else {
10858 if ((ctxt->sax != NULL) &&
10859 (ctxt->sax->endElement != NULL) &&
10860 (!ctxt->disableSAX))
10861 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010862#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010863 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010864 spacePop(ctxt);
10865 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010866 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010867 } else {
10868 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010869 }
10870 break;
10871 }
10872 if (RAW == '>') {
10873 NEXT;
10874 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010875 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010876 "Couldn't find end of Start Tag %s\n",
10877 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010878 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010879 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010880 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010881 if (ctxt->sax2)
10882 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010883#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010884 else
10885 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010886#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010887
Daniel Veillarda880b122003-04-21 21:36:41 +000010888 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010889 break;
10890 }
10891 case XML_PARSER_CONTENT: {
10892 const xmlChar *test;
10893 unsigned int cons;
10894 if ((avail < 2) && (ctxt->inputNr == 1))
10895 goto done;
10896 cur = ctxt->input->cur[0];
10897 next = ctxt->input->cur[1];
10898
10899 test = CUR_PTR;
10900 cons = ctxt->input->consumed;
10901 if ((cur == '<') && (next == '/')) {
10902 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010903 break;
10904 } else if ((cur == '<') && (next == '?')) {
10905 if ((!terminate) &&
10906 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10907 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010908 xmlParsePI(ctxt);
10909 } else if ((cur == '<') && (next != '!')) {
10910 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010911 break;
10912 } else if ((cur == '<') && (next == '!') &&
10913 (ctxt->input->cur[2] == '-') &&
10914 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010915 int term;
10916
10917 if (avail < 4)
10918 goto done;
10919 ctxt->input->cur += 4;
10920 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10921 ctxt->input->cur -= 4;
10922 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010923 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010924 xmlParseComment(ctxt);
10925 ctxt->instate = XML_PARSER_CONTENT;
10926 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10927 (ctxt->input->cur[2] == '[') &&
10928 (ctxt->input->cur[3] == 'C') &&
10929 (ctxt->input->cur[4] == 'D') &&
10930 (ctxt->input->cur[5] == 'A') &&
10931 (ctxt->input->cur[6] == 'T') &&
10932 (ctxt->input->cur[7] == 'A') &&
10933 (ctxt->input->cur[8] == '[')) {
10934 SKIP(9);
10935 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010936 break;
10937 } else if ((cur == '<') && (next == '!') &&
10938 (avail < 9)) {
10939 goto done;
10940 } else if (cur == '&') {
10941 if ((!terminate) &&
10942 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10943 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010944 xmlParseReference(ctxt);
10945 } else {
10946 /* TODO Avoid the extra copy, handle directly !!! */
10947 /*
10948 * Goal of the following test is:
10949 * - minimize calls to the SAX 'character' callback
10950 * when they are mergeable
10951 * - handle an problem for isBlank when we only parse
10952 * a sequence of blank chars and the next one is
10953 * not available to check against '<' presence.
10954 * - tries to homogenize the differences in SAX
10955 * callbacks between the push and pull versions
10956 * of the parser.
10957 */
10958 if ((ctxt->inputNr == 1) &&
10959 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10960 if (!terminate) {
10961 if (ctxt->progressive) {
10962 if ((lastlt == NULL) ||
10963 (ctxt->input->cur > lastlt))
10964 goto done;
10965 } else if (xmlParseLookupSequence(ctxt,
10966 '<', 0, 0) < 0) {
10967 goto done;
10968 }
10969 }
10970 }
10971 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010972 xmlParseCharData(ctxt, 0);
10973 }
10974 /*
10975 * Pop-up of finished entities.
10976 */
10977 while ((RAW == 0) && (ctxt->inputNr > 1))
10978 xmlPopInput(ctxt);
10979 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010980 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10981 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010982 ctxt->instate = XML_PARSER_EOF;
10983 break;
10984 }
10985 break;
10986 }
10987 case XML_PARSER_END_TAG:
10988 if (avail < 2)
10989 goto done;
10990 if (!terminate) {
10991 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010992 /* > can be found unescaped in attribute values */
10993 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010994 goto done;
10995 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10996 goto done;
10997 }
10998 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010999 if (ctxt->sax2) {
11000 xmlParseEndTag2(ctxt,
11001 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11002 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011003 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011004 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011005 }
11006#ifdef LIBXML_SAX1_ENABLED
11007 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011008 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011009#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011010 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011011 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011012 } else {
11013 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011014 }
11015 break;
11016 case XML_PARSER_CDATA_SECTION: {
11017 /*
11018 * The Push mode need to have the SAX callback for
11019 * cdataBlock merge back contiguous callbacks.
11020 */
11021 int base;
11022
11023 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11024 if (base < 0) {
11025 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011026 int tmp;
11027
11028 tmp = xmlCheckCdataPush(ctxt->input->cur,
11029 XML_PARSER_BIG_BUFFER_SIZE);
11030 if (tmp < 0) {
11031 tmp = -tmp;
11032 ctxt->input->cur += tmp;
11033 goto encoding_error;
11034 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011035 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11036 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011037 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011038 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011039 else if (ctxt->sax->characters != NULL)
11040 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011041 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011042 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011043 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011044 ctxt->checkIndex = 0;
11045 }
11046 goto done;
11047 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011048 int tmp;
11049
11050 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11051 if ((tmp < 0) || (tmp != base)) {
11052 tmp = -tmp;
11053 ctxt->input->cur += tmp;
11054 goto encoding_error;
11055 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011056 if ((ctxt->sax != NULL) && (base == 0) &&
11057 (ctxt->sax->cdataBlock != NULL) &&
11058 (!ctxt->disableSAX)) {
11059 /*
11060 * Special case to provide identical behaviour
11061 * between pull and push parsers on enpty CDATA
11062 * sections
11063 */
11064 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11065 (!strncmp((const char *)&ctxt->input->cur[-9],
11066 "<![CDATA[", 9)))
11067 ctxt->sax->cdataBlock(ctxt->userData,
11068 BAD_CAST "", 0);
11069 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011070 (!ctxt->disableSAX)) {
11071 if (ctxt->sax->cdataBlock != NULL)
11072 ctxt->sax->cdataBlock(ctxt->userData,
11073 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011074 else if (ctxt->sax->characters != NULL)
11075 ctxt->sax->characters(ctxt->userData,
11076 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011077 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011078 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011079 ctxt->checkIndex = 0;
11080 ctxt->instate = XML_PARSER_CONTENT;
11081#ifdef DEBUG_PUSH
11082 xmlGenericError(xmlGenericErrorContext,
11083 "PP: entering CONTENT\n");
11084#endif
11085 }
11086 break;
11087 }
Owen Taylor3473f882001-02-23 17:55:21 +000011088 case XML_PARSER_MISC:
11089 SKIP_BLANKS;
11090 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011091 avail = ctxt->input->length -
11092 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011093 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011094 avail = ctxt->input->buf->buffer->use -
11095 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011096 if (avail < 2)
11097 goto done;
11098 cur = ctxt->input->cur[0];
11099 next = ctxt->input->cur[1];
11100 if ((cur == '<') && (next == '?')) {
11101 if ((!terminate) &&
11102 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11103 goto done;
11104#ifdef DEBUG_PUSH
11105 xmlGenericError(xmlGenericErrorContext,
11106 "PP: Parsing PI\n");
11107#endif
11108 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011109 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011110 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011111 (ctxt->input->cur[2] == '-') &&
11112 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011113 if ((!terminate) &&
11114 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11115 goto done;
11116#ifdef DEBUG_PUSH
11117 xmlGenericError(xmlGenericErrorContext,
11118 "PP: Parsing Comment\n");
11119#endif
11120 xmlParseComment(ctxt);
11121 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011122 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011123 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011124 (ctxt->input->cur[2] == 'D') &&
11125 (ctxt->input->cur[3] == 'O') &&
11126 (ctxt->input->cur[4] == 'C') &&
11127 (ctxt->input->cur[5] == 'T') &&
11128 (ctxt->input->cur[6] == 'Y') &&
11129 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011130 (ctxt->input->cur[8] == 'E')) {
11131 if ((!terminate) &&
11132 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11133 goto done;
11134#ifdef DEBUG_PUSH
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: Parsing internal subset\n");
11137#endif
11138 ctxt->inSubset = 1;
11139 xmlParseDocTypeDecl(ctxt);
11140 if (RAW == '[') {
11141 ctxt->instate = XML_PARSER_DTD;
11142#ifdef DEBUG_PUSH
11143 xmlGenericError(xmlGenericErrorContext,
11144 "PP: entering DTD\n");
11145#endif
11146 } else {
11147 /*
11148 * Create and update the external subset.
11149 */
11150 ctxt->inSubset = 2;
11151 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11152 (ctxt->sax->externalSubset != NULL))
11153 ctxt->sax->externalSubset(ctxt->userData,
11154 ctxt->intSubName, ctxt->extSubSystem,
11155 ctxt->extSubURI);
11156 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011157 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011158 ctxt->instate = XML_PARSER_PROLOG;
11159#ifdef DEBUG_PUSH
11160 xmlGenericError(xmlGenericErrorContext,
11161 "PP: entering PROLOG\n");
11162#endif
11163 }
11164 } else if ((cur == '<') && (next == '!') &&
11165 (avail < 9)) {
11166 goto done;
11167 } else {
11168 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011169 ctxt->progressive = 1;
11170 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011171#ifdef DEBUG_PUSH
11172 xmlGenericError(xmlGenericErrorContext,
11173 "PP: entering START_TAG\n");
11174#endif
11175 }
11176 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011177 case XML_PARSER_PROLOG:
11178 SKIP_BLANKS;
11179 if (ctxt->input->buf == NULL)
11180 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11181 else
11182 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11183 if (avail < 2)
11184 goto done;
11185 cur = ctxt->input->cur[0];
11186 next = ctxt->input->cur[1];
11187 if ((cur == '<') && (next == '?')) {
11188 if ((!terminate) &&
11189 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11190 goto done;
11191#ifdef DEBUG_PUSH
11192 xmlGenericError(xmlGenericErrorContext,
11193 "PP: Parsing PI\n");
11194#endif
11195 xmlParsePI(ctxt);
11196 } else if ((cur == '<') && (next == '!') &&
11197 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11198 if ((!terminate) &&
11199 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11200 goto done;
11201#ifdef DEBUG_PUSH
11202 xmlGenericError(xmlGenericErrorContext,
11203 "PP: Parsing Comment\n");
11204#endif
11205 xmlParseComment(ctxt);
11206 ctxt->instate = XML_PARSER_PROLOG;
11207 } else if ((cur == '<') && (next == '!') &&
11208 (avail < 4)) {
11209 goto done;
11210 } else {
11211 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011212 if (ctxt->progressive == 0)
11213 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011214 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011215#ifdef DEBUG_PUSH
11216 xmlGenericError(xmlGenericErrorContext,
11217 "PP: entering START_TAG\n");
11218#endif
11219 }
11220 break;
11221 case XML_PARSER_EPILOG:
11222 SKIP_BLANKS;
11223 if (ctxt->input->buf == NULL)
11224 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11225 else
11226 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11227 if (avail < 2)
11228 goto done;
11229 cur = ctxt->input->cur[0];
11230 next = ctxt->input->cur[1];
11231 if ((cur == '<') && (next == '?')) {
11232 if ((!terminate) &&
11233 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11234 goto done;
11235#ifdef DEBUG_PUSH
11236 xmlGenericError(xmlGenericErrorContext,
11237 "PP: Parsing PI\n");
11238#endif
11239 xmlParsePI(ctxt);
11240 ctxt->instate = XML_PARSER_EPILOG;
11241 } else if ((cur == '<') && (next == '!') &&
11242 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11243 if ((!terminate) &&
11244 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11245 goto done;
11246#ifdef DEBUG_PUSH
11247 xmlGenericError(xmlGenericErrorContext,
11248 "PP: Parsing Comment\n");
11249#endif
11250 xmlParseComment(ctxt);
11251 ctxt->instate = XML_PARSER_EPILOG;
11252 } else if ((cur == '<') && (next == '!') &&
11253 (avail < 4)) {
11254 goto done;
11255 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011256 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011257 ctxt->instate = XML_PARSER_EOF;
11258#ifdef DEBUG_PUSH
11259 xmlGenericError(xmlGenericErrorContext,
11260 "PP: entering EOF\n");
11261#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011262 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011263 ctxt->sax->endDocument(ctxt->userData);
11264 goto done;
11265 }
11266 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011267 case XML_PARSER_DTD: {
11268 /*
11269 * Sorry but progressive parsing of the internal subset
11270 * is not expected to be supported. We first check that
11271 * the full content of the internal subset is available and
11272 * the parsing is launched only at that point.
11273 * Internal subset ends up with "']' S? '>'" in an unescaped
11274 * section and not in a ']]>' sequence which are conditional
11275 * sections (whoever argued to keep that crap in XML deserve
11276 * a place in hell !).
11277 */
11278 int base, i;
11279 xmlChar *buf;
11280 xmlChar quote = 0;
11281
11282 base = ctxt->input->cur - ctxt->input->base;
11283 if (base < 0) return(0);
11284 if (ctxt->checkIndex > base)
11285 base = ctxt->checkIndex;
11286 buf = ctxt->input->buf->buffer->content;
11287 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11288 base++) {
11289 if (quote != 0) {
11290 if (buf[base] == quote)
11291 quote = 0;
11292 continue;
11293 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011294 if ((quote == 0) && (buf[base] == '<')) {
11295 int found = 0;
11296 /* special handling of comments */
11297 if (((unsigned int) base + 4 <
11298 ctxt->input->buf->buffer->use) &&
11299 (buf[base + 1] == '!') &&
11300 (buf[base + 2] == '-') &&
11301 (buf[base + 3] == '-')) {
11302 for (;(unsigned int) base + 3 <
11303 ctxt->input->buf->buffer->use; base++) {
11304 if ((buf[base] == '-') &&
11305 (buf[base + 1] == '-') &&
11306 (buf[base + 2] == '>')) {
11307 found = 1;
11308 base += 2;
11309 break;
11310 }
11311 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011312 if (!found) {
11313#if 0
11314 fprintf(stderr, "unfinished comment\n");
11315#endif
11316 break; /* for */
11317 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011318 continue;
11319 }
11320 }
Owen Taylor3473f882001-02-23 17:55:21 +000011321 if (buf[base] == '"') {
11322 quote = '"';
11323 continue;
11324 }
11325 if (buf[base] == '\'') {
11326 quote = '\'';
11327 continue;
11328 }
11329 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011330#if 0
11331 fprintf(stderr, "%c%c%c%c: ", buf[base],
11332 buf[base + 1], buf[base + 2], buf[base + 3]);
11333#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011334 if ((unsigned int) base +1 >=
11335 ctxt->input->buf->buffer->use)
11336 break;
11337 if (buf[base + 1] == ']') {
11338 /* conditional crap, skip both ']' ! */
11339 base++;
11340 continue;
11341 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011342 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011343 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11344 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011345 if (buf[base + i] == '>') {
11346#if 0
11347 fprintf(stderr, "found\n");
11348#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011349 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011350 }
11351 if (!IS_BLANK_CH(buf[base + i])) {
11352#if 0
11353 fprintf(stderr, "not found\n");
11354#endif
11355 goto not_end_of_int_subset;
11356 }
Owen Taylor3473f882001-02-23 17:55:21 +000011357 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011358#if 0
11359 fprintf(stderr, "end of stream\n");
11360#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011361 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011362
Owen Taylor3473f882001-02-23 17:55:21 +000011363 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011364not_end_of_int_subset:
11365 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011366 }
11367 /*
11368 * We didn't found the end of the Internal subset
11369 */
Owen Taylor3473f882001-02-23 17:55:21 +000011370#ifdef DEBUG_PUSH
11371 if (next == 0)
11372 xmlGenericError(xmlGenericErrorContext,
11373 "PP: lookup of int subset end filed\n");
11374#endif
11375 goto done;
11376
11377found_end_int_subset:
11378 xmlParseInternalSubset(ctxt);
11379 ctxt->inSubset = 2;
11380 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11381 (ctxt->sax->externalSubset != NULL))
11382 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11383 ctxt->extSubSystem, ctxt->extSubURI);
11384 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011385 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011386 ctxt->instate = XML_PARSER_PROLOG;
11387 ctxt->checkIndex = 0;
11388#ifdef DEBUG_PUSH
11389 xmlGenericError(xmlGenericErrorContext,
11390 "PP: entering PROLOG\n");
11391#endif
11392 break;
11393 }
11394 case XML_PARSER_COMMENT:
11395 xmlGenericError(xmlGenericErrorContext,
11396 "PP: internal error, state == COMMENT\n");
11397 ctxt->instate = XML_PARSER_CONTENT;
11398#ifdef DEBUG_PUSH
11399 xmlGenericError(xmlGenericErrorContext,
11400 "PP: entering CONTENT\n");
11401#endif
11402 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011403 case XML_PARSER_IGNORE:
11404 xmlGenericError(xmlGenericErrorContext,
11405 "PP: internal error, state == IGNORE");
11406 ctxt->instate = XML_PARSER_DTD;
11407#ifdef DEBUG_PUSH
11408 xmlGenericError(xmlGenericErrorContext,
11409 "PP: entering DTD\n");
11410#endif
11411 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011412 case XML_PARSER_PI:
11413 xmlGenericError(xmlGenericErrorContext,
11414 "PP: internal error, state == PI\n");
11415 ctxt->instate = XML_PARSER_CONTENT;
11416#ifdef DEBUG_PUSH
11417 xmlGenericError(xmlGenericErrorContext,
11418 "PP: entering CONTENT\n");
11419#endif
11420 break;
11421 case XML_PARSER_ENTITY_DECL:
11422 xmlGenericError(xmlGenericErrorContext,
11423 "PP: internal error, state == ENTITY_DECL\n");
11424 ctxt->instate = XML_PARSER_DTD;
11425#ifdef DEBUG_PUSH
11426 xmlGenericError(xmlGenericErrorContext,
11427 "PP: entering DTD\n");
11428#endif
11429 break;
11430 case XML_PARSER_ENTITY_VALUE:
11431 xmlGenericError(xmlGenericErrorContext,
11432 "PP: internal error, state == ENTITY_VALUE\n");
11433 ctxt->instate = XML_PARSER_CONTENT;
11434#ifdef DEBUG_PUSH
11435 xmlGenericError(xmlGenericErrorContext,
11436 "PP: entering DTD\n");
11437#endif
11438 break;
11439 case XML_PARSER_ATTRIBUTE_VALUE:
11440 xmlGenericError(xmlGenericErrorContext,
11441 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11442 ctxt->instate = XML_PARSER_START_TAG;
11443#ifdef DEBUG_PUSH
11444 xmlGenericError(xmlGenericErrorContext,
11445 "PP: entering START_TAG\n");
11446#endif
11447 break;
11448 case XML_PARSER_SYSTEM_LITERAL:
11449 xmlGenericError(xmlGenericErrorContext,
11450 "PP: internal error, state == SYSTEM_LITERAL\n");
11451 ctxt->instate = XML_PARSER_START_TAG;
11452#ifdef DEBUG_PUSH
11453 xmlGenericError(xmlGenericErrorContext,
11454 "PP: entering START_TAG\n");
11455#endif
11456 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011457 case XML_PARSER_PUBLIC_LITERAL:
11458 xmlGenericError(xmlGenericErrorContext,
11459 "PP: internal error, state == PUBLIC_LITERAL\n");
11460 ctxt->instate = XML_PARSER_START_TAG;
11461#ifdef DEBUG_PUSH
11462 xmlGenericError(xmlGenericErrorContext,
11463 "PP: entering START_TAG\n");
11464#endif
11465 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011466 }
11467 }
11468done:
11469#ifdef DEBUG_PUSH
11470 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11471#endif
11472 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011473encoding_error:
11474 {
11475 char buffer[150];
11476
11477 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11478 ctxt->input->cur[0], ctxt->input->cur[1],
11479 ctxt->input->cur[2], ctxt->input->cur[3]);
11480 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11481 "Input is not proper UTF-8, indicate encoding !\n%s",
11482 BAD_CAST buffer, NULL);
11483 }
11484 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011485}
11486
11487/**
Owen Taylor3473f882001-02-23 17:55:21 +000011488 * xmlParseChunk:
11489 * @ctxt: an XML parser context
11490 * @chunk: an char array
11491 * @size: the size in byte of the chunk
11492 * @terminate: last chunk indicator
11493 *
11494 * Parse a Chunk of memory
11495 *
11496 * Returns zero if no error, the xmlParserErrors otherwise.
11497 */
11498int
11499xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11500 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011501 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011502 int remain = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000011503
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011504 if (ctxt == NULL)
11505 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011506 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011507 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011508 if (ctxt->instate == XML_PARSER_START)
11509 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011510 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11511 (chunk[size - 1] == '\r')) {
11512 end_in_lf = 1;
11513 size--;
11514 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011515
11516xmldecl_done:
11517
Owen Taylor3473f882001-02-23 17:55:21 +000011518 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11519 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11520 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11521 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011522 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011523
11524 /*
11525 * Specific handling if we autodetected an encoding, we should not
11526 * push more than the first line ... which depend on the encoding
11527 * And only push the rest once the final encoding was detected
11528 */
11529 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11530 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
11531 int len = 45;
11532
11533 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11534 BAD_CAST "UTF-16")) ||
11535 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11536 BAD_CAST "UTF16")))
11537 len = 90;
11538 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11539 BAD_CAST "UCS-4")) ||
11540 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11541 BAD_CAST "UCS4")))
11542 len = 180;
11543
11544 if (ctxt->input->buf->rawconsumed < len)
11545 len -= ctxt->input->buf->rawconsumed;
11546
11547 remain = size - len;
11548 size = len;
11549 }
William M. Bracka3215c72004-07-31 16:24:01 +000011550 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11551 if (res < 0) {
11552 ctxt->errNo = XML_PARSER_EOF;
11553 ctxt->disableSAX = 1;
11554 return (XML_PARSER_EOF);
11555 }
Owen Taylor3473f882001-02-23 17:55:21 +000011556 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11557 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011558 ctxt->input->end =
11559 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011560#ifdef DEBUG_PUSH
11561 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11562#endif
11563
Owen Taylor3473f882001-02-23 17:55:21 +000011564 } else if (ctxt->instate != XML_PARSER_EOF) {
11565 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11566 xmlParserInputBufferPtr in = ctxt->input->buf;
11567 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11568 (in->raw != NULL)) {
11569 int nbchars;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011570
Owen Taylor3473f882001-02-23 17:55:21 +000011571 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11572 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011573 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011574 xmlGenericError(xmlGenericErrorContext,
11575 "xmlParseChunk: encoder error\n");
11576 return(XML_ERR_INVALID_ENCODING);
11577 }
11578 }
11579 }
11580 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011581 if (remain != 0)
11582 xmlParseTryOrFinish(ctxt, 0);
11583 else
11584 xmlParseTryOrFinish(ctxt, terminate);
11585 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11586 return(ctxt->errNo);
11587
11588 if (remain != 0) {
11589 chunk += size;
11590 size = remain;
11591 remain = 0;
11592 goto xmldecl_done;
11593 }
Daniel Veillarda617e242006-01-09 14:38:44 +000011594 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11595 (ctxt->input->buf != NULL)) {
11596 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11597 }
Owen Taylor3473f882001-02-23 17:55:21 +000011598 if (terminate) {
11599 /*
11600 * Check for termination
11601 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011602 int avail = 0;
11603
11604 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011605 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011606 avail = ctxt->input->length -
11607 (ctxt->input->cur - ctxt->input->base);
11608 else
11609 avail = ctxt->input->buf->buffer->use -
11610 (ctxt->input->cur - ctxt->input->base);
11611 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011612
Owen Taylor3473f882001-02-23 17:55:21 +000011613 if ((ctxt->instate != XML_PARSER_EOF) &&
11614 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011615 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011616 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011617 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011618 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011619 }
Owen Taylor3473f882001-02-23 17:55:21 +000011620 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011621 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011622 ctxt->sax->endDocument(ctxt->userData);
11623 }
11624 ctxt->instate = XML_PARSER_EOF;
11625 }
11626 return((xmlParserErrors) ctxt->errNo);
11627}
11628
11629/************************************************************************
11630 * *
11631 * I/O front end functions to the parser *
11632 * *
11633 ************************************************************************/
11634
11635/**
Owen Taylor3473f882001-02-23 17:55:21 +000011636 * xmlCreatePushParserCtxt:
11637 * @sax: a SAX handler
11638 * @user_data: The user data returned on SAX callbacks
11639 * @chunk: a pointer to an array of chars
11640 * @size: number of chars in the array
11641 * @filename: an optional file name or URI
11642 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011643 * Create a parser context for using the XML parser in push mode.
11644 * If @buffer and @size are non-NULL, the data is used to detect
11645 * the encoding. The remaining characters will be parsed so they
11646 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011647 * To allow content encoding detection, @size should be >= 4
11648 * The value of @filename is used for fetching external entities
11649 * and error/warning reports.
11650 *
11651 * Returns the new parser context or NULL
11652 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011653
Owen Taylor3473f882001-02-23 17:55:21 +000011654xmlParserCtxtPtr
11655xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11656 const char *chunk, int size, const char *filename) {
11657 xmlParserCtxtPtr ctxt;
11658 xmlParserInputPtr inputStream;
11659 xmlParserInputBufferPtr buf;
11660 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11661
11662 /*
11663 * plug some encoding conversion routines
11664 */
11665 if ((chunk != NULL) && (size >= 4))
11666 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11667
11668 buf = xmlAllocParserInputBuffer(enc);
11669 if (buf == NULL) return(NULL);
11670
11671 ctxt = xmlNewParserCtxt();
11672 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011673 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011674 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011675 return(NULL);
11676 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011677 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011678 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11679 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011680 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011681 xmlFreeParserInputBuffer(buf);
11682 xmlFreeParserCtxt(ctxt);
11683 return(NULL);
11684 }
Owen Taylor3473f882001-02-23 17:55:21 +000011685 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011686#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011687 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011688#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011689 xmlFree(ctxt->sax);
11690 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11691 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011692 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011693 xmlFreeParserInputBuffer(buf);
11694 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011695 return(NULL);
11696 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011697 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11698 if (sax->initialized == XML_SAX2_MAGIC)
11699 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11700 else
11701 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011702 if (user_data != NULL)
11703 ctxt->userData = user_data;
11704 }
11705 if (filename == NULL) {
11706 ctxt->directory = NULL;
11707 } else {
11708 ctxt->directory = xmlParserGetDirectory(filename);
11709 }
11710
11711 inputStream = xmlNewInputStream(ctxt);
11712 if (inputStream == NULL) {
11713 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011714 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011715 return(NULL);
11716 }
11717
11718 if (filename == NULL)
11719 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011720 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011721 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011722 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011723 if (inputStream->filename == NULL) {
11724 xmlFreeParserCtxt(ctxt);
11725 xmlFreeParserInputBuffer(buf);
11726 return(NULL);
11727 }
11728 }
Owen Taylor3473f882001-02-23 17:55:21 +000011729 inputStream->buf = buf;
11730 inputStream->base = inputStream->buf->buffer->content;
11731 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011732 inputStream->end =
11733 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011734
11735 inputPush(ctxt, inputStream);
11736
William M. Brack3a1cd212005-02-11 14:35:54 +000011737 /*
11738 * If the caller didn't provide an initial 'chunk' for determining
11739 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11740 * that it can be automatically determined later
11741 */
11742 if ((size == 0) || (chunk == NULL)) {
11743 ctxt->charset = XML_CHAR_ENCODING_NONE;
11744 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011745 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11746 int cur = ctxt->input->cur - ctxt->input->base;
11747
Owen Taylor3473f882001-02-23 17:55:21 +000011748 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011749
11750 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11751 ctxt->input->cur = ctxt->input->base + cur;
11752 ctxt->input->end =
11753 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011754#ifdef DEBUG_PUSH
11755 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11756#endif
11757 }
11758
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011759 if (enc != XML_CHAR_ENCODING_NONE) {
11760 xmlSwitchEncoding(ctxt, enc);
11761 }
11762
Owen Taylor3473f882001-02-23 17:55:21 +000011763 return(ctxt);
11764}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011765#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011766
11767/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011768 * xmlStopParser:
11769 * @ctxt: an XML parser context
11770 *
11771 * Blocks further parser processing
11772 */
11773void
11774xmlStopParser(xmlParserCtxtPtr ctxt) {
11775 if (ctxt == NULL)
11776 return;
11777 ctxt->instate = XML_PARSER_EOF;
11778 ctxt->disableSAX = 1;
11779 if (ctxt->input != NULL) {
11780 ctxt->input->cur = BAD_CAST"";
11781 ctxt->input->base = ctxt->input->cur;
11782 }
11783}
11784
11785/**
Owen Taylor3473f882001-02-23 17:55:21 +000011786 * xmlCreateIOParserCtxt:
11787 * @sax: a SAX handler
11788 * @user_data: The user data returned on SAX callbacks
11789 * @ioread: an I/O read function
11790 * @ioclose: an I/O close function
11791 * @ioctx: an I/O handler
11792 * @enc: the charset encoding if known
11793 *
11794 * Create a parser context for using the XML parser with an existing
11795 * I/O stream
11796 *
11797 * Returns the new parser context or NULL
11798 */
11799xmlParserCtxtPtr
11800xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11801 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11802 void *ioctx, xmlCharEncoding enc) {
11803 xmlParserCtxtPtr ctxt;
11804 xmlParserInputPtr inputStream;
11805 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011806
11807 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011808
11809 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11810 if (buf == NULL) return(NULL);
11811
11812 ctxt = xmlNewParserCtxt();
11813 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011814 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011815 return(NULL);
11816 }
11817 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011818#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011819 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011820#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011821 xmlFree(ctxt->sax);
11822 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11823 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011824 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011825 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011826 return(NULL);
11827 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011828 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11829 if (sax->initialized == XML_SAX2_MAGIC)
11830 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11831 else
11832 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011833 if (user_data != NULL)
11834 ctxt->userData = user_data;
11835 }
11836
11837 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11838 if (inputStream == NULL) {
11839 xmlFreeParserCtxt(ctxt);
11840 return(NULL);
11841 }
11842 inputPush(ctxt, inputStream);
11843
11844 return(ctxt);
11845}
11846
Daniel Veillard4432df22003-09-28 18:58:27 +000011847#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011848/************************************************************************
11849 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011850 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011851 * *
11852 ************************************************************************/
11853
11854/**
11855 * xmlIOParseDTD:
11856 * @sax: the SAX handler block or NULL
11857 * @input: an Input Buffer
11858 * @enc: the charset encoding if known
11859 *
11860 * Load and parse a DTD
11861 *
11862 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011863 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011864 */
11865
11866xmlDtdPtr
11867xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11868 xmlCharEncoding enc) {
11869 xmlDtdPtr ret = NULL;
11870 xmlParserCtxtPtr ctxt;
11871 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011872 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011873
11874 if (input == NULL)
11875 return(NULL);
11876
11877 ctxt = xmlNewParserCtxt();
11878 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011879 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011880 return(NULL);
11881 }
11882
11883 /*
11884 * Set-up the SAX context
11885 */
11886 if (sax != NULL) {
11887 if (ctxt->sax != NULL)
11888 xmlFree(ctxt->sax);
11889 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011890 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011891 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011892 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011893
11894 /*
11895 * generate a parser input from the I/O handler
11896 */
11897
Daniel Veillard43caefb2003-12-07 19:32:22 +000011898 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011899 if (pinput == NULL) {
11900 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011901 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011902 xmlFreeParserCtxt(ctxt);
11903 return(NULL);
11904 }
11905
11906 /*
11907 * plug some encoding conversion routines here.
11908 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011909 if (xmlPushInput(ctxt, pinput) < 0) {
11910 if (sax != NULL) ctxt->sax = NULL;
11911 xmlFreeParserCtxt(ctxt);
11912 return(NULL);
11913 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000011914 if (enc != XML_CHAR_ENCODING_NONE) {
11915 xmlSwitchEncoding(ctxt, enc);
11916 }
Owen Taylor3473f882001-02-23 17:55:21 +000011917
11918 pinput->filename = NULL;
11919 pinput->line = 1;
11920 pinput->col = 1;
11921 pinput->base = ctxt->input->cur;
11922 pinput->cur = ctxt->input->cur;
11923 pinput->free = NULL;
11924
11925 /*
11926 * let's parse that entity knowing it's an external subset.
11927 */
11928 ctxt->inSubset = 2;
11929 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011930 if (ctxt->myDoc == NULL) {
11931 xmlErrMemory(ctxt, "New Doc failed");
11932 return(NULL);
11933 }
11934 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011935 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11936 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011937
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011938 if ((enc == XML_CHAR_ENCODING_NONE) &&
11939 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011940 /*
11941 * Get the 4 first bytes and decode the charset
11942 * if enc != XML_CHAR_ENCODING_NONE
11943 * plug some encoding conversion routines.
11944 */
11945 start[0] = RAW;
11946 start[1] = NXT(1);
11947 start[2] = NXT(2);
11948 start[3] = NXT(3);
11949 enc = xmlDetectCharEncoding(start, 4);
11950 if (enc != XML_CHAR_ENCODING_NONE) {
11951 xmlSwitchEncoding(ctxt, enc);
11952 }
11953 }
11954
Owen Taylor3473f882001-02-23 17:55:21 +000011955 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11956
11957 if (ctxt->myDoc != NULL) {
11958 if (ctxt->wellFormed) {
11959 ret = ctxt->myDoc->extSubset;
11960 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011961 if (ret != NULL) {
11962 xmlNodePtr tmp;
11963
11964 ret->doc = NULL;
11965 tmp = ret->children;
11966 while (tmp != NULL) {
11967 tmp->doc = NULL;
11968 tmp = tmp->next;
11969 }
11970 }
Owen Taylor3473f882001-02-23 17:55:21 +000011971 } else {
11972 ret = NULL;
11973 }
11974 xmlFreeDoc(ctxt->myDoc);
11975 ctxt->myDoc = NULL;
11976 }
11977 if (sax != NULL) ctxt->sax = NULL;
11978 xmlFreeParserCtxt(ctxt);
11979
11980 return(ret);
11981}
11982
11983/**
11984 * xmlSAXParseDTD:
11985 * @sax: the SAX handler block
11986 * @ExternalID: a NAME* containing the External ID of the DTD
11987 * @SystemID: a NAME* containing the URL to the DTD
11988 *
11989 * Load and parse an external subset.
11990 *
11991 * Returns the resulting xmlDtdPtr or NULL in case of error.
11992 */
11993
11994xmlDtdPtr
11995xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11996 const xmlChar *SystemID) {
11997 xmlDtdPtr ret = NULL;
11998 xmlParserCtxtPtr ctxt;
11999 xmlParserInputPtr input = NULL;
12000 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012001 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012002
12003 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12004
12005 ctxt = xmlNewParserCtxt();
12006 if (ctxt == NULL) {
12007 return(NULL);
12008 }
12009
12010 /*
12011 * Set-up the SAX context
12012 */
12013 if (sax != NULL) {
12014 if (ctxt->sax != NULL)
12015 xmlFree(ctxt->sax);
12016 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012017 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012018 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012019
12020 /*
12021 * Canonicalise the system ID
12022 */
12023 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012024 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012025 xmlFreeParserCtxt(ctxt);
12026 return(NULL);
12027 }
Owen Taylor3473f882001-02-23 17:55:21 +000012028
12029 /*
12030 * Ask the Entity resolver to load the damn thing
12031 */
12032
12033 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012034 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12035 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012036 if (input == NULL) {
12037 if (sax != NULL) ctxt->sax = NULL;
12038 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012039 if (systemIdCanonic != NULL)
12040 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012041 return(NULL);
12042 }
12043
12044 /*
12045 * plug some encoding conversion routines here.
12046 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012047 if (xmlPushInput(ctxt, input) < 0) {
12048 if (sax != NULL) ctxt->sax = NULL;
12049 xmlFreeParserCtxt(ctxt);
12050 if (systemIdCanonic != NULL)
12051 xmlFree(systemIdCanonic);
12052 return(NULL);
12053 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012054 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12055 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12056 xmlSwitchEncoding(ctxt, enc);
12057 }
Owen Taylor3473f882001-02-23 17:55:21 +000012058
12059 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012060 input->filename = (char *) systemIdCanonic;
12061 else
12062 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012063 input->line = 1;
12064 input->col = 1;
12065 input->base = ctxt->input->cur;
12066 input->cur = ctxt->input->cur;
12067 input->free = NULL;
12068
12069 /*
12070 * let's parse that entity knowing it's an external subset.
12071 */
12072 ctxt->inSubset = 2;
12073 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012074 if (ctxt->myDoc == NULL) {
12075 xmlErrMemory(ctxt, "New Doc failed");
12076 if (sax != NULL) ctxt->sax = NULL;
12077 xmlFreeParserCtxt(ctxt);
12078 return(NULL);
12079 }
12080 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012081 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12082 ExternalID, SystemID);
12083 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12084
12085 if (ctxt->myDoc != NULL) {
12086 if (ctxt->wellFormed) {
12087 ret = ctxt->myDoc->extSubset;
12088 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012089 if (ret != NULL) {
12090 xmlNodePtr tmp;
12091
12092 ret->doc = NULL;
12093 tmp = ret->children;
12094 while (tmp != NULL) {
12095 tmp->doc = NULL;
12096 tmp = tmp->next;
12097 }
12098 }
Owen Taylor3473f882001-02-23 17:55:21 +000012099 } else {
12100 ret = NULL;
12101 }
12102 xmlFreeDoc(ctxt->myDoc);
12103 ctxt->myDoc = NULL;
12104 }
12105 if (sax != NULL) ctxt->sax = NULL;
12106 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012107
Owen Taylor3473f882001-02-23 17:55:21 +000012108 return(ret);
12109}
12110
Daniel Veillard4432df22003-09-28 18:58:27 +000012111
Owen Taylor3473f882001-02-23 17:55:21 +000012112/**
12113 * xmlParseDTD:
12114 * @ExternalID: a NAME* containing the External ID of the DTD
12115 * @SystemID: a NAME* containing the URL to the DTD
12116 *
12117 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012118 *
Owen Taylor3473f882001-02-23 17:55:21 +000012119 * Returns the resulting xmlDtdPtr or NULL in case of error.
12120 */
12121
12122xmlDtdPtr
12123xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12124 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12125}
Daniel Veillard4432df22003-09-28 18:58:27 +000012126#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012127
12128/************************************************************************
12129 * *
12130 * Front ends when parsing an Entity *
12131 * *
12132 ************************************************************************/
12133
12134/**
Owen Taylor3473f882001-02-23 17:55:21 +000012135 * xmlParseCtxtExternalEntity:
12136 * @ctx: the existing parsing context
12137 * @URL: the URL for the entity to load
12138 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012139 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012140 *
12141 * Parse an external general entity within an existing parsing context
12142 * An external general parsed entity is well-formed if it matches the
12143 * production labeled extParsedEnt.
12144 *
12145 * [78] extParsedEnt ::= TextDecl? content
12146 *
12147 * Returns 0 if the entity is well formed, -1 in case of args problem and
12148 * the parser error code otherwise
12149 */
12150
12151int
12152xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012153 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012154 xmlParserCtxtPtr ctxt;
12155 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012156 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012157 xmlSAXHandlerPtr oldsax = NULL;
12158 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012159 xmlChar start[4];
12160 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012161
Daniel Veillardce682bc2004-11-05 17:22:25 +000012162 if (ctx == NULL) return(-1);
12163
Daniel Veillard0161e632008-08-28 15:36:32 +000012164 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12165 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012166 return(XML_ERR_ENTITY_LOOP);
12167 }
12168
Daniel Veillardcda96922001-08-21 10:56:31 +000012169 if (lst != NULL)
12170 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012171 if ((URL == NULL) && (ID == NULL))
12172 return(-1);
12173 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12174 return(-1);
12175
Rob Richards798743a2009-06-19 13:54:25 -040012176 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012177 if (ctxt == NULL) {
12178 return(-1);
12179 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012180
Owen Taylor3473f882001-02-23 17:55:21 +000012181 oldsax = ctxt->sax;
12182 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012183 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012184 newDoc = xmlNewDoc(BAD_CAST "1.0");
12185 if (newDoc == NULL) {
12186 xmlFreeParserCtxt(ctxt);
12187 return(-1);
12188 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012189 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012190 if (ctx->myDoc->dict) {
12191 newDoc->dict = ctx->myDoc->dict;
12192 xmlDictReference(newDoc->dict);
12193 }
Owen Taylor3473f882001-02-23 17:55:21 +000012194 if (ctx->myDoc != NULL) {
12195 newDoc->intSubset = ctx->myDoc->intSubset;
12196 newDoc->extSubset = ctx->myDoc->extSubset;
12197 }
12198 if (ctx->myDoc->URL != NULL) {
12199 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12200 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012201 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12202 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012203 ctxt->sax = oldsax;
12204 xmlFreeParserCtxt(ctxt);
12205 newDoc->intSubset = NULL;
12206 newDoc->extSubset = NULL;
12207 xmlFreeDoc(newDoc);
12208 return(-1);
12209 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012210 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012211 nodePush(ctxt, newDoc->children);
12212 if (ctx->myDoc == NULL) {
12213 ctxt->myDoc = newDoc;
12214 } else {
12215 ctxt->myDoc = ctx->myDoc;
12216 newDoc->children->doc = ctx->myDoc;
12217 }
12218
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012219 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012220 * Get the 4 first bytes and decode the charset
12221 * if enc != XML_CHAR_ENCODING_NONE
12222 * plug some encoding conversion routines.
12223 */
12224 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012225 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12226 start[0] = RAW;
12227 start[1] = NXT(1);
12228 start[2] = NXT(2);
12229 start[3] = NXT(3);
12230 enc = xmlDetectCharEncoding(start, 4);
12231 if (enc != XML_CHAR_ENCODING_NONE) {
12232 xmlSwitchEncoding(ctxt, enc);
12233 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012234 }
12235
Owen Taylor3473f882001-02-23 17:55:21 +000012236 /*
12237 * Parse a possible text declaration first
12238 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012239 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012240 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012241 /*
12242 * An XML-1.0 document can't reference an entity not XML-1.0
12243 */
12244 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12245 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12246 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12247 "Version mismatch between document and entity\n");
12248 }
Owen Taylor3473f882001-02-23 17:55:21 +000012249 }
12250
12251 /*
12252 * Doing validity checking on chunk doesn't make sense
12253 */
12254 ctxt->instate = XML_PARSER_CONTENT;
12255 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012256 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012257 ctxt->loadsubset = ctx->loadsubset;
12258 ctxt->depth = ctx->depth + 1;
12259 ctxt->replaceEntities = ctx->replaceEntities;
12260 if (ctxt->validate) {
12261 ctxt->vctxt.error = ctx->vctxt.error;
12262 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012263 } else {
12264 ctxt->vctxt.error = NULL;
12265 ctxt->vctxt.warning = NULL;
12266 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012267 ctxt->vctxt.nodeTab = NULL;
12268 ctxt->vctxt.nodeNr = 0;
12269 ctxt->vctxt.nodeMax = 0;
12270 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012271 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12272 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012273 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12274 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12275 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012276 ctxt->dictNames = ctx->dictNames;
12277 ctxt->attsDefault = ctx->attsDefault;
12278 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012279 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012280
12281 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012282
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012283 ctx->validate = ctxt->validate;
12284 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012285 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012286 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012287 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012288 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012289 }
12290 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012291 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012292 }
12293
12294 if (!ctxt->wellFormed) {
12295 if (ctxt->errNo == 0)
12296 ret = 1;
12297 else
12298 ret = ctxt->errNo;
12299 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012300 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012301 xmlNodePtr cur;
12302
12303 /*
12304 * Return the newly created nodeset after unlinking it from
12305 * they pseudo parent.
12306 */
12307 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012308 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012309 while (cur != NULL) {
12310 cur->parent = NULL;
12311 cur = cur->next;
12312 }
12313 newDoc->children->children = NULL;
12314 }
12315 ret = 0;
12316 }
12317 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012318 ctxt->dict = NULL;
12319 ctxt->attsDefault = NULL;
12320 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012321 xmlFreeParserCtxt(ctxt);
12322 newDoc->intSubset = NULL;
12323 newDoc->extSubset = NULL;
12324 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012325
Owen Taylor3473f882001-02-23 17:55:21 +000012326 return(ret);
12327}
12328
12329/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012330 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012331 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012332 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012333 * @sax: the SAX handler bloc (possibly NULL)
12334 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12335 * @depth: Used for loop detection, use 0
12336 * @URL: the URL for the entity to load
12337 * @ID: the System ID for the entity to load
12338 * @list: the return value for the set of parsed nodes
12339 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012340 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012341 *
12342 * Returns 0 if the entity is well formed, -1 in case of args problem and
12343 * the parser error code otherwise
12344 */
12345
Daniel Veillard7d515752003-09-26 19:12:37 +000012346static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012347xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12348 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012349 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012350 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012351 xmlParserCtxtPtr ctxt;
12352 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012353 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012354 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012355 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012356 xmlChar start[4];
12357 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012358
Daniel Veillard0161e632008-08-28 15:36:32 +000012359 if (((depth > 40) &&
12360 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12361 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012362 return(XML_ERR_ENTITY_LOOP);
12363 }
12364
Owen Taylor3473f882001-02-23 17:55:21 +000012365 if (list != NULL)
12366 *list = NULL;
12367 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012368 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012369 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012370 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012371
12372
Rob Richards9c0aa472009-03-26 18:10:19 +000012373 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012374 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012375 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012376 if (oldctxt != NULL) {
12377 ctxt->_private = oldctxt->_private;
12378 ctxt->loadsubset = oldctxt->loadsubset;
12379 ctxt->validate = oldctxt->validate;
12380 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012381 ctxt->record_info = oldctxt->record_info;
12382 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12383 ctxt->node_seq.length = oldctxt->node_seq.length;
12384 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012385 } else {
12386 /*
12387 * Doing validity checking on chunk without context
12388 * doesn't make sense
12389 */
12390 ctxt->_private = NULL;
12391 ctxt->validate = 0;
12392 ctxt->external = 2;
12393 ctxt->loadsubset = 0;
12394 }
Owen Taylor3473f882001-02-23 17:55:21 +000012395 if (sax != NULL) {
12396 oldsax = ctxt->sax;
12397 ctxt->sax = sax;
12398 if (user_data != NULL)
12399 ctxt->userData = user_data;
12400 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012401 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012402 newDoc = xmlNewDoc(BAD_CAST "1.0");
12403 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012404 ctxt->node_seq.maximum = 0;
12405 ctxt->node_seq.length = 0;
12406 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012407 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012408 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012409 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012410 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012411 newDoc->intSubset = doc->intSubset;
12412 newDoc->extSubset = doc->extSubset;
12413 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012414 xmlDictReference(newDoc->dict);
12415
Owen Taylor3473f882001-02-23 17:55:21 +000012416 if (doc->URL != NULL) {
12417 newDoc->URL = xmlStrdup(doc->URL);
12418 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012419 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12420 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012421 if (sax != NULL)
12422 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012423 ctxt->node_seq.maximum = 0;
12424 ctxt->node_seq.length = 0;
12425 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012426 xmlFreeParserCtxt(ctxt);
12427 newDoc->intSubset = NULL;
12428 newDoc->extSubset = NULL;
12429 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012430 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012431 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012432 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012433 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012434 ctxt->myDoc = doc;
12435 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012436
Daniel Veillard0161e632008-08-28 15:36:32 +000012437 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012438 * Get the 4 first bytes and decode the charset
12439 * if enc != XML_CHAR_ENCODING_NONE
12440 * plug some encoding conversion routines.
12441 */
12442 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012443 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12444 start[0] = RAW;
12445 start[1] = NXT(1);
12446 start[2] = NXT(2);
12447 start[3] = NXT(3);
12448 enc = xmlDetectCharEncoding(start, 4);
12449 if (enc != XML_CHAR_ENCODING_NONE) {
12450 xmlSwitchEncoding(ctxt, enc);
12451 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012452 }
12453
Owen Taylor3473f882001-02-23 17:55:21 +000012454 /*
12455 * Parse a possible text declaration first
12456 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012457 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012458 xmlParseTextDecl(ctxt);
12459 }
12460
Owen Taylor3473f882001-02-23 17:55:21 +000012461 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012462 ctxt->depth = depth;
12463
12464 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012465
Daniel Veillard561b7f82002-03-20 21:55:57 +000012466 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012467 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012468 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012469 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012470 }
12471 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012472 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012473 }
12474
12475 if (!ctxt->wellFormed) {
12476 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012477 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012478 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012479 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012480 } else {
12481 if (list != NULL) {
12482 xmlNodePtr cur;
12483
12484 /*
12485 * Return the newly created nodeset after unlinking it from
12486 * they pseudo parent.
12487 */
12488 cur = newDoc->children->children;
12489 *list = cur;
12490 while (cur != NULL) {
12491 cur->parent = NULL;
12492 cur = cur->next;
12493 }
12494 newDoc->children->children = NULL;
12495 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012496 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012497 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012498
12499 /*
12500 * Record in the parent context the number of entities replacement
12501 * done when parsing that reference.
12502 */
12503 oldctxt->nbentities += ctxt->nbentities;
12504 /*
12505 * Also record the size of the entity parsed
12506 */
12507 if (ctxt->input != NULL) {
12508 oldctxt->sizeentities += ctxt->input->consumed;
12509 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12510 }
12511 /*
12512 * And record the last error if any
12513 */
12514 if (ctxt->lastError.code != XML_ERR_OK)
12515 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12516
Owen Taylor3473f882001-02-23 17:55:21 +000012517 if (sax != NULL)
12518 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012519 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12520 oldctxt->node_seq.length = ctxt->node_seq.length;
12521 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012522 ctxt->node_seq.maximum = 0;
12523 ctxt->node_seq.length = 0;
12524 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012525 xmlFreeParserCtxt(ctxt);
12526 newDoc->intSubset = NULL;
12527 newDoc->extSubset = NULL;
12528 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012529
Owen Taylor3473f882001-02-23 17:55:21 +000012530 return(ret);
12531}
12532
Daniel Veillard81273902003-09-30 00:43:48 +000012533#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012534/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012535 * xmlParseExternalEntity:
12536 * @doc: the document the chunk pertains to
12537 * @sax: the SAX handler bloc (possibly NULL)
12538 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12539 * @depth: Used for loop detection, use 0
12540 * @URL: the URL for the entity to load
12541 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012542 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012543 *
12544 * Parse an external general entity
12545 * An external general parsed entity is well-formed if it matches the
12546 * production labeled extParsedEnt.
12547 *
12548 * [78] extParsedEnt ::= TextDecl? content
12549 *
12550 * Returns 0 if the entity is well formed, -1 in case of args problem and
12551 * the parser error code otherwise
12552 */
12553
12554int
12555xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012556 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012557 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012558 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012559}
12560
12561/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012562 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012563 * @doc: the document the chunk pertains to
12564 * @sax: the SAX handler bloc (possibly NULL)
12565 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12566 * @depth: Used for loop detection, use 0
12567 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012568 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012569 *
12570 * Parse a well-balanced chunk of an XML document
12571 * called by the parser
12572 * The allowed sequence for the Well Balanced Chunk is the one defined by
12573 * the content production in the XML grammar:
12574 *
12575 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12576 *
12577 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12578 * the parser error code otherwise
12579 */
12580
12581int
12582xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012583 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012584 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12585 depth, string, lst, 0 );
12586}
Daniel Veillard81273902003-09-30 00:43:48 +000012587#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012588
12589/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012590 * xmlParseBalancedChunkMemoryInternal:
12591 * @oldctxt: the existing parsing context
12592 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12593 * @user_data: the user data field for the parser context
12594 * @lst: the return value for the set of parsed nodes
12595 *
12596 *
12597 * Parse a well-balanced chunk of an XML document
12598 * called by the parser
12599 * The allowed sequence for the Well Balanced Chunk is the one defined by
12600 * the content production in the XML grammar:
12601 *
12602 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12603 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012604 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12605 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012606 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012607 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012608 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012609 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012610static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012611xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12612 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12613 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012614 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012615 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012616 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012617 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012618 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012619 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012620 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020012621#ifdef SAX2
12622 int i;
12623#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000012624
Daniel Veillard0161e632008-08-28 15:36:32 +000012625 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12626 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012627 return(XML_ERR_ENTITY_LOOP);
12628 }
12629
12630
12631 if (lst != NULL)
12632 *lst = NULL;
12633 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012634 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012635
12636 size = xmlStrlen(string);
12637
12638 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012639 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012640 if (user_data != NULL)
12641 ctxt->userData = user_data;
12642 else
12643 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012644 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12645 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012646 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12647 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12648 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012649
Daniel Veillard74eaec12009-08-26 15:57:20 +020012650#ifdef SAX2
12651 /* propagate namespaces down the entity */
12652 for (i = 0;i < oldctxt->nsNr;i += 2) {
12653 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12654 }
12655#endif
12656
Daniel Veillard328f48c2002-11-15 15:24:34 +000012657 oldsax = ctxt->sax;
12658 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012659 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012660 ctxt->replaceEntities = oldctxt->replaceEntities;
12661 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012662
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012663 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012664 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012665 newDoc = xmlNewDoc(BAD_CAST "1.0");
12666 if (newDoc == NULL) {
12667 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012668 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012669 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012670 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012671 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012672 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012673 newDoc->dict = ctxt->dict;
12674 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012675 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012676 } else {
12677 ctxt->myDoc = oldctxt->myDoc;
12678 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012679 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012680 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012681 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12682 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012683 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012684 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012685 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012686 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012687 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012688 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012689 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012690 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012691 ctxt->myDoc->children = NULL;
12692 ctxt->myDoc->last = NULL;
12693 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012694 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012695 ctxt->instate = XML_PARSER_CONTENT;
12696 ctxt->depth = oldctxt->depth + 1;
12697
Daniel Veillard328f48c2002-11-15 15:24:34 +000012698 ctxt->validate = 0;
12699 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012700 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12701 /*
12702 * ID/IDREF registration will be done in xmlValidateElement below
12703 */
12704 ctxt->loadsubset |= XML_SKIP_IDS;
12705 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012706 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012707 ctxt->attsDefault = oldctxt->attsDefault;
12708 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012709
Daniel Veillard68e9e742002-11-16 15:35:11 +000012710 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012711 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012712 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012713 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012714 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012715 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012716 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012717 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012718 }
12719
12720 if (!ctxt->wellFormed) {
12721 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012722 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012723 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012724 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012725 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012726 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012727 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012728
William M. Brack7b9154b2003-09-27 19:23:50 +000012729 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012730 xmlNodePtr cur;
12731
12732 /*
12733 * Return the newly created nodeset after unlinking it from
12734 * they pseudo parent.
12735 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012736 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012737 *lst = cur;
12738 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012739#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012740 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12741 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12742 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012743 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12744 oldctxt->myDoc, cur);
12745 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012746#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012747 cur->parent = NULL;
12748 cur = cur->next;
12749 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012750 ctxt->myDoc->children->children = NULL;
12751 }
12752 if (ctxt->myDoc != NULL) {
12753 xmlFreeNode(ctxt->myDoc->children);
12754 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012755 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012756 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012757
12758 /*
12759 * Record in the parent context the number of entities replacement
12760 * done when parsing that reference.
12761 */
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012762 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard0161e632008-08-28 15:36:32 +000012763 /*
12764 * Also record the last error if any
12765 */
12766 if (ctxt->lastError.code != XML_ERR_OK)
12767 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12768
Daniel Veillard328f48c2002-11-15 15:24:34 +000012769 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012770 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012771 ctxt->attsDefault = NULL;
12772 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012773 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012774 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012775 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012776 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012777
Daniel Veillard328f48c2002-11-15 15:24:34 +000012778 return(ret);
12779}
12780
Daniel Veillard29b17482004-08-16 00:39:03 +000012781/**
12782 * xmlParseInNodeContext:
12783 * @node: the context node
12784 * @data: the input string
12785 * @datalen: the input string length in bytes
12786 * @options: a combination of xmlParserOption
12787 * @lst: the return value for the set of parsed nodes
12788 *
12789 * Parse a well-balanced chunk of an XML document
12790 * within the context (DTD, namespaces, etc ...) of the given node.
12791 *
12792 * The allowed sequence for the data is a Well Balanced Chunk defined by
12793 * the content production in the XML grammar:
12794 *
12795 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12796 *
12797 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12798 * error code otherwise
12799 */
12800xmlParserErrors
12801xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12802 int options, xmlNodePtr *lst) {
12803#ifdef SAX2
12804 xmlParserCtxtPtr ctxt;
12805 xmlDocPtr doc = NULL;
12806 xmlNodePtr fake, cur;
12807 int nsnr = 0;
12808
12809 xmlParserErrors ret = XML_ERR_OK;
12810
12811 /*
12812 * check all input parameters, grab the document
12813 */
12814 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12815 return(XML_ERR_INTERNAL_ERROR);
12816 switch (node->type) {
12817 case XML_ELEMENT_NODE:
12818 case XML_ATTRIBUTE_NODE:
12819 case XML_TEXT_NODE:
12820 case XML_CDATA_SECTION_NODE:
12821 case XML_ENTITY_REF_NODE:
12822 case XML_PI_NODE:
12823 case XML_COMMENT_NODE:
12824 case XML_DOCUMENT_NODE:
12825 case XML_HTML_DOCUMENT_NODE:
12826 break;
12827 default:
12828 return(XML_ERR_INTERNAL_ERROR);
12829
12830 }
12831 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12832 (node->type != XML_DOCUMENT_NODE) &&
12833 (node->type != XML_HTML_DOCUMENT_NODE))
12834 node = node->parent;
12835 if (node == NULL)
12836 return(XML_ERR_INTERNAL_ERROR);
12837 if (node->type == XML_ELEMENT_NODE)
12838 doc = node->doc;
12839 else
12840 doc = (xmlDocPtr) node;
12841 if (doc == NULL)
12842 return(XML_ERR_INTERNAL_ERROR);
12843
12844 /*
12845 * allocate a context and set-up everything not related to the
12846 * node position in the tree
12847 */
12848 if (doc->type == XML_DOCUMENT_NODE)
12849 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12850#ifdef LIBXML_HTML_ENABLED
12851 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12852 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12853#endif
12854 else
12855 return(XML_ERR_INTERNAL_ERROR);
12856
12857 if (ctxt == NULL)
12858 return(XML_ERR_NO_MEMORY);
12859 fake = xmlNewComment(NULL);
12860 if (fake == NULL) {
12861 xmlFreeParserCtxt(ctxt);
12862 return(XML_ERR_NO_MEMORY);
12863 }
12864 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012865
12866 /*
12867 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12868 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12869 * we must wait until the last moment to free the original one.
12870 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012871 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012872 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012873 xmlDictFree(ctxt->dict);
12874 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012875 } else
12876 options |= XML_PARSE_NODICT;
12877
Daniel Veillard37334572008-07-31 08:20:02 +000012878 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012879 xmlDetectSAX2(ctxt);
12880 ctxt->myDoc = doc;
12881
12882 if (node->type == XML_ELEMENT_NODE) {
12883 nodePush(ctxt, node);
12884 /*
12885 * initialize the SAX2 namespaces stack
12886 */
12887 cur = node;
12888 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12889 xmlNsPtr ns = cur->nsDef;
12890 const xmlChar *iprefix, *ihref;
12891
12892 while (ns != NULL) {
12893 if (ctxt->dict) {
12894 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12895 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12896 } else {
12897 iprefix = ns->prefix;
12898 ihref = ns->href;
12899 }
12900
12901 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12902 nsPush(ctxt, iprefix, ihref);
12903 nsnr++;
12904 }
12905 ns = ns->next;
12906 }
12907 cur = cur->parent;
12908 }
12909 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000012910 }
Daniel Veillard29b17482004-08-16 00:39:03 +000012911
12912 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12913 /*
12914 * ID/IDREF registration will be done in xmlValidateElement below
12915 */
12916 ctxt->loadsubset |= XML_SKIP_IDS;
12917 }
12918
Daniel Veillard499cc922006-01-18 17:22:35 +000012919#ifdef LIBXML_HTML_ENABLED
12920 if (doc->type == XML_HTML_DOCUMENT_NODE)
12921 __htmlParseContent(ctxt);
12922 else
12923#endif
12924 xmlParseContent(ctxt);
12925
Daniel Veillard29b17482004-08-16 00:39:03 +000012926 nsPop(ctxt, nsnr);
12927 if ((RAW == '<') && (NXT(1) == '/')) {
12928 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12929 } else if (RAW != 0) {
12930 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12931 }
12932 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12933 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12934 ctxt->wellFormed = 0;
12935 }
12936
12937 if (!ctxt->wellFormed) {
12938 if (ctxt->errNo == 0)
12939 ret = XML_ERR_INTERNAL_ERROR;
12940 else
12941 ret = (xmlParserErrors)ctxt->errNo;
12942 } else {
12943 ret = XML_ERR_OK;
12944 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012945
Daniel Veillard29b17482004-08-16 00:39:03 +000012946 /*
12947 * Return the newly created nodeset after unlinking it from
12948 * the pseudo sibling.
12949 */
Daniel Veillard0161e632008-08-28 15:36:32 +000012950
Daniel Veillard29b17482004-08-16 00:39:03 +000012951 cur = fake->next;
12952 fake->next = NULL;
12953 node->last = fake;
12954
12955 if (cur != NULL) {
12956 cur->prev = NULL;
12957 }
12958
12959 *lst = cur;
12960
12961 while (cur != NULL) {
12962 cur->parent = NULL;
12963 cur = cur->next;
12964 }
12965
12966 xmlUnlinkNode(fake);
12967 xmlFreeNode(fake);
12968
12969
12970 if (ret != XML_ERR_OK) {
12971 xmlFreeNodeList(*lst);
12972 *lst = NULL;
12973 }
William M. Brackc3f81342004-10-03 01:22:44 +000012974
William M. Brackb7b54de2004-10-06 16:38:01 +000012975 if (doc->dict != NULL)
12976 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012977 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012978
Daniel Veillard29b17482004-08-16 00:39:03 +000012979 return(ret);
12980#else /* !SAX2 */
12981 return(XML_ERR_INTERNAL_ERROR);
12982#endif
12983}
12984
Daniel Veillard81273902003-09-30 00:43:48 +000012985#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012986/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012987 * xmlParseBalancedChunkMemoryRecover:
12988 * @doc: the document the chunk pertains to
12989 * @sax: the SAX handler bloc (possibly NULL)
12990 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12991 * @depth: Used for loop detection, use 0
12992 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12993 * @lst: the return value for the set of parsed nodes
12994 * @recover: return nodes even if the data is broken (use 0)
12995 *
12996 *
12997 * Parse a well-balanced chunk of an XML document
12998 * called by the parser
12999 * The allowed sequence for the Well Balanced Chunk is the one defined by
13000 * the content production in the XML grammar:
13001 *
13002 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13003 *
13004 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13005 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013006 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013007 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013008 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13009 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013010 */
13011int
13012xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013013 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013014 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013015 xmlParserCtxtPtr ctxt;
13016 xmlDocPtr newDoc;
13017 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013018 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013019 int size;
13020 int ret = 0;
13021
Daniel Veillard0161e632008-08-28 15:36:32 +000013022 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013023 return(XML_ERR_ENTITY_LOOP);
13024 }
13025
13026
Daniel Veillardcda96922001-08-21 10:56:31 +000013027 if (lst != NULL)
13028 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013029 if (string == NULL)
13030 return(-1);
13031
13032 size = xmlStrlen(string);
13033
13034 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13035 if (ctxt == NULL) return(-1);
13036 ctxt->userData = ctxt;
13037 if (sax != NULL) {
13038 oldsax = ctxt->sax;
13039 ctxt->sax = sax;
13040 if (user_data != NULL)
13041 ctxt->userData = user_data;
13042 }
13043 newDoc = xmlNewDoc(BAD_CAST "1.0");
13044 if (newDoc == NULL) {
13045 xmlFreeParserCtxt(ctxt);
13046 return(-1);
13047 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013048 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013049 if ((doc != NULL) && (doc->dict != NULL)) {
13050 xmlDictFree(ctxt->dict);
13051 ctxt->dict = doc->dict;
13052 xmlDictReference(ctxt->dict);
13053 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13054 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13055 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13056 ctxt->dictNames = 1;
13057 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013058 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013059 }
Owen Taylor3473f882001-02-23 17:55:21 +000013060 if (doc != NULL) {
13061 newDoc->intSubset = doc->intSubset;
13062 newDoc->extSubset = doc->extSubset;
13063 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013064 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13065 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013066 if (sax != NULL)
13067 ctxt->sax = oldsax;
13068 xmlFreeParserCtxt(ctxt);
13069 newDoc->intSubset = NULL;
13070 newDoc->extSubset = NULL;
13071 xmlFreeDoc(newDoc);
13072 return(-1);
13073 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013074 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13075 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013076 if (doc == NULL) {
13077 ctxt->myDoc = newDoc;
13078 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013079 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013080 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013081 /* Ensure that doc has XML spec namespace */
13082 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13083 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013084 }
13085 ctxt->instate = XML_PARSER_CONTENT;
13086 ctxt->depth = depth;
13087
13088 /*
13089 * Doing validity checking on chunk doesn't make sense
13090 */
13091 ctxt->validate = 0;
13092 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013093 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013094
Daniel Veillardb39bc392002-10-26 19:29:51 +000013095 if ( doc != NULL ){
13096 content = doc->children;
13097 doc->children = NULL;
13098 xmlParseContent(ctxt);
13099 doc->children = content;
13100 }
13101 else {
13102 xmlParseContent(ctxt);
13103 }
Owen Taylor3473f882001-02-23 17:55:21 +000013104 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013105 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013106 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013107 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013108 }
13109 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013110 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013111 }
13112
13113 if (!ctxt->wellFormed) {
13114 if (ctxt->errNo == 0)
13115 ret = 1;
13116 else
13117 ret = ctxt->errNo;
13118 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013119 ret = 0;
13120 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013121
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013122 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13123 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013124
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013125 /*
13126 * Return the newly created nodeset after unlinking it from
13127 * they pseudo parent.
13128 */
13129 cur = newDoc->children->children;
13130 *lst = cur;
13131 while (cur != NULL) {
13132 xmlSetTreeDoc(cur, doc);
13133 cur->parent = NULL;
13134 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013135 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013136 newDoc->children->children = NULL;
13137 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013138
13139 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013140 ctxt->sax = oldsax;
13141 xmlFreeParserCtxt(ctxt);
13142 newDoc->intSubset = NULL;
13143 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013144 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013145 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013146
Owen Taylor3473f882001-02-23 17:55:21 +000013147 return(ret);
13148}
13149
13150/**
13151 * xmlSAXParseEntity:
13152 * @sax: the SAX handler block
13153 * @filename: the filename
13154 *
13155 * parse an XML external entity out of context and build a tree.
13156 * It use the given SAX function block to handle the parsing callback.
13157 * If sax is NULL, fallback to the default DOM tree building routines.
13158 *
13159 * [78] extParsedEnt ::= TextDecl? content
13160 *
13161 * This correspond to a "Well Balanced" chunk
13162 *
13163 * Returns the resulting document tree
13164 */
13165
13166xmlDocPtr
13167xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13168 xmlDocPtr ret;
13169 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013170
13171 ctxt = xmlCreateFileParserCtxt(filename);
13172 if (ctxt == NULL) {
13173 return(NULL);
13174 }
13175 if (sax != NULL) {
13176 if (ctxt->sax != NULL)
13177 xmlFree(ctxt->sax);
13178 ctxt->sax = sax;
13179 ctxt->userData = NULL;
13180 }
13181
Owen Taylor3473f882001-02-23 17:55:21 +000013182 xmlParseExtParsedEnt(ctxt);
13183
13184 if (ctxt->wellFormed)
13185 ret = ctxt->myDoc;
13186 else {
13187 ret = NULL;
13188 xmlFreeDoc(ctxt->myDoc);
13189 ctxt->myDoc = NULL;
13190 }
13191 if (sax != NULL)
13192 ctxt->sax = NULL;
13193 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013194
Owen Taylor3473f882001-02-23 17:55:21 +000013195 return(ret);
13196}
13197
13198/**
13199 * xmlParseEntity:
13200 * @filename: the filename
13201 *
13202 * parse an XML external entity out of context and build a tree.
13203 *
13204 * [78] extParsedEnt ::= TextDecl? content
13205 *
13206 * This correspond to a "Well Balanced" chunk
13207 *
13208 * Returns the resulting document tree
13209 */
13210
13211xmlDocPtr
13212xmlParseEntity(const char *filename) {
13213 return(xmlSAXParseEntity(NULL, filename));
13214}
Daniel Veillard81273902003-09-30 00:43:48 +000013215#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013216
13217/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013218 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013219 * @URL: the entity URL
13220 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013221 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013222 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013223 *
13224 * Create a parser context for an external entity
13225 * Automatic support for ZLIB/Compress compressed document is provided
13226 * by default if found at compile-time.
13227 *
13228 * Returns the new parser context or NULL
13229 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013230static xmlParserCtxtPtr
13231xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13232 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013233 xmlParserCtxtPtr ctxt;
13234 xmlParserInputPtr inputStream;
13235 char *directory = NULL;
13236 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013237
Owen Taylor3473f882001-02-23 17:55:21 +000013238 ctxt = xmlNewParserCtxt();
13239 if (ctxt == NULL) {
13240 return(NULL);
13241 }
13242
Daniel Veillard48247b42009-07-10 16:12:46 +020013243 if (pctx != NULL) {
13244 ctxt->options = pctx->options;
13245 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013246 }
13247
Owen Taylor3473f882001-02-23 17:55:21 +000013248 uri = xmlBuildURI(URL, base);
13249
13250 if (uri == NULL) {
13251 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13252 if (inputStream == NULL) {
13253 xmlFreeParserCtxt(ctxt);
13254 return(NULL);
13255 }
13256
13257 inputPush(ctxt, inputStream);
13258
13259 if ((ctxt->directory == NULL) && (directory == NULL))
13260 directory = xmlParserGetDirectory((char *)URL);
13261 if ((ctxt->directory == NULL) && (directory != NULL))
13262 ctxt->directory = directory;
13263 } else {
13264 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13265 if (inputStream == NULL) {
13266 xmlFree(uri);
13267 xmlFreeParserCtxt(ctxt);
13268 return(NULL);
13269 }
13270
13271 inputPush(ctxt, inputStream);
13272
13273 if ((ctxt->directory == NULL) && (directory == NULL))
13274 directory = xmlParserGetDirectory((char *)uri);
13275 if ((ctxt->directory == NULL) && (directory != NULL))
13276 ctxt->directory = directory;
13277 xmlFree(uri);
13278 }
Owen Taylor3473f882001-02-23 17:55:21 +000013279 return(ctxt);
13280}
13281
Rob Richards9c0aa472009-03-26 18:10:19 +000013282/**
13283 * xmlCreateEntityParserCtxt:
13284 * @URL: the entity URL
13285 * @ID: the entity PUBLIC ID
13286 * @base: a possible base for the target URI
13287 *
13288 * Create a parser context for an external entity
13289 * Automatic support for ZLIB/Compress compressed document is provided
13290 * by default if found at compile-time.
13291 *
13292 * Returns the new parser context or NULL
13293 */
13294xmlParserCtxtPtr
13295xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13296 const xmlChar *base) {
13297 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13298
13299}
13300
Owen Taylor3473f882001-02-23 17:55:21 +000013301/************************************************************************
13302 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013303 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013304 * *
13305 ************************************************************************/
13306
13307/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013308 * xmlCreateURLParserCtxt:
13309 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013310 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013311 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013312 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013313 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013314 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013315 *
13316 * Returns the new parser context or NULL
13317 */
13318xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013319xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013320{
13321 xmlParserCtxtPtr ctxt;
13322 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013323 char *directory = NULL;
13324
Owen Taylor3473f882001-02-23 17:55:21 +000013325 ctxt = xmlNewParserCtxt();
13326 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013327 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013328 return(NULL);
13329 }
13330
Daniel Veillarddf292f72005-01-16 19:00:15 +000013331 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013332 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013333 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013334
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013335 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013336 if (inputStream == NULL) {
13337 xmlFreeParserCtxt(ctxt);
13338 return(NULL);
13339 }
13340
Owen Taylor3473f882001-02-23 17:55:21 +000013341 inputPush(ctxt, inputStream);
13342 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013343 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013344 if ((ctxt->directory == NULL) && (directory != NULL))
13345 ctxt->directory = directory;
13346
13347 return(ctxt);
13348}
13349
Daniel Veillard61b93382003-11-03 14:28:31 +000013350/**
13351 * xmlCreateFileParserCtxt:
13352 * @filename: the filename
13353 *
13354 * Create a parser context for a file content.
13355 * Automatic support for ZLIB/Compress compressed document is provided
13356 * by default if found at compile-time.
13357 *
13358 * Returns the new parser context or NULL
13359 */
13360xmlParserCtxtPtr
13361xmlCreateFileParserCtxt(const char *filename)
13362{
13363 return(xmlCreateURLParserCtxt(filename, 0));
13364}
13365
Daniel Veillard81273902003-09-30 00:43:48 +000013366#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013367/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013368 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013369 * @sax: the SAX handler block
13370 * @filename: the filename
13371 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13372 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013373 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013374 *
13375 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13376 * compressed document is provided by default if found at compile-time.
13377 * It use the given SAX function block to handle the parsing callback.
13378 * If sax is NULL, fallback to the default DOM tree building routines.
13379 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013380 * User data (void *) is stored within the parser context in the
13381 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013382 *
Owen Taylor3473f882001-02-23 17:55:21 +000013383 * Returns the resulting document tree
13384 */
13385
13386xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013387xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13388 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013389 xmlDocPtr ret;
13390 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013391
Daniel Veillard635ef722001-10-29 11:48:19 +000013392 xmlInitParser();
13393
Owen Taylor3473f882001-02-23 17:55:21 +000013394 ctxt = xmlCreateFileParserCtxt(filename);
13395 if (ctxt == NULL) {
13396 return(NULL);
13397 }
13398 if (sax != NULL) {
13399 if (ctxt->sax != NULL)
13400 xmlFree(ctxt->sax);
13401 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013402 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013403 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013404 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013405 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013406 }
Owen Taylor3473f882001-02-23 17:55:21 +000013407
Daniel Veillard37d2d162008-03-14 10:54:00 +000013408 if (ctxt->directory == NULL)
13409 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013410
Daniel Veillarddad3f682002-11-17 16:47:27 +000013411 ctxt->recovery = recovery;
13412
Owen Taylor3473f882001-02-23 17:55:21 +000013413 xmlParseDocument(ctxt);
13414
William M. Brackc07329e2003-09-08 01:57:30 +000013415 if ((ctxt->wellFormed) || recovery) {
13416 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013417 if (ret != NULL) {
13418 if (ctxt->input->buf->compressed > 0)
13419 ret->compression = 9;
13420 else
13421 ret->compression = ctxt->input->buf->compressed;
13422 }
William M. Brackc07329e2003-09-08 01:57:30 +000013423 }
Owen Taylor3473f882001-02-23 17:55:21 +000013424 else {
13425 ret = NULL;
13426 xmlFreeDoc(ctxt->myDoc);
13427 ctxt->myDoc = NULL;
13428 }
13429 if (sax != NULL)
13430 ctxt->sax = NULL;
13431 xmlFreeParserCtxt(ctxt);
13432
13433 return(ret);
13434}
13435
13436/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013437 * xmlSAXParseFile:
13438 * @sax: the SAX handler block
13439 * @filename: the filename
13440 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13441 * documents
13442 *
13443 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13444 * compressed document is provided by default if found at compile-time.
13445 * It use the given SAX function block to handle the parsing callback.
13446 * If sax is NULL, fallback to the default DOM tree building routines.
13447 *
13448 * Returns the resulting document tree
13449 */
13450
13451xmlDocPtr
13452xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13453 int recovery) {
13454 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13455}
13456
13457/**
Owen Taylor3473f882001-02-23 17:55:21 +000013458 * xmlRecoverDoc:
13459 * @cur: a pointer to an array of xmlChar
13460 *
13461 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013462 * In the case the document is not Well Formed, a attempt to build a
13463 * tree is tried anyway
13464 *
13465 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013466 */
13467
13468xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020013469xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013470 return(xmlSAXParseDoc(NULL, cur, 1));
13471}
13472
13473/**
13474 * xmlParseFile:
13475 * @filename: the filename
13476 *
13477 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13478 * compressed document is provided by default if found at compile-time.
13479 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013480 * Returns the resulting document tree if the file was wellformed,
13481 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013482 */
13483
13484xmlDocPtr
13485xmlParseFile(const char *filename) {
13486 return(xmlSAXParseFile(NULL, filename, 0));
13487}
13488
13489/**
13490 * xmlRecoverFile:
13491 * @filename: the filename
13492 *
13493 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13494 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013495 * In the case the document is not Well Formed, it attempts to build
13496 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013497 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013498 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013499 */
13500
13501xmlDocPtr
13502xmlRecoverFile(const char *filename) {
13503 return(xmlSAXParseFile(NULL, filename, 1));
13504}
13505
13506
13507/**
13508 * xmlSetupParserForBuffer:
13509 * @ctxt: an XML parser context
13510 * @buffer: a xmlChar * buffer
13511 * @filename: a file name
13512 *
13513 * Setup the parser context to parse a new buffer; Clears any prior
13514 * contents from the parser context. The buffer parameter must not be
13515 * NULL, but the filename parameter can be
13516 */
13517void
13518xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13519 const char* filename)
13520{
13521 xmlParserInputPtr input;
13522
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013523 if ((ctxt == NULL) || (buffer == NULL))
13524 return;
13525
Owen Taylor3473f882001-02-23 17:55:21 +000013526 input = xmlNewInputStream(ctxt);
13527 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013528 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013529 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013530 return;
13531 }
13532
13533 xmlClearParserCtxt(ctxt);
13534 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013535 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013536 input->base = buffer;
13537 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013538 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013539 inputPush(ctxt, input);
13540}
13541
13542/**
13543 * xmlSAXUserParseFile:
13544 * @sax: a SAX handler
13545 * @user_data: The user data returned on SAX callbacks
13546 * @filename: a file name
13547 *
13548 * parse an XML file and call the given SAX handler routines.
13549 * Automatic support for ZLIB/Compress compressed document is provided
13550 *
13551 * Returns 0 in case of success or a error number otherwise
13552 */
13553int
13554xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13555 const char *filename) {
13556 int ret = 0;
13557 xmlParserCtxtPtr ctxt;
13558
13559 ctxt = xmlCreateFileParserCtxt(filename);
13560 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013561 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013562 xmlFree(ctxt->sax);
13563 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013564 xmlDetectSAX2(ctxt);
13565
Owen Taylor3473f882001-02-23 17:55:21 +000013566 if (user_data != NULL)
13567 ctxt->userData = user_data;
13568
13569 xmlParseDocument(ctxt);
13570
13571 if (ctxt->wellFormed)
13572 ret = 0;
13573 else {
13574 if (ctxt->errNo != 0)
13575 ret = ctxt->errNo;
13576 else
13577 ret = -1;
13578 }
13579 if (sax != NULL)
13580 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013581 if (ctxt->myDoc != NULL) {
13582 xmlFreeDoc(ctxt->myDoc);
13583 ctxt->myDoc = NULL;
13584 }
Owen Taylor3473f882001-02-23 17:55:21 +000013585 xmlFreeParserCtxt(ctxt);
13586
13587 return ret;
13588}
Daniel Veillard81273902003-09-30 00:43:48 +000013589#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013590
13591/************************************************************************
13592 * *
13593 * Front ends when parsing from memory *
13594 * *
13595 ************************************************************************/
13596
13597/**
13598 * xmlCreateMemoryParserCtxt:
13599 * @buffer: a pointer to a char array
13600 * @size: the size of the array
13601 *
13602 * Create a parser context for an XML in-memory document.
13603 *
13604 * Returns the new parser context or NULL
13605 */
13606xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013607xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013608 xmlParserCtxtPtr ctxt;
13609 xmlParserInputPtr input;
13610 xmlParserInputBufferPtr buf;
13611
13612 if (buffer == NULL)
13613 return(NULL);
13614 if (size <= 0)
13615 return(NULL);
13616
13617 ctxt = xmlNewParserCtxt();
13618 if (ctxt == NULL)
13619 return(NULL);
13620
Daniel Veillard53350552003-09-18 13:35:51 +000013621 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013622 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013623 if (buf == NULL) {
13624 xmlFreeParserCtxt(ctxt);
13625 return(NULL);
13626 }
Owen Taylor3473f882001-02-23 17:55:21 +000013627
13628 input = xmlNewInputStream(ctxt);
13629 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013630 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013631 xmlFreeParserCtxt(ctxt);
13632 return(NULL);
13633 }
13634
13635 input->filename = NULL;
13636 input->buf = buf;
13637 input->base = input->buf->buffer->content;
13638 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013639 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013640
13641 inputPush(ctxt, input);
13642 return(ctxt);
13643}
13644
Daniel Veillard81273902003-09-30 00:43:48 +000013645#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013646/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013647 * xmlSAXParseMemoryWithData:
13648 * @sax: the SAX handler block
13649 * @buffer: an pointer to a char array
13650 * @size: the size of the array
13651 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13652 * documents
13653 * @data: the userdata
13654 *
13655 * parse an XML in-memory block and use the given SAX function block
13656 * to handle the parsing callback. If sax is NULL, fallback to the default
13657 * DOM tree building routines.
13658 *
13659 * User data (void *) is stored within the parser context in the
13660 * context's _private member, so it is available nearly everywhere in libxml
13661 *
13662 * Returns the resulting document tree
13663 */
13664
13665xmlDocPtr
13666xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13667 int size, int recovery, void *data) {
13668 xmlDocPtr ret;
13669 xmlParserCtxtPtr ctxt;
13670
Daniel Veillardab2a7632009-07-09 08:45:03 +020013671 xmlInitParser();
13672
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013673 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13674 if (ctxt == NULL) return(NULL);
13675 if (sax != NULL) {
13676 if (ctxt->sax != NULL)
13677 xmlFree(ctxt->sax);
13678 ctxt->sax = sax;
13679 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013680 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013681 if (data!=NULL) {
13682 ctxt->_private=data;
13683 }
13684
Daniel Veillardadba5f12003-04-04 16:09:01 +000013685 ctxt->recovery = recovery;
13686
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013687 xmlParseDocument(ctxt);
13688
13689 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13690 else {
13691 ret = NULL;
13692 xmlFreeDoc(ctxt->myDoc);
13693 ctxt->myDoc = NULL;
13694 }
13695 if (sax != NULL)
13696 ctxt->sax = NULL;
13697 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013698
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013699 return(ret);
13700}
13701
13702/**
Owen Taylor3473f882001-02-23 17:55:21 +000013703 * xmlSAXParseMemory:
13704 * @sax: the SAX handler block
13705 * @buffer: an pointer to a char array
13706 * @size: the size of the array
13707 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13708 * documents
13709 *
13710 * parse an XML in-memory block and use the given SAX function block
13711 * to handle the parsing callback. If sax is NULL, fallback to the default
13712 * DOM tree building routines.
13713 *
13714 * Returns the resulting document tree
13715 */
13716xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013717xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13718 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013719 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013720}
13721
13722/**
13723 * xmlParseMemory:
13724 * @buffer: an pointer to a char array
13725 * @size: the size of the array
13726 *
13727 * parse an XML in-memory block and build a tree.
13728 *
13729 * Returns the resulting document tree
13730 */
13731
Daniel Veillard50822cb2001-07-26 20:05:51 +000013732xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013733 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13734}
13735
13736/**
13737 * xmlRecoverMemory:
13738 * @buffer: an pointer to a char array
13739 * @size: the size of the array
13740 *
13741 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013742 * In the case the document is not Well Formed, an attempt to
13743 * build a tree is tried anyway
13744 *
13745 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013746 */
13747
Daniel Veillard50822cb2001-07-26 20:05:51 +000013748xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013749 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13750}
13751
13752/**
13753 * xmlSAXUserParseMemory:
13754 * @sax: a SAX handler
13755 * @user_data: The user data returned on SAX callbacks
13756 * @buffer: an in-memory XML document input
13757 * @size: the length of the XML document in bytes
13758 *
13759 * A better SAX parsing routine.
13760 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020013761 *
Owen Taylor3473f882001-02-23 17:55:21 +000013762 * Returns 0 in case of success or a error number otherwise
13763 */
13764int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013765 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013766 int ret = 0;
13767 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013768
13769 xmlInitParser();
13770
Owen Taylor3473f882001-02-23 17:55:21 +000013771 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13772 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013773 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13774 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013775 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013776 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013777
Daniel Veillard30211a02001-04-26 09:33:18 +000013778 if (user_data != NULL)
13779 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013780
Owen Taylor3473f882001-02-23 17:55:21 +000013781 xmlParseDocument(ctxt);
13782
13783 if (ctxt->wellFormed)
13784 ret = 0;
13785 else {
13786 if (ctxt->errNo != 0)
13787 ret = ctxt->errNo;
13788 else
13789 ret = -1;
13790 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013791 if (sax != NULL)
13792 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013793 if (ctxt->myDoc != NULL) {
13794 xmlFreeDoc(ctxt->myDoc);
13795 ctxt->myDoc = NULL;
13796 }
Owen Taylor3473f882001-02-23 17:55:21 +000013797 xmlFreeParserCtxt(ctxt);
13798
13799 return ret;
13800}
Daniel Veillard81273902003-09-30 00:43:48 +000013801#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013802
13803/**
13804 * xmlCreateDocParserCtxt:
13805 * @cur: a pointer to an array of xmlChar
13806 *
13807 * Creates a parser context for an XML in-memory document.
13808 *
13809 * Returns the new parser context or NULL
13810 */
13811xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013812xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013813 int len;
13814
13815 if (cur == NULL)
13816 return(NULL);
13817 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013818 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013819}
13820
Daniel Veillard81273902003-09-30 00:43:48 +000013821#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013822/**
13823 * xmlSAXParseDoc:
13824 * @sax: the SAX handler block
13825 * @cur: a pointer to an array of xmlChar
13826 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13827 * documents
13828 *
13829 * parse an XML in-memory document and build a tree.
13830 * It use the given SAX function block to handle the parsing callback.
13831 * If sax is NULL, fallback to the default DOM tree building routines.
13832 *
13833 * Returns the resulting document tree
13834 */
13835
13836xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013837xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013838 xmlDocPtr ret;
13839 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013840 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013841
Daniel Veillard38936062004-11-04 17:45:11 +000013842 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013843
13844
13845 ctxt = xmlCreateDocParserCtxt(cur);
13846 if (ctxt == NULL) return(NULL);
13847 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013848 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013849 ctxt->sax = sax;
13850 ctxt->userData = NULL;
13851 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013852 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013853
13854 xmlParseDocument(ctxt);
13855 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13856 else {
13857 ret = NULL;
13858 xmlFreeDoc(ctxt->myDoc);
13859 ctxt->myDoc = NULL;
13860 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013861 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013862 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013863 xmlFreeParserCtxt(ctxt);
13864
13865 return(ret);
13866}
13867
13868/**
13869 * xmlParseDoc:
13870 * @cur: a pointer to an array of xmlChar
13871 *
13872 * parse an XML in-memory document and build a tree.
13873 *
13874 * Returns the resulting document tree
13875 */
13876
13877xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013878xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013879 return(xmlSAXParseDoc(NULL, cur, 0));
13880}
Daniel Veillard81273902003-09-30 00:43:48 +000013881#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013882
Daniel Veillard81273902003-09-30 00:43:48 +000013883#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013884/************************************************************************
13885 * *
13886 * Specific function to keep track of entities references *
13887 * and used by the XSLT debugger *
13888 * *
13889 ************************************************************************/
13890
13891static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13892
13893/**
13894 * xmlAddEntityReference:
13895 * @ent : A valid entity
13896 * @firstNode : A valid first node for children of entity
13897 * @lastNode : A valid last node of children entity
13898 *
13899 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13900 */
13901static void
13902xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13903 xmlNodePtr lastNode)
13904{
13905 if (xmlEntityRefFunc != NULL) {
13906 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13907 }
13908}
13909
13910
13911/**
13912 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013913 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013914 *
13915 * Set the function to call call back when a xml reference has been made
13916 */
13917void
13918xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13919{
13920 xmlEntityRefFunc = func;
13921}
Daniel Veillard81273902003-09-30 00:43:48 +000013922#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013923
13924/************************************************************************
13925 * *
13926 * Miscellaneous *
13927 * *
13928 ************************************************************************/
13929
13930#ifdef LIBXML_XPATH_ENABLED
13931#include <libxml/xpath.h>
13932#endif
13933
Daniel Veillardffa3c742005-07-21 13:24:09 +000013934extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013935static int xmlParserInitialized = 0;
13936
13937/**
13938 * xmlInitParser:
13939 *
13940 * Initialization function for the XML parser.
13941 * This is not reentrant. Call once before processing in case of
13942 * use in multithreaded programs.
13943 */
13944
13945void
13946xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013947 if (xmlParserInitialized != 0)
13948 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013949
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013950#ifdef LIBXML_THREAD_ENABLED
13951 __xmlGlobalInitMutexLock();
13952 if (xmlParserInitialized == 0) {
13953#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020013954 xmlInitGlobals();
13955 xmlInitThreads();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013956 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13957 (xmlGenericError == NULL))
13958 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013959 xmlInitMemory();
13960 xmlInitCharEncodingHandlers();
13961 xmlDefaultSAXHandlerInit();
13962 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013963#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013964 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013965#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013966#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013967 htmlInitAutoClose();
13968 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013969#endif
13970#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013971 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013972#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013973 xmlParserInitialized = 1;
13974#ifdef LIBXML_THREAD_ENABLED
13975 }
13976 __xmlGlobalInitMutexUnlock();
13977#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013978}
13979
13980/**
13981 * xmlCleanupParser:
13982 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013983 * This function name is somewhat misleading. It does not clean up
13984 * parser state, it cleans up memory allocated by the library itself.
13985 * It is a cleanup function for the XML library. It tries to reclaim all
13986 * related global memory allocated for the library processing.
13987 * It doesn't deallocate any document related memory. One should
13988 * call xmlCleanupParser() only when the process has finished using
13989 * the library and all XML/HTML documents built with it.
13990 * See also xmlInitParser() which has the opposite function of preparing
13991 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000013992 *
13993 * WARNING: if your application is multithreaded or has plugin support
13994 * calling this may crash the application if another thread or
13995 * a plugin is still using libxml2. It's sometimes very hard to
13996 * guess if libxml2 is in use in the application, some libraries
13997 * or plugins may use it without notice. In case of doubt abstain
13998 * from calling this function or do it just before calling exit()
13999 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014000 */
14001
14002void
14003xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014004 if (!xmlParserInitialized)
14005 return;
14006
Owen Taylor3473f882001-02-23 17:55:21 +000014007 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014008#ifdef LIBXML_CATALOG_ENABLED
14009 xmlCatalogCleanup();
14010#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014011 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014012 xmlCleanupInputCallbacks();
14013#ifdef LIBXML_OUTPUT_ENABLED
14014 xmlCleanupOutputCallbacks();
14015#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014016#ifdef LIBXML_SCHEMAS_ENABLED
14017 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014018 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014019#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000014020 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014021 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014022 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014023 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014024 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014025}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014026
14027/************************************************************************
14028 * *
14029 * New set (2.6.0) of simpler and more flexible APIs *
14030 * *
14031 ************************************************************************/
14032
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used; a NULL dict means the string is always freed. A NULL @str
 * is ignored. Note that @str is evaluated more than once.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be followed by an else branch; callers
 * supply the terminating semicolon.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
14044
14045/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014046 * xmlCtxtReset:
14047 * @ctxt: an XML parser context
14048 *
14049 * Reset a parser context
14050 */
14051void
14052xmlCtxtReset(xmlParserCtxtPtr ctxt)
14053{
14054 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014055 xmlDictPtr dict;
14056
14057 if (ctxt == NULL)
14058 return;
14059
14060 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014061
14062 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14063 xmlFreeInputStream(input);
14064 }
14065 ctxt->inputNr = 0;
14066 ctxt->input = NULL;
14067
14068 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014069 if (ctxt->spaceTab != NULL) {
14070 ctxt->spaceTab[0] = -1;
14071 ctxt->space = &ctxt->spaceTab[0];
14072 } else {
14073 ctxt->space = NULL;
14074 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014075
14076
14077 ctxt->nodeNr = 0;
14078 ctxt->node = NULL;
14079
14080 ctxt->nameNr = 0;
14081 ctxt->name = NULL;
14082
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014083 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014084 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014085 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014086 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014087 DICT_FREE(ctxt->directory);
14088 ctxt->directory = NULL;
14089 DICT_FREE(ctxt->extSubURI);
14090 ctxt->extSubURI = NULL;
14091 DICT_FREE(ctxt->extSubSystem);
14092 ctxt->extSubSystem = NULL;
14093 if (ctxt->myDoc != NULL)
14094 xmlFreeDoc(ctxt->myDoc);
14095 ctxt->myDoc = NULL;
14096
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014097 ctxt->standalone = -1;
14098 ctxt->hasExternalSubset = 0;
14099 ctxt->hasPErefs = 0;
14100 ctxt->html = 0;
14101 ctxt->external = 0;
14102 ctxt->instate = XML_PARSER_START;
14103 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014104
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014105 ctxt->wellFormed = 1;
14106 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014107 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014108 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014109#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014110 ctxt->vctxt.userData = ctxt;
14111 ctxt->vctxt.error = xmlParserValidityError;
14112 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014113#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014114 ctxt->record_info = 0;
14115 ctxt->nbChars = 0;
14116 ctxt->checkIndex = 0;
14117 ctxt->inSubset = 0;
14118 ctxt->errNo = XML_ERR_OK;
14119 ctxt->depth = 0;
14120 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14121 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014122 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014123 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014124 xmlInitNodeInfoSeq(&ctxt->node_seq);
14125
14126 if (ctxt->attsDefault != NULL) {
14127 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14128 ctxt->attsDefault = NULL;
14129 }
14130 if (ctxt->attsSpecial != NULL) {
14131 xmlHashFree(ctxt->attsSpecial, NULL);
14132 ctxt->attsSpecial = NULL;
14133 }
14134
Daniel Veillard4432df22003-09-28 18:58:27 +000014135#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014136 if (ctxt->catalogs != NULL)
14137 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014138#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014139 if (ctxt->lastError.code != XML_ERR_OK)
14140 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014141}
14142
14143/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014144 * xmlCtxtResetPush:
14145 * @ctxt: an XML parser context
14146 * @chunk: a pointer to an array of chars
14147 * @size: number of chars in the array
14148 * @filename: an optional file name or URI
14149 * @encoding: the document encoding, or NULL
14150 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014151 * Reset a push parser context
14152 *
14153 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014154 */
14155int
14156xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14157 int size, const char *filename, const char *encoding)
14158{
14159 xmlParserInputPtr inputStream;
14160 xmlParserInputBufferPtr buf;
14161 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14162
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014163 if (ctxt == NULL)
14164 return(1);
14165
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014166 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14167 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14168
14169 buf = xmlAllocParserInputBuffer(enc);
14170 if (buf == NULL)
14171 return(1);
14172
14173 if (ctxt == NULL) {
14174 xmlFreeParserInputBuffer(buf);
14175 return(1);
14176 }
14177
14178 xmlCtxtReset(ctxt);
14179
14180 if (ctxt->pushTab == NULL) {
14181 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14182 sizeof(xmlChar *));
14183 if (ctxt->pushTab == NULL) {
14184 xmlErrMemory(ctxt, NULL);
14185 xmlFreeParserInputBuffer(buf);
14186 return(1);
14187 }
14188 }
14189
14190 if (filename == NULL) {
14191 ctxt->directory = NULL;
14192 } else {
14193 ctxt->directory = xmlParserGetDirectory(filename);
14194 }
14195
14196 inputStream = xmlNewInputStream(ctxt);
14197 if (inputStream == NULL) {
14198 xmlFreeParserInputBuffer(buf);
14199 return(1);
14200 }
14201
14202 if (filename == NULL)
14203 inputStream->filename = NULL;
14204 else
14205 inputStream->filename = (char *)
14206 xmlCanonicPath((const xmlChar *) filename);
14207 inputStream->buf = buf;
14208 inputStream->base = inputStream->buf->buffer->content;
14209 inputStream->cur = inputStream->buf->buffer->content;
14210 inputStream->end =
14211 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14212
14213 inputPush(ctxt, inputStream);
14214
14215 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14216 (ctxt->input->buf != NULL)) {
14217 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14218 int cur = ctxt->input->cur - ctxt->input->base;
14219
14220 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14221
14222 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14223 ctxt->input->cur = ctxt->input->base + cur;
14224 ctxt->input->end =
14225 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14226 use];
14227#ifdef DEBUG_PUSH
14228 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14229#endif
14230 }
14231
14232 if (encoding != NULL) {
14233 xmlCharEncodingHandlerPtr hdlr;
14234
Daniel Veillard37334572008-07-31 08:20:02 +000014235 if (ctxt->encoding != NULL)
14236 xmlFree((xmlChar *) ctxt->encoding);
14237 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14238
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014239 hdlr = xmlFindCharEncodingHandler(encoding);
14240 if (hdlr != NULL) {
14241 xmlSwitchToEncoding(ctxt, hdlr);
14242 } else {
14243 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14244 "Unsupported encoding %s\n", BAD_CAST encoding);
14245 }
14246 } else if (enc != XML_CHAR_ENCODING_NONE) {
14247 xmlSwitchEncoding(ctxt, enc);
14248 }
14249
14250 return(0);
14251}
14252
Daniel Veillard37334572008-07-31 08:20:02 +000014253
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014254/**
Daniel Veillard37334572008-07-31 08:20:02 +000014255 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014256 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014257 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014258 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014259 *
14260 * Applies the options to the parser context
14261 *
14262 * Returns 0 in case of success, the set of unknown or unimplemented options
14263 * in case of error.
14264 */
Daniel Veillard37334572008-07-31 08:20:02 +000014265static int
14266xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014267{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014268 if (ctxt == NULL)
14269 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014270 if (encoding != NULL) {
14271 if (ctxt->encoding != NULL)
14272 xmlFree((xmlChar *) ctxt->encoding);
14273 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14274 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014275 if (options & XML_PARSE_RECOVER) {
14276 ctxt->recovery = 1;
14277 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014278 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014279 } else
14280 ctxt->recovery = 0;
14281 if (options & XML_PARSE_DTDLOAD) {
14282 ctxt->loadsubset = XML_DETECT_IDS;
14283 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014284 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014285 } else
14286 ctxt->loadsubset = 0;
14287 if (options & XML_PARSE_DTDATTR) {
14288 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14289 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014290 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014291 }
14292 if (options & XML_PARSE_NOENT) {
14293 ctxt->replaceEntities = 1;
14294 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14295 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014296 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014297 } else
14298 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014299 if (options & XML_PARSE_PEDANTIC) {
14300 ctxt->pedantic = 1;
14301 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014302 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014303 } else
14304 ctxt->pedantic = 0;
14305 if (options & XML_PARSE_NOBLANKS) {
14306 ctxt->keepBlanks = 0;
14307 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14308 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014309 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014310 } else
14311 ctxt->keepBlanks = 1;
14312 if (options & XML_PARSE_DTDVALID) {
14313 ctxt->validate = 1;
14314 if (options & XML_PARSE_NOWARNING)
14315 ctxt->vctxt.warning = NULL;
14316 if (options & XML_PARSE_NOERROR)
14317 ctxt->vctxt.error = NULL;
14318 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014319 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014320 } else
14321 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014322 if (options & XML_PARSE_NOWARNING) {
14323 ctxt->sax->warning = NULL;
14324 options -= XML_PARSE_NOWARNING;
14325 }
14326 if (options & XML_PARSE_NOERROR) {
14327 ctxt->sax->error = NULL;
14328 ctxt->sax->fatalError = NULL;
14329 options -= XML_PARSE_NOERROR;
14330 }
Daniel Veillard81273902003-09-30 00:43:48 +000014331#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014332 if (options & XML_PARSE_SAX1) {
14333 ctxt->sax->startElement = xmlSAX2StartElement;
14334 ctxt->sax->endElement = xmlSAX2EndElement;
14335 ctxt->sax->startElementNs = NULL;
14336 ctxt->sax->endElementNs = NULL;
14337 ctxt->sax->initialized = 1;
14338 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014339 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014340 }
Daniel Veillard81273902003-09-30 00:43:48 +000014341#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014342 if (options & XML_PARSE_NODICT) {
14343 ctxt->dictNames = 0;
14344 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014345 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014346 } else {
14347 ctxt->dictNames = 1;
14348 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014349 if (options & XML_PARSE_NOCDATA) {
14350 ctxt->sax->cdataBlock = NULL;
14351 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014352 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014353 }
14354 if (options & XML_PARSE_NSCLEAN) {
14355 ctxt->options |= XML_PARSE_NSCLEAN;
14356 options -= XML_PARSE_NSCLEAN;
14357 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014358 if (options & XML_PARSE_NONET) {
14359 ctxt->options |= XML_PARSE_NONET;
14360 options -= XML_PARSE_NONET;
14361 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014362 if (options & XML_PARSE_COMPACT) {
14363 ctxt->options |= XML_PARSE_COMPACT;
14364 options -= XML_PARSE_COMPACT;
14365 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014366 if (options & XML_PARSE_OLD10) {
14367 ctxt->options |= XML_PARSE_OLD10;
14368 options -= XML_PARSE_OLD10;
14369 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014370 if (options & XML_PARSE_NOBASEFIX) {
14371 ctxt->options |= XML_PARSE_NOBASEFIX;
14372 options -= XML_PARSE_NOBASEFIX;
14373 }
14374 if (options & XML_PARSE_HUGE) {
14375 ctxt->options |= XML_PARSE_HUGE;
14376 options -= XML_PARSE_HUGE;
14377 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014378 if (options & XML_PARSE_OLDSAX) {
14379 ctxt->options |= XML_PARSE_OLDSAX;
14380 options -= XML_PARSE_OLDSAX;
14381 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014382 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014383 return (options);
14384}
14385
14386/**
Daniel Veillard37334572008-07-31 08:20:02 +000014387 * xmlCtxtUseOptions:
14388 * @ctxt: an XML parser context
14389 * @options: a combination of xmlParserOption
14390 *
14391 * Applies the options to the parser context
14392 *
14393 * Returns 0 in case of success, the set of unknown or unimplemented options
14394 * in case of error.
14395 */
14396int
14397xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14398{
14399 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14400}
14401
14402/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014403 * xmlDoRead:
14404 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014405 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014406 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014407 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014408 * @reuse: keep the context for reuse
14409 *
14410 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014411 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014412 * Returns the resulting document tree or NULL
14413 */
14414static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014415xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14416 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014417{
14418 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014419
14420 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014421 if (encoding != NULL) {
14422 xmlCharEncodingHandlerPtr hdlr;
14423
14424 hdlr = xmlFindCharEncodingHandler(encoding);
14425 if (hdlr != NULL)
14426 xmlSwitchToEncoding(ctxt, hdlr);
14427 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014428 if ((URL != NULL) && (ctxt->input != NULL) &&
14429 (ctxt->input->filename == NULL))
14430 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014431 xmlParseDocument(ctxt);
14432 if ((ctxt->wellFormed) || ctxt->recovery)
14433 ret = ctxt->myDoc;
14434 else {
14435 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014436 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014437 xmlFreeDoc(ctxt->myDoc);
14438 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014439 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014440 ctxt->myDoc = NULL;
14441 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014442 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014443 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014444
14445 return (ret);
14446}
14447
14448/**
14449 * xmlReadDoc:
14450 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014451 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014452 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014453 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014454 *
14455 * parse an XML in-memory document and build a tree.
14456 *
14457 * Returns the resulting document tree
14458 */
14459xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014460xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014461{
14462 xmlParserCtxtPtr ctxt;
14463
14464 if (cur == NULL)
14465 return (NULL);
14466
14467 ctxt = xmlCreateDocParserCtxt(cur);
14468 if (ctxt == NULL)
14469 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014470 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014471}
14472
14473/**
14474 * xmlReadFile:
14475 * @filename: a file or URL
14476 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014477 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014478 *
14479 * parse an XML file from the filesystem or the network.
14480 *
14481 * Returns the resulting document tree
14482 */
14483xmlDocPtr
14484xmlReadFile(const char *filename, const char *encoding, int options)
14485{
14486 xmlParserCtxtPtr ctxt;
14487
Daniel Veillard61b93382003-11-03 14:28:31 +000014488 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014489 if (ctxt == NULL)
14490 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014491 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014492}
14493
14494/**
14495 * xmlReadMemory:
14496 * @buffer: a pointer to a char array
14497 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014498 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014499 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014500 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014501 *
14502 * parse an XML in-memory document and build a tree.
14503 *
14504 * Returns the resulting document tree
14505 */
14506xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014507xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014508{
14509 xmlParserCtxtPtr ctxt;
14510
14511 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14512 if (ctxt == NULL)
14513 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014514 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014515}
14516
14517/**
14518 * xmlReadFd:
14519 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014520 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014521 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014522 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014523 *
14524 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014525 * NOTE that the file descriptor will not be closed when the
14526 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014527 *
14528 * Returns the resulting document tree
14529 */
14530xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014531xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014532{
14533 xmlParserCtxtPtr ctxt;
14534 xmlParserInputBufferPtr input;
14535 xmlParserInputPtr stream;
14536
14537 if (fd < 0)
14538 return (NULL);
14539
14540 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14541 if (input == NULL)
14542 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014543 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014544 ctxt = xmlNewParserCtxt();
14545 if (ctxt == NULL) {
14546 xmlFreeParserInputBuffer(input);
14547 return (NULL);
14548 }
14549 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14550 if (stream == NULL) {
14551 xmlFreeParserInputBuffer(input);
14552 xmlFreeParserCtxt(ctxt);
14553 return (NULL);
14554 }
14555 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014556 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014557}
14558
14559/**
14560 * xmlReadIO:
14561 * @ioread: an I/O read function
14562 * @ioclose: an I/O close function
14563 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014564 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014565 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014566 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014567 *
14568 * parse an XML document from I/O functions and source and build a tree.
14569 *
14570 * Returns the resulting document tree
14571 */
14572xmlDocPtr
14573xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014574 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014575{
14576 xmlParserCtxtPtr ctxt;
14577 xmlParserInputBufferPtr input;
14578 xmlParserInputPtr stream;
14579
14580 if (ioread == NULL)
14581 return (NULL);
14582
14583 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14584 XML_CHAR_ENCODING_NONE);
14585 if (input == NULL)
14586 return (NULL);
14587 ctxt = xmlNewParserCtxt();
14588 if (ctxt == NULL) {
14589 xmlFreeParserInputBuffer(input);
14590 return (NULL);
14591 }
14592 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14593 if (stream == NULL) {
14594 xmlFreeParserInputBuffer(input);
14595 xmlFreeParserCtxt(ctxt);
14596 return (NULL);
14597 }
14598 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014599 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014600}
14601
14602/**
14603 * xmlCtxtReadDoc:
14604 * @ctxt: an XML parser context
14605 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014606 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014607 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014608 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014609 *
14610 * parse an XML in-memory document and build a tree.
14611 * This reuses the existing @ctxt parser context
14612 *
14613 * Returns the resulting document tree
14614 */
14615xmlDocPtr
14616xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014617 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014618{
14619 xmlParserInputPtr stream;
14620
14621 if (cur == NULL)
14622 return (NULL);
14623 if (ctxt == NULL)
14624 return (NULL);
14625
14626 xmlCtxtReset(ctxt);
14627
14628 stream = xmlNewStringInputStream(ctxt, cur);
14629 if (stream == NULL) {
14630 return (NULL);
14631 }
14632 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014633 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014634}
14635
14636/**
14637 * xmlCtxtReadFile:
14638 * @ctxt: an XML parser context
14639 * @filename: a file or URL
14640 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014641 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014642 *
14643 * parse an XML file from the filesystem or the network.
14644 * This reuses the existing @ctxt parser context
14645 *
14646 * Returns the resulting document tree
14647 */
14648xmlDocPtr
14649xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14650 const char *encoding, int options)
14651{
14652 xmlParserInputPtr stream;
14653
14654 if (filename == NULL)
14655 return (NULL);
14656 if (ctxt == NULL)
14657 return (NULL);
14658
14659 xmlCtxtReset(ctxt);
14660
Daniel Veillard29614c72004-11-26 10:47:26 +000014661 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014662 if (stream == NULL) {
14663 return (NULL);
14664 }
14665 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014666 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014667}
14668
14669/**
14670 * xmlCtxtReadMemory:
14671 * @ctxt: an XML parser context
14672 * @buffer: a pointer to a char array
14673 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014674 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014675 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014676 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014677 *
14678 * parse an XML in-memory document and build a tree.
14679 * This reuses the existing @ctxt parser context
14680 *
14681 * Returns the resulting document tree
14682 */
14683xmlDocPtr
14684xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014685 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014686{
14687 xmlParserInputBufferPtr input;
14688 xmlParserInputPtr stream;
14689
14690 if (ctxt == NULL)
14691 return (NULL);
14692 if (buffer == NULL)
14693 return (NULL);
14694
14695 xmlCtxtReset(ctxt);
14696
14697 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14698 if (input == NULL) {
14699 return(NULL);
14700 }
14701
14702 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14703 if (stream == NULL) {
14704 xmlFreeParserInputBuffer(input);
14705 return(NULL);
14706 }
14707
14708 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014709 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014710}
14711
14712/**
14713 * xmlCtxtReadFd:
14714 * @ctxt: an XML parser context
14715 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014716 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014717 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014718 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014719 *
14720 * parse an XML from a file descriptor and build a tree.
14721 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014722 * NOTE that the file descriptor will not be closed when the
14723 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014724 *
14725 * Returns the resulting document tree
14726 */
14727xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014728xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14729 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014730{
14731 xmlParserInputBufferPtr input;
14732 xmlParserInputPtr stream;
14733
14734 if (fd < 0)
14735 return (NULL);
14736 if (ctxt == NULL)
14737 return (NULL);
14738
14739 xmlCtxtReset(ctxt);
14740
14741
14742 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14743 if (input == NULL)
14744 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014745 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014746 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14747 if (stream == NULL) {
14748 xmlFreeParserInputBuffer(input);
14749 return (NULL);
14750 }
14751 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014752 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014753}
14754
14755/**
14756 * xmlCtxtReadIO:
14757 * @ctxt: an XML parser context
14758 * @ioread: an I/O read function
14759 * @ioclose: an I/O close function
14760 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014761 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014762 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014763 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014764 *
14765 * parse an XML document from I/O functions and source and build a tree.
14766 * This reuses the existing @ctxt parser context
14767 *
14768 * Returns the resulting document tree
14769 */
14770xmlDocPtr
14771xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14772 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014773 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014774 const char *encoding, int options)
14775{
14776 xmlParserInputBufferPtr input;
14777 xmlParserInputPtr stream;
14778
14779 if (ioread == NULL)
14780 return (NULL);
14781 if (ctxt == NULL)
14782 return (NULL);
14783
14784 xmlCtxtReset(ctxt);
14785
14786 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14787 XML_CHAR_ENCODING_NONE);
14788 if (input == NULL)
14789 return (NULL);
14790 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14791 if (stream == NULL) {
14792 xmlFreeParserInputBuffer(input);
14793 return (NULL);
14794 }
14795 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014796 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014797}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014798
14799#define bottom_parser
14800#include "elfgcchack.h"