blob: efa64dde6c86a88a354f3e2731abd8d070a8ebf7 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
Rob Richards9c0aa472009-03-26 18:10:19 +000086static xmlParserCtxtPtr
87xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090/************************************************************************
91 * *
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
93 * *
94 ************************************************************************/
95
/*
 * XML_PARSER_BIG_ENTITY: replacement sizes below this value are accepted
 * by xmlParserEntityCheck() without computing any ratio.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
106
107/*
108 * xmlParserEntityCheck
109 *
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential linear entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
114 * parser option.
115 */
116static int
117xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118 xmlEntityPtr ent)
119{
Daniel Veillardcba68392008-08-29 12:43:40 +0000120 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000121
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123 return (0);
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125 return (1);
126 if (size != 0) {
127 /*
128 * Do the check based on the replacement size of the entity
129 */
130 if (size < XML_PARSER_BIG_ENTITY)
131 return(0);
132
133 /*
134 * A limit on the amount of text data reasonably used
135 */
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
139 }
140 consumed += ctxt->sizeentities;
141
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144 return (0);
145 } else if (ent != NULL) {
146 /*
147 * use the number of parsed entities in the replacement
148 */
149 size = ent->checked;
150
151 /*
152 * The amount of data parsed counting entities size only once
153 */
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
157 }
158 consumed += ctxt->sizeentities;
159
160 /*
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
163 */
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165 return (0);
166 } else {
167 /*
168 * strange we got no data for checking just return
169 */
170 return (0);
171 }
172
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174 return (1);
175}
176
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to
 * process. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000186
Daniel Veillard0fb18932003-09-07 09:14:37 +0000187
Daniel Veillard0161e632008-08-28 15:36:32 +0000188
#define SAX2 1

/* Buffer sizes used by the parser (presumably initial allocations -- confirm at the use sites). */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
202
Daniel Veillarda07050d2003-10-19 14:46:32 +0000203
Owen Taylor3473f882001-02-23 17:55:21 +0000204/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000205xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
207
Daniel Veillard7d515752003-09-26 19:12:37 +0000208static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000209xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000211 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000212 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000213
Daniel Veillard37334572008-07-31 08:20:02 +0000214static int
215xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000218static void
219xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000221#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000222
Daniel Veillard7d515752003-09-26 19:12:37 +0000223static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000224xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000227static int
228xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229
Daniel Veillarde57ec792003-09-10 10:50:59 +0000230/************************************************************************
231 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 * Some factorized error routines *
233 * *
234 ************************************************************************/
235
236/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
241 *
242 * Handle a redefinition of attribute error
243 */
244static void
245xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
247{
Daniel Veillard157fee02003-10-31 10:36:03 +0000248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
250 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000251 if (ctxt != NULL)
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000253 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000254 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000255 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
256 (const char *) localname, NULL, NULL, 0, 0,
257 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000258 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000259 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
261 (const char *) prefix, (const char *) localname,
262 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
263 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000264 if (ctxt != NULL) {
265 ctxt->wellFormed = 0;
266 if (ctxt->recovery == 0)
267 ctxt->disableSAX = 1;
268 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000269}
270
271/**
272 * xmlFatalErr:
273 * @ctxt: an XML parser context
274 * @error: the error number
275 * @extra: extra information string
276 *
277 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
278 */
279static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000280xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000281{
282 const char *errmsg;
283
Daniel Veillard157fee02003-10-31 10:36:03 +0000284 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
285 (ctxt->instate == XML_PARSER_EOF))
286 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 switch (error) {
288 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000289 errmsg = "CharRef: invalid hexadecimal value\n";
290 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000291 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 errmsg = "CharRef: invalid decimal value\n";
293 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000294 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000295 errmsg = "CharRef: invalid value\n";
296 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000297 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000298 errmsg = "internal error";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "PEReference at end of document\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "PEReference in prolog\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "PEReference in epilog\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "PEReference: no name\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "PEReference: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "Detected an entity reference loop\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityValue: \" or ' expected\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "PEReferences forbidden in internal subset\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EntityValue: \" or ' expected\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "AttValue: \" or ' expected\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Unescaped '<' not allowed in attributes values\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "SystemLiteral \" or ' expected\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Unfinished System or Public ID \" or ' expected\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Sequence ']]>' not allowed in content\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "PUBLIC, the Public Identifier is missing\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Comment must not contain '--' (double-hyphen)\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "xmlParsePI : no target name\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Invalid PI name\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "NOTATION: Name expected here\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 errmsg = "'>' required to close NOTATION declaration\n";
362 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 errmsg = "Entity value required\n";
365 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 errmsg = "Fragment not allowed";
368 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000369 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370 errmsg = "'(' required to start ATTLIST enumeration\n";
371 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000372 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 errmsg = "NmToken expected in ATTLIST enumeration\n";
374 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 errmsg = "')' required to finish ATTLIST enumeration\n";
377 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
380 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000381 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
383 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000384 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 errmsg = "ContentDecl : Name or '(' expected\n";
386 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000387 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 errmsg =
392 "PEReference: forbidden within markup decl in internal subset\n";
393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 errmsg = "expected '>'\n";
396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 errmsg = "XML conditional section '[' expected\n";
399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000401 errmsg = "Content error in the external subset\n";
402 break;
403 case XML_ERR_CONDSEC_INVALID_KEYWORD:
404 errmsg =
405 "conditional section INCLUDE or IGNORE keyword expected\n";
406 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000407 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 errmsg = "XML conditional section not closed\n";
409 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000410 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 errmsg = "Text declaration '<?xml' required\n";
412 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000413 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 errmsg = "parsing XML declaration: '?>' expected\n";
415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 errmsg = "external parsed entities cannot be standalone\n";
418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 errmsg = "EntityRef: expecting ';'\n";
421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 errmsg = "DOCTYPE improperly terminated\n";
424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 errmsg = "EndTag: '</' not found\n";
427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 errmsg = "expected '='\n";
430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 errmsg = "String not closed expecting \" or '\n";
433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 errmsg = "String not started expecting ' or \"\n";
436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 errmsg = "Invalid XML encoding name\n";
439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 errmsg = "standalone accepts only 'yes' or 'no'\n";
442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 errmsg = "Document is empty\n";
445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 errmsg = "Extra content at the end of the document\n";
448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 errmsg = "chunk is not well balanced\n";
451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 errmsg = "extra content at the end of well balanced chunk\n";
454 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 errmsg = "Malformed declaration expecting version\n";
457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 case:
460 errmsg = "\n";
461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 default:
464 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000466 if (ctxt != NULL)
467 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000468 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
470 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000471 if (ctxt != NULL) {
472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000476}
477
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000478/**
479 * xmlFatalErrMsg:
480 * @ctxt: an XML parser context
481 * @error: the error number
482 * @msg: the error message
483 *
484 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
485 */
486static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
488 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000489{
Daniel Veillard157fee02003-10-31 10:36:03 +0000490 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
491 (ctxt->instate == XML_PARSER_EOF))
492 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000493 if (ctxt != NULL)
494 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000495 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000496 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000497 if (ctxt != NULL) {
498 ctxt->wellFormed = 0;
499 if (ctxt->recovery == 0)
500 ctxt->disableSAX = 1;
501 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000502}
503
504/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000505 * xmlWarningMsg:
506 * @ctxt: an XML parser context
507 * @error: the error number
508 * @msg: the error message
509 * @str1: extra data
510 * @str2: extra data
511 *
512 * Handle a warning.
513 */
514static void
515xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
516 const char *msg, const xmlChar *str1, const xmlChar *str2)
517{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000518 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000519
Daniel Veillard157fee02003-10-31 10:36:03 +0000520 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
521 (ctxt->instate == XML_PARSER_EOF))
522 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000523 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
524 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000525 schannel = ctxt->sax->serror;
526 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000527 (ctxt->sax) ? ctxt->sax->warning : NULL,
528 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000529 ctxt, NULL, XML_FROM_PARSER, error,
530 XML_ERR_WARNING, NULL, 0,
531 (const char *) str1, (const char *) str2, NULL, 0, 0,
532 msg, (const char *) str1, (const char *) str2);
533}
534
535/**
536 * xmlValidityError:
537 * @ctxt: an XML parser context
538 * @error: the error number
539 * @msg: the error message
540 * @str1: extra data
541 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000542 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000543 */
544static void
545xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000546 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000547{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000548 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000549
550 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
551 (ctxt->instate == XML_PARSER_EOF))
552 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000553 if (ctxt != NULL) {
554 ctxt->errNo = error;
555 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
556 schannel = ctxt->sax->serror;
557 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000558 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000559 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000560 ctxt, NULL, XML_FROM_DTD, error,
561 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000562 (const char *) str2, NULL, 0, 0,
563 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000564 if (ctxt != NULL) {
565 ctxt->valid = 0;
566 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000567}
568
569/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570 * xmlFatalErrMsgInt:
571 * @ctxt: an XML parser context
572 * @error: the error number
573 * @msg: the error message
574 * @val: an integer value
575 *
576 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
577 */
578static void
579xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000580 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581{
Daniel Veillard157fee02003-10-31 10:36:03 +0000582 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
583 (ctxt->instate == XML_PARSER_EOF))
584 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000585 if (ctxt != NULL)
586 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000587 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000588 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
589 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000590 if (ctxt != NULL) {
591 ctxt->wellFormed = 0;
592 if (ctxt->recovery == 0)
593 ctxt->disableSAX = 1;
594 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000595}
596
597/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000598 * xmlFatalErrMsgStrIntStr:
599 * @ctxt: an XML parser context
600 * @error: the error number
601 * @msg: the error message
602 * @str1: an string info
603 * @val: an integer value
604 * @str2: an string info
605 *
606 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
607 */
608static void
609xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
610 const char *msg, const xmlChar *str1, int val,
611 const xmlChar *str2)
612{
Daniel Veillard157fee02003-10-31 10:36:03 +0000613 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
614 (ctxt->instate == XML_PARSER_EOF))
615 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000616 if (ctxt != NULL)
617 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000618 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000619 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
620 NULL, 0, (const char *) str1, (const char *) str2,
621 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000622 if (ctxt != NULL) {
623 ctxt->wellFormed = 0;
624 if (ctxt->recovery == 0)
625 ctxt->disableSAX = 1;
626 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000627}
628
629/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000630 * xmlFatalErrMsgStr:
631 * @ctxt: an XML parser context
632 * @error: the error number
633 * @msg: the error message
634 * @val: a string value
635 *
636 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
637 */
638static void
639xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000640 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000641{
Daniel Veillard157fee02003-10-31 10:36:03 +0000642 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
643 (ctxt->instate == XML_PARSER_EOF))
644 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000645 if (ctxt != NULL)
646 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000647 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000648 XML_FROM_PARSER, error, XML_ERR_FATAL,
649 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
650 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000651 if (ctxt != NULL) {
652 ctxt->wellFormed = 0;
653 if (ctxt->recovery == 0)
654 ctxt->disableSAX = 1;
655 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000656}
657
658/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000659 * xmlErrMsgStr:
660 * @ctxt: an XML parser context
661 * @error: the error number
662 * @msg: the error message
663 * @val: a string value
664 *
665 * Handle a non fatal parser error
666 */
667static void
668xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
669 const char *msg, const xmlChar * val)
670{
Daniel Veillard157fee02003-10-31 10:36:03 +0000671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL)
675 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000676 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000677 XML_FROM_PARSER, error, XML_ERR_ERROR,
678 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
679 val);
680}
681
682/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000683 * xmlNsErr:
684 * @ctxt: an XML parser context
685 * @error: the error number
686 * @msg: the message
687 * @info1: extra information string
688 * @info2: extra information string
689 *
690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
691 */
692static void
693xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000695 const xmlChar * info1, const xmlChar * info2,
696 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000697{
Daniel Veillard157fee02003-10-31 10:36:03 +0000698 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
699 (ctxt->instate == XML_PARSER_EOF))
700 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000701 if (ctxt != NULL)
702 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000703 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000704 XML_ERR_ERROR, NULL, 0, (const char *) info1,
705 (const char *) info2, (const char *) info3, 0, 0, msg,
706 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000707 if (ctxt != NULL)
708 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000709}
710
Daniel Veillard37334572008-07-31 08:20:02 +0000711/**
712 * xmlNsWarn
713 * @ctxt: an XML parser context
714 * @error: the error number
715 * @msg: the message
716 * @info1: extra information string
717 * @info2: extra information string
718 *
719 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
720 */
721static void
722xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
723 const char *msg,
724 const xmlChar * info1, const xmlChar * info2,
725 const xmlChar * info3)
726{
727 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
728 (ctxt->instate == XML_PARSER_EOF))
729 return;
730 if (ctxt != NULL)
731 ctxt->errNo = error;
732 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
733 XML_ERR_WARNING, NULL, 0, (const char *) info1,
734 (const char *) info2, (const char *) info3, 0, 0, msg,
735 info1, info2, info3);
736}
737
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000738/************************************************************************
739 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000740 * Library wide options *
741 * *
742 ************************************************************************/
743
744/**
745 * xmlHasFeature:
746 * @feature: the feature to be examined
747 *
748 * Examines if the library has been compiled with a given feature.
749 *
750 * Returns a non-zero value if the feature exist, otherwise zero.
751 * Returns zero (0) if the feature does not exist or an unknown
752 * unknown feature is requested, non-zero otherwise.
753 */
754int
755xmlHasFeature(xmlFeature feature)
756{
757 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000758 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000759#ifdef LIBXML_THREAD_ENABLED
760 return(1);
761#else
762 return(0);
763#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000764 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000765#ifdef LIBXML_TREE_ENABLED
766 return(1);
767#else
768 return(0);
769#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000770 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000771#ifdef LIBXML_OUTPUT_ENABLED
772 return(1);
773#else
774 return(0);
775#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000776 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000777#ifdef LIBXML_PUSH_ENABLED
778 return(1);
779#else
780 return(0);
781#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000782 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000783#ifdef LIBXML_READER_ENABLED
784 return(1);
785#else
786 return(0);
787#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000788 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000789#ifdef LIBXML_PATTERN_ENABLED
790 return(1);
791#else
792 return(0);
793#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000794 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000795#ifdef LIBXML_WRITER_ENABLED
796 return(1);
797#else
798 return(0);
799#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000800 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000801#ifdef LIBXML_SAX1_ENABLED
802 return(1);
803#else
804 return(0);
805#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000806 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000807#ifdef LIBXML_FTP_ENABLED
808 return(1);
809#else
810 return(0);
811#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000812 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000813#ifdef LIBXML_HTTP_ENABLED
814 return(1);
815#else
816 return(0);
817#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000818 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000819#ifdef LIBXML_VALID_ENABLED
820 return(1);
821#else
822 return(0);
823#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000824 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000825#ifdef LIBXML_HTML_ENABLED
826 return(1);
827#else
828 return(0);
829#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000830 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000831#ifdef LIBXML_LEGACY_ENABLED
832 return(1);
833#else
834 return(0);
835#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000836 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000837#ifdef LIBXML_C14N_ENABLED
838 return(1);
839#else
840 return(0);
841#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000842 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000843#ifdef LIBXML_CATALOG_ENABLED
844 return(1);
845#else
846 return(0);
847#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000848 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000849#ifdef LIBXML_XPATH_ENABLED
850 return(1);
851#else
852 return(0);
853#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000854 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000855#ifdef LIBXML_XPTR_ENABLED
856 return(1);
857#else
858 return(0);
859#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000860 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000861#ifdef LIBXML_XINCLUDE_ENABLED
862 return(1);
863#else
864 return(0);
865#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000866 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000867#ifdef LIBXML_ICONV_ENABLED
868 return(1);
869#else
870 return(0);
871#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000872 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000873#ifdef LIBXML_ISO8859X_ENABLED
874 return(1);
875#else
876 return(0);
877#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000878 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000879#ifdef LIBXML_UNICODE_ENABLED
880 return(1);
881#else
882 return(0);
883#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000884 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000885#ifdef LIBXML_REGEXP_ENABLED
886 return(1);
887#else
888 return(0);
889#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000890 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000891#ifdef LIBXML_AUTOMATA_ENABLED
892 return(1);
893#else
894 return(0);
895#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000896 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000897#ifdef LIBXML_EXPR_ENABLED
898 return(1);
899#else
900 return(0);
901#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000902 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000903#ifdef LIBXML_SCHEMAS_ENABLED
904 return(1);
905#else
906 return(0);
907#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000908 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000909#ifdef LIBXML_SCHEMATRON_ENABLED
910 return(1);
911#else
912 return(0);
913#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000914 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000915#ifdef LIBXML_MODULES_ENABLED
916 return(1);
917#else
918 return(0);
919#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000920 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000921#ifdef LIBXML_DEBUG_ENABLED
922 return(1);
923#else
924 return(0);
925#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000926 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000927#ifdef DEBUG_MEMORY_LOCATION
928 return(1);
929#else
930 return(0);
931#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000932 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000933#ifdef LIBXML_DEBUG_RUNTIME
934 return(1);
935#else
936 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000937#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000938 case XML_WITH_ZLIB:
939#ifdef LIBXML_ZLIB_ENABLED
940 return(1);
941#else
942 return(0);
943#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000944 default:
945 break;
946 }
947 return(0);
948}
949
950/************************************************************************
951 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000952 * SAX2 defaulted attributes handling *
953 * *
954 ************************************************************************/
955
956/**
957 * xmlDetectSAX2:
958 * @ctxt: an XML parser context
959 *
960 * Do the SAX2 detection and specific intialization
961 */
962static void
963xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
964 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000965#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000966 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
967 ((ctxt->sax->startElementNs != NULL) ||
968 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000969#else
970 ctxt->sax2 = 1;
971#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000972
973 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
974 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
975 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000976 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
977 (ctxt->str_xml_ns == NULL)) {
978 xmlErrMemory(ctxt, NULL);
979 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000980}
981
Daniel Veillarde57ec792003-09-10 10:50:59 +0000982typedef struct _xmlDefAttrs xmlDefAttrs;
983typedef xmlDefAttrs *xmlDefAttrsPtr;
984struct _xmlDefAttrs {
985 int nbAttrs; /* number of defaulted attributes on that element */
986 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +0000987 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000988};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000989
990/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000991 * xmlAttrNormalizeSpace:
992 * @src: the source string
993 * @dst: the target string
994 *
995 * Normalize the space in non CDATA attribute values:
996 * If the attribute type is not CDATA, then the XML processor MUST further
997 * process the normalized attribute value by discarding any leading and
998 * trailing space (#x20) characters, and by replacing sequences of space
999 * (#x20) characters by a single space (#x20) character.
1000 * Note that the size of dst need to be at least src, and if one doesn't need
1001 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1002 * passing src as dst is just fine.
1003 *
1004 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1005 * is needed.
1006 */
1007static xmlChar *
1008xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1009{
1010 if ((src == NULL) || (dst == NULL))
1011 return(NULL);
1012
1013 while (*src == 0x20) src++;
1014 while (*src != 0) {
1015 if (*src == 0x20) {
1016 while (*src == 0x20) src++;
1017 if (*src != 0)
1018 *dst++ = 0x20;
1019 } else {
1020 *dst++ = *src++;
1021 }
1022 }
1023 *dst = 0;
1024 if (dst == src)
1025 return(NULL);
1026 return(dst);
1027}
1028
1029/**
1030 * xmlAttrNormalizeSpace2:
1031 * @src: the source string
1032 *
1033 * Normalize the space in non CDATA attribute values, a slightly more complex
1034 * front end to avoid allocation problems when running on attribute values
1035 * coming from the input.
1036 *
1037 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1038 * is needed.
1039 */
1040static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001041xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001042{
1043 int i;
1044 int remove_head = 0;
1045 int need_realloc = 0;
1046 const xmlChar *cur;
1047
1048 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1049 return(NULL);
1050 i = *len;
1051 if (i <= 0)
1052 return(NULL);
1053
1054 cur = src;
1055 while (*cur == 0x20) {
1056 cur++;
1057 remove_head++;
1058 }
1059 while (*cur != 0) {
1060 if (*cur == 0x20) {
1061 cur++;
1062 if ((*cur == 0x20) || (*cur == 0)) {
1063 need_realloc = 1;
1064 break;
1065 }
1066 } else
1067 cur++;
1068 }
1069 if (need_realloc) {
1070 xmlChar *ret;
1071
1072 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1073 if (ret == NULL) {
1074 xmlErrMemory(ctxt, NULL);
1075 return(NULL);
1076 }
1077 xmlAttrNormalizeSpace(ret, ret);
1078 *len = (int) strlen((const char *)ret);
1079 return(ret);
1080 } else if (remove_head) {
1081 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001082 memmove(src, src + remove_head, 1 + *len);
1083 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001084 }
1085 return(NULL);
1086}
1087
1088/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001089 * xmlAddDefAttrs:
1090 * @ctxt: an XML parser context
1091 * @fullname: the element fullname
1092 * @fullattr: the attribute fullname
1093 * @value: the attribute value
1094 *
1095 * Add a defaulted attribute for an element
1096 */
1097static void
1098xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1099 const xmlChar *fullname,
1100 const xmlChar *fullattr,
1101 const xmlChar *value) {
1102 xmlDefAttrsPtr defaults;
1103 int len;
1104 const xmlChar *name;
1105 const xmlChar *prefix;
1106
Daniel Veillard6a31b832008-03-26 14:06:44 +00001107 /*
1108 * Allows to detect attribute redefinitions
1109 */
1110 if (ctxt->attsSpecial != NULL) {
1111 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1112 return;
1113 }
1114
Daniel Veillarde57ec792003-09-10 10:50:59 +00001115 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001116 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001117 if (ctxt->attsDefault == NULL)
1118 goto mem_error;
1119 }
1120
1121 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001122 * split the element name into prefix:localname , the string found
1123 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001124 */
1125 name = xmlSplitQName3(fullname, &len);
1126 if (name == NULL) {
1127 name = xmlDictLookup(ctxt->dict, fullname, -1);
1128 prefix = NULL;
1129 } else {
1130 name = xmlDictLookup(ctxt->dict, name, -1);
1131 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1132 }
1133
1134 /*
1135 * make sure there is some storage
1136 */
1137 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1138 if (defaults == NULL) {
1139 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001140 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001141 if (defaults == NULL)
1142 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001143 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001144 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001145 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1146 defaults, NULL) < 0) {
1147 xmlFree(defaults);
1148 goto mem_error;
1149 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001150 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001151 xmlDefAttrsPtr temp;
1152
1153 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001154 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001155 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001157 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001158 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001159 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1160 defaults, NULL) < 0) {
1161 xmlFree(defaults);
1162 goto mem_error;
1163 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001164 }
1165
1166 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001167 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001168 * are within the DTD and hen not associated to namespace names.
1169 */
1170 name = xmlSplitQName3(fullattr, &len);
1171 if (name == NULL) {
1172 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1173 prefix = NULL;
1174 } else {
1175 name = xmlDictLookup(ctxt->dict, name, -1);
1176 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1177 }
1178
Daniel Veillardae0765b2008-07-31 19:54:59 +00001179 defaults->values[5 * defaults->nbAttrs] = name;
1180 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001181 /* intern the string and precompute the end */
1182 len = xmlStrlen(value);
1183 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001184 defaults->values[5 * defaults->nbAttrs + 2] = value;
1185 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1186 if (ctxt->external)
1187 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1188 else
1189 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001190 defaults->nbAttrs++;
1191
1192 return;
1193
1194mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001195 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001196 return;
1197}
1198
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001199/**
1200 * xmlAddSpecialAttr:
1201 * @ctxt: an XML parser context
1202 * @fullname: the element fullname
1203 * @fullattr: the attribute fullname
1204 * @type: the attribute type
1205 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001206 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001207 */
1208static void
1209xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1210 const xmlChar *fullname,
1211 const xmlChar *fullattr,
1212 int type)
1213{
1214 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001215 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001216 if (ctxt->attsSpecial == NULL)
1217 goto mem_error;
1218 }
1219
Daniel Veillardac4118d2008-01-11 05:27:32 +00001220 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1221 return;
1222
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001223 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1224 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001225 return;
1226
1227mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001228 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001229 return;
1230}
1231
Daniel Veillard4432df22003-09-28 18:58:27 +00001232/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001233 * xmlCleanSpecialAttrCallback:
1234 *
1235 * Removes CDATA attributes from the special attribute table
1236 */
1237static void
1238xmlCleanSpecialAttrCallback(void *payload, void *data,
1239 const xmlChar *fullname, const xmlChar *fullattr,
1240 const xmlChar *unused ATTRIBUTE_UNUSED) {
1241 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1242
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001243 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001244 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1245 }
1246}
1247
1248/**
1249 * xmlCleanSpecialAttr:
1250 * @ctxt: an XML parser context
1251 *
1252 * Trim the list of attributes defined to remove all those of type
1253 * CDATA as they are not special. This call should be done when finishing
1254 * to parse the DTD and before starting to parse the document root.
1255 */
1256static void
1257xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1258{
1259 if (ctxt->attsSpecial == NULL)
1260 return;
1261
1262 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1263
1264 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1265 xmlHashFree(ctxt->attsSpecial, NULL);
1266 ctxt->attsSpecial = NULL;
1267 }
1268 return;
1269}
1270
1271/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001272 * xmlCheckLanguageID:
1273 * @lang: pointer to the string value
1274 *
1275 * Checks that the value conforms to the LanguageID production:
1276 *
1277 * NOTE: this is somewhat deprecated, those productions were removed from
1278 * the XML Second edition.
1279 *
1280 * [33] LanguageID ::= Langcode ('-' Subcode)*
1281 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1282 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1283 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1284 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1285 * [38] Subcode ::= ([a-z] | [A-Z])+
1286 *
1287 * Returns 1 if correct 0 otherwise
1288 **/
1289int
1290xmlCheckLanguageID(const xmlChar * lang)
1291{
1292 const xmlChar *cur = lang;
1293
1294 if (cur == NULL)
1295 return (0);
1296 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1297 ((cur[0] == 'I') && (cur[1] == '-'))) {
1298 /*
1299 * IANA code
1300 */
1301 cur += 2;
1302 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1303 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1304 cur++;
1305 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1306 ((cur[0] == 'X') && (cur[1] == '-'))) {
1307 /*
1308 * User code
1309 */
1310 cur += 2;
1311 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1312 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1313 cur++;
1314 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1315 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1316 /*
1317 * ISO639
1318 */
1319 cur++;
1320 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1321 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1322 cur++;
1323 else
1324 return (0);
1325 } else
1326 return (0);
1327 while (cur[0] != 0) { /* non input consuming */
1328 if (cur[0] != '-')
1329 return (0);
1330 cur++;
1331 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1332 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1333 cur++;
1334 else
1335 return (0);
1336 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1337 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1338 cur++;
1339 }
1340 return (1);
1341}
1342
Owen Taylor3473f882001-02-23 17:55:21 +00001343/************************************************************************
1344 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001345 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001346 * *
1347 ************************************************************************/
1348
/* Forward declaration only; the definition is not part of this section. */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
1351
Daniel Veillard0fb18932003-09-07 09:14:37 +00001352#ifdef SAX2
1353/**
1354 * nsPush:
1355 * @ctxt: an XML parser context
1356 * @prefix: the namespace prefix or NULL
1357 * @URL: the namespace name
1358 *
1359 * Pushes a new parser namespace on top of the ns stack
1360 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001361 * Returns -1 in case of error, -2 if the namespace should be discarded
1362 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001363 */
1364static int
1365nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1366{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001367 if (ctxt->options & XML_PARSE_NSCLEAN) {
1368 int i;
1369 for (i = 0;i < ctxt->nsNr;i += 2) {
1370 if (ctxt->nsTab[i] == prefix) {
1371 /* in scope */
1372 if (ctxt->nsTab[i + 1] == URL)
1373 return(-2);
1374 /* out of scope keep it */
1375 break;
1376 }
1377 }
1378 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001379 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1380 ctxt->nsMax = 10;
1381 ctxt->nsNr = 0;
1382 ctxt->nsTab = (const xmlChar **)
1383 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1384 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001385 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001386 ctxt->nsMax = 0;
1387 return (-1);
1388 }
1389 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001390 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001391 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001392 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1393 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1394 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001395 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001396 ctxt->nsMax /= 2;
1397 return (-1);
1398 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001399 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001400 }
1401 ctxt->nsTab[ctxt->nsNr++] = prefix;
1402 ctxt->nsTab[ctxt->nsNr++] = URL;
1403 return (ctxt->nsNr);
1404}
1405/**
1406 * nsPop:
1407 * @ctxt: an XML parser context
1408 * @nr: the number to pop
1409 *
1410 * Pops the top @nr parser prefix/namespace from the ns stack
1411 *
1412 * Returns the number of namespaces removed
1413 */
1414static int
1415nsPop(xmlParserCtxtPtr ctxt, int nr)
1416{
1417 int i;
1418
1419 if (ctxt->nsTab == NULL) return(0);
1420 if (ctxt->nsNr < nr) {
1421 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1422 nr = ctxt->nsNr;
1423 }
1424 if (ctxt->nsNr <= 0)
1425 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001426
Daniel Veillard0fb18932003-09-07 09:14:37 +00001427 for (i = 0;i < nr;i++) {
1428 ctxt->nsNr--;
1429 ctxt->nsTab[ctxt->nsNr] = NULL;
1430 }
1431 return(nr);
1432}
1433#endif
1434
1435static int
1436xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1437 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001438 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001439 int maxatts;
1440
1441 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001442 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001443 atts = (const xmlChar **)
1444 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001445 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001446 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001447 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1448 if (attallocs == NULL) goto mem_error;
1449 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001450 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001451 } else if (nr + 5 > ctxt->maxatts) {
1452 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001453 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1454 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001455 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001456 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001457 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1458 (maxatts / 5) * sizeof(int));
1459 if (attallocs == NULL) goto mem_error;
1460 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001461 ctxt->maxatts = maxatts;
1462 }
1463 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001464mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001465 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001466 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001467}
1468
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001469/**
1470 * inputPush:
1471 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001472 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001473 *
1474 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001475 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001476 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001477 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001478int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001479inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1480{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001481 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001482 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001483 if (ctxt->inputNr >= ctxt->inputMax) {
1484 ctxt->inputMax *= 2;
1485 ctxt->inputTab =
1486 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1487 ctxt->inputMax *
1488 sizeof(ctxt->inputTab[0]));
1489 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001490 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001491 xmlFreeInputStream(value);
1492 ctxt->inputMax /= 2;
1493 value = NULL;
1494 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001495 }
1496 }
1497 ctxt->inputTab[ctxt->inputNr] = value;
1498 ctxt->input = value;
1499 return (ctxt->inputNr++);
1500}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001501/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001502 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001503 * @ctxt: an XML parser context
1504 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001505 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001506 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001507 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001508 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001509xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001510inputPop(xmlParserCtxtPtr ctxt)
1511{
1512 xmlParserInputPtr ret;
1513
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001514 if (ctxt == NULL)
1515 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001516 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001517 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001518 ctxt->inputNr--;
1519 if (ctxt->inputNr > 0)
1520 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1521 else
1522 ctxt->input = NULL;
1523 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001524 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001525 return (ret);
1526}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001527/**
1528 * nodePush:
1529 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001530 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001531 *
1532 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001533 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001534 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001535 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001536int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001537nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1538{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001539 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001540 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001541 xmlNodePtr *tmp;
1542
1543 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1544 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001545 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001546 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001547 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001548 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001549 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001550 ctxt->nodeTab = tmp;
1551 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001552 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001553 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1554 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001555 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001556 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001557 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001558 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001559 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001560 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001561 ctxt->nodeTab[ctxt->nodeNr] = value;
1562 ctxt->node = value;
1563 return (ctxt->nodeNr++);
1564}
Daniel Veillard8915c152008-08-26 13:05:34 +00001565
Daniel Veillard1c732d22002-11-30 11:22:59 +00001566/**
1567 * nodePop:
1568 * @ctxt: an XML parser context
1569 *
1570 * Pops the top element node from the node stack
1571 *
1572 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001573 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001574xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001575nodePop(xmlParserCtxtPtr ctxt)
1576{
1577 xmlNodePtr ret;
1578
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001579 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001580 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001581 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001582 ctxt->nodeNr--;
1583 if (ctxt->nodeNr > 0)
1584 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1585 else
1586 ctxt->node = NULL;
1587 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001588 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001589 return (ret);
1590}
Daniel Veillarda2351322004-06-27 12:08:10 +00001591
1592#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001593/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001594 * nameNsPush:
1595 * @ctxt: an XML parser context
1596 * @value: the element name
1597 * @prefix: the element prefix
1598 * @URI: the element namespace name
1599 *
1600 * Pushes a new element name/prefix/URL on top of the name stack
1601 *
1602 * Returns -1 in case of error, the index in the stack otherwise
1603 */
1604static int
1605nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1606 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1607{
1608 if (ctxt->nameNr >= ctxt->nameMax) {
1609 const xmlChar * *tmp;
1610 void **tmp2;
1611 ctxt->nameMax *= 2;
1612 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1613 ctxt->nameMax *
1614 sizeof(ctxt->nameTab[0]));
1615 if (tmp == NULL) {
1616 ctxt->nameMax /= 2;
1617 goto mem_error;
1618 }
1619 ctxt->nameTab = tmp;
1620 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1621 ctxt->nameMax * 3 *
1622 sizeof(ctxt->pushTab[0]));
1623 if (tmp2 == NULL) {
1624 ctxt->nameMax /= 2;
1625 goto mem_error;
1626 }
1627 ctxt->pushTab = tmp2;
1628 }
1629 ctxt->nameTab[ctxt->nameNr] = value;
1630 ctxt->name = value;
1631 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1632 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001633 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001634 return (ctxt->nameNr++);
1635mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001636 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001637 return (-1);
1638}
1639/**
1640 * nameNsPop:
1641 * @ctxt: an XML parser context
1642 *
1643 * Pops the top element/prefix/URI name from the name stack
1644 *
1645 * Returns the name just removed
1646 */
1647static const xmlChar *
1648nameNsPop(xmlParserCtxtPtr ctxt)
1649{
1650 const xmlChar *ret;
1651
1652 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001653 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001654 ctxt->nameNr--;
1655 if (ctxt->nameNr > 0)
1656 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1657 else
1658 ctxt->name = NULL;
1659 ret = ctxt->nameTab[ctxt->nameNr];
1660 ctxt->nameTab[ctxt->nameNr] = NULL;
1661 return (ret);
1662}
Daniel Veillarda2351322004-06-27 12:08:10 +00001663#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001664
1665/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001666 * namePush:
1667 * @ctxt: an XML parser context
1668 * @value: the element name
1669 *
1670 * Pushes a new element name on top of the name stack
1671 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001672 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001673 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001674int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001675namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001676{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001677 if (ctxt == NULL) return (-1);
1678
Daniel Veillard1c732d22002-11-30 11:22:59 +00001679 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001680 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001681 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001682 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001683 ctxt->nameMax *
1684 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001685 if (tmp == NULL) {
1686 ctxt->nameMax /= 2;
1687 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001688 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001689 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001690 }
1691 ctxt->nameTab[ctxt->nameNr] = value;
1692 ctxt->name = value;
1693 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001694mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001695 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001696 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001697}
1698/**
1699 * namePop:
1700 * @ctxt: an XML parser context
1701 *
1702 * Pops the top element name from the name stack
1703 *
1704 * Returns the name just removed
1705 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001706const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001707namePop(xmlParserCtxtPtr ctxt)
1708{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001709 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001710
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001711 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1712 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001713 ctxt->nameNr--;
1714 if (ctxt->nameNr > 0)
1715 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1716 else
1717 ctxt->name = NULL;
1718 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001719 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001720 return (ret);
1721}
Owen Taylor3473f882001-02-23 17:55:21 +00001722
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001723static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001724 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001725 int *tmp;
1726
Owen Taylor3473f882001-02-23 17:55:21 +00001727 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001728 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1729 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1730 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001731 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001732 ctxt->spaceMax /=2;
1733 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001734 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001735 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001736 }
1737 ctxt->spaceTab[ctxt->spaceNr] = val;
1738 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1739 return(ctxt->spaceNr++);
1740}
1741
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001742static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001743 int ret;
1744 if (ctxt->spaceNr <= 0) return(0);
1745 ctxt->spaceNr--;
1746 if (ctxt->spaceNr > 0)
1747 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1748 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001749 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001750 ret = ctxt->spaceTab[ctxt->spaceNr];
1751 ctxt->spaceTab[ctxt->spaceNr] = -1;
1752 return(ret);
1753}
1754
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1789
/*
 * Raw accessors on the current input; all of them expect a variable
 * named ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(n) ctxt->input->cur[(n)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(p, c1, ..., cn) is true when the first n bytes at p, read as
 * unsigned char, are c1 ... cn. Each macro extends the previous one by
 * one more byte.
 */
#define CMP4( p, c1, c2, c3, c4 )                                       \
  ( ((unsigned char *) p)[ 0 ] == c1 &&                                 \
    ((unsigned char *) p)[ 1 ] == c2 &&                                 \
    ((unsigned char *) p)[ 2 ] == c3 &&                                 \
    ((unsigned char *) p)[ 3 ] == c4 )
#define CMP5( p, c1, c2, c3, c4, c5 )                                   \
  ( CMP4( p, c1, c2, c3, c4 ) &&                                        \
    ((unsigned char *) p)[ 4 ] == c5 )
#define CMP6( p, c1, c2, c3, c4, c5, c6 )                               \
  ( CMP5( p, c1, c2, c3, c4, c5 ) &&                                    \
    ((unsigned char *) p)[ 5 ] == c6 )
#define CMP7( p, c1, c2, c3, c4, c5, c6, c7 )                           \
  ( CMP6( p, c1, c2, c3, c4, c5, c6 ) &&                                \
    ((unsigned char *) p)[ 6 ] == c7 )
#define CMP8( p, c1, c2, c3, c4, c5, c6, c7, c8 )                       \
  ( CMP7( p, c1, c2, c3, c4, c5, c6, c7 ) &&                            \
    ((unsigned char *) p)[ 7 ] == c8 )
#define CMP9( p, c1, c2, c3, c4, c5, c6, c7, c8, c9 )                   \
  ( CMP8( p, c1, c2, c3, c4, c5, c6, c7, c8 ) &&                        \
    ((unsigned char *) p)[ 8 ] == c9 )
#define CMP10( p, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 )             \
  ( CMP9( p, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) &&                    \
    ((unsigned char *) p)[ 9 ] == c10 )
1812
/*
 * SKIP(val): advance the input by val xmlChars, adding val to nbChars and
 * to the column counter (no newline handling). Afterwards a '%' at the new
 * position triggers xmlParserHandlePEReference(), and a 0 byte that
 * xmlParserInputGrow() cannot extend past pops the current input.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)

/*
 * SKIPL(val): same as SKIP but steps one xmlChar at a time so that a
 * '\n' bumps the line counter and resets the column to 1.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for (skipl = 0; skipl < val; skipl++) {                             \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++;                                        \
            ctxt->input->col = 1;                                       \
        } else {                                                        \
            ctxt->input->col++;                                         \
        }                                                               \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
1835
Daniel Veillarda880b122003-04-21 21:36:41 +00001836#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001837 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1838 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001839 xmlSHRINK (ctxt);
1840
1841static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1842 xmlParserInputShrink(ctxt->input);
1843 if ((*ctxt->input->cur == 0) &&
1844 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1845 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001846 }
Owen Taylor3473f882001-02-23 17:55:21 +00001847
Daniel Veillarda880b122003-04-21 21:36:41 +00001848#define GROW if ((ctxt->progressive == 0) && \
1849 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001850 xmlGROW (ctxt);
1851
1852static void xmlGROW (xmlParserCtxtPtr ctxt) {
1853 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1854 if ((*ctxt->input->cur == 0) &&
1855 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1856 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001857}
Owen Taylor3473f882001-02-23 17:55:21 +00001858
/* Function-backed shortcuts operating on the ctxt variable in scope */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one xmlChar, counting it in col and nbChars,
 * and request more input when the new position holds a 0 byte.
 * Expands to a brace block, not a do/while statement.
 */
#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->input->col++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long,
 * updating line/col from the byte under the cursor, then handle a
 * parameter entity reference if the new position holds '%'.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++;                                            \
        ctxt->input->col = 1;                                           \
    } else {                                                            \
        ctxt->input->col++;                                             \
    }                                                                   \
    ctxt->input->cur += l;                                              \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i; a
 * length l of 1 is stored directly, anything else goes through
 * xmlCopyCharMultiByte(). Expands to a bare if/else without a trailing
 * semicolon.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1)                                                         \
        b[i++] = (xmlChar) v;                                           \
    else                                                                \
        i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001885
1886/**
1887 * xmlSkipBlankChars:
1888 * @ctxt: the XML parser context
1889 *
1890 * skip all blanks character found at that point in the input streams.
1891 * It pops up finished entities in the process if allowable at that point.
1892 *
1893 * Returns the number of space chars skipped
1894 */
1895
1896int
1897xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001898 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001899
1900 /*
1901 * It's Okay to use CUR/NEXT here since all the blanks are on
1902 * the ASCII range.
1903 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001904 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1905 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001906 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001907 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001908 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001909 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001910 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001911 if (*cur == '\n') {
1912 ctxt->input->line++; ctxt->input->col = 1;
1913 }
1914 cur++;
1915 res++;
1916 if (*cur == 0) {
1917 ctxt->input->cur = cur;
1918 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1919 cur = ctxt->input->cur;
1920 }
1921 }
1922 ctxt->input->cur = cur;
1923 } else {
1924 int cur;
1925 do {
1926 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001927 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001928 NEXT;
1929 cur = CUR;
1930 res++;
1931 }
1932 while ((cur == 0) && (ctxt->inputNr > 1) &&
1933 (ctxt->instate != XML_PARSER_COMMENT)) {
1934 xmlPopInput(ctxt);
1935 cur = CUR;
1936 }
1937 /*
1938 * Need to handle support of entities branching here
1939 */
1940 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1941 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1942 }
Owen Taylor3473f882001-02-23 17:55:21 +00001943 return(res);
1944}
1945
1946/************************************************************************
1947 * *
1948 * Commodity functions to handle entities *
1949 * *
1950 ************************************************************************/
1951
1952/**
1953 * xmlPopInput:
1954 * @ctxt: an XML parser context
1955 *
1956 * xmlPopInput: the current input pointed by ctxt->input came to an end
1957 * pop it and return the next char.
1958 *
1959 * Returns the current xmlChar in the parser context
1960 */
1961xmlChar
1962xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001963 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001964 if (xmlParserDebugEntities)
1965 xmlGenericError(xmlGenericErrorContext,
1966 "Popping input %d\n", ctxt->inputNr);
1967 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001968 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001969 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1970 return(xmlPopInput(ctxt));
1971 return(CUR);
1972}
1973
1974/**
1975 * xmlPushInput:
1976 * @ctxt: an XML parser context
1977 * @input: an XML parser input fragment (entity, XML fragment ...).
1978 *
1979 * xmlPushInput: switch to a new input stream which is stacked on top
1980 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001981 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00001982 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001983int
Owen Taylor3473f882001-02-23 17:55:21 +00001984xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001985 int ret;
1986 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001987
1988 if (xmlParserDebugEntities) {
1989 if ((ctxt->input != NULL) && (ctxt->input->filename))
1990 xmlGenericError(xmlGenericErrorContext,
1991 "%s(%d): ", ctxt->input->filename,
1992 ctxt->input->line);
1993 xmlGenericError(xmlGenericErrorContext,
1994 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1995 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001996 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001997 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001998 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001999}
2000
2001/**
2002 * xmlParseCharRef:
2003 * @ctxt: an XML parser context
2004 *
2005 * parse Reference declarations
2006 *
2007 * [66] CharRef ::= '&#' [0-9]+ ';' |
2008 * '&#x' [0-9a-fA-F]+ ';'
2009 *
2010 * [ WFC: Legal Character ]
2011 * Characters referred to using character references must match the
2012 * production for Char.
2013 *
2014 * Returns the value parsed (as an int), 0 in case of error
2015 */
2016int
2017xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002018 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002019 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002020 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002021
Owen Taylor3473f882001-02-23 17:55:21 +00002022 /*
2023 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2024 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002025 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002026 (NXT(2) == 'x')) {
2027 SKIP(3);
2028 GROW;
2029 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002030 if (count++ > 20) {
2031 count = 0;
2032 GROW;
2033 }
2034 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002035 val = val * 16 + (CUR - '0');
2036 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2037 val = val * 16 + (CUR - 'a') + 10;
2038 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2039 val = val * 16 + (CUR - 'A') + 10;
2040 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002041 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002042 val = 0;
2043 break;
2044 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002045 if (val > 0x10FFFF)
2046 outofrange = val;
2047
Owen Taylor3473f882001-02-23 17:55:21 +00002048 NEXT;
2049 count++;
2050 }
2051 if (RAW == ';') {
2052 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002053 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002054 ctxt->nbChars ++;
2055 ctxt->input->cur++;
2056 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002057 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002058 SKIP(2);
2059 GROW;
2060 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002061 if (count++ > 20) {
2062 count = 0;
2063 GROW;
2064 }
2065 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002066 val = val * 10 + (CUR - '0');
2067 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002068 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002069 val = 0;
2070 break;
2071 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002072 if (val > 0x10FFFF)
2073 outofrange = val;
2074
Owen Taylor3473f882001-02-23 17:55:21 +00002075 NEXT;
2076 count++;
2077 }
2078 if (RAW == ';') {
2079 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002080 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002081 ctxt->nbChars ++;
2082 ctxt->input->cur++;
2083 }
2084 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002085 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002086 }
2087
2088 /*
2089 * [ WFC: Legal Character ]
2090 * Characters referred to using character references must match the
2091 * production for Char.
2092 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002093 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002094 return(val);
2095 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002096 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2097 "xmlParseCharRef: invalid xmlChar value %d\n",
2098 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002099 }
2100 return(0);
2101}
2102
2103/**
2104 * xmlParseStringCharRef:
2105 * @ctxt: an XML parser context
2106 * @str: a pointer to an index in the string
2107 *
2108 * parse Reference declarations, variant parsing from a string rather
2109 * than an an input flow.
2110 *
2111 * [66] CharRef ::= '&#' [0-9]+ ';' |
2112 * '&#x' [0-9a-fA-F]+ ';'
2113 *
2114 * [ WFC: Legal Character ]
2115 * Characters referred to using character references must match the
2116 * production for Char.
2117 *
2118 * Returns the value parsed (as an int), 0 in case of error, str will be
2119 * updated to the current value of the index
2120 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002121static int
Owen Taylor3473f882001-02-23 17:55:21 +00002122xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2123 const xmlChar *ptr;
2124 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002125 unsigned int val = 0;
2126 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002127
2128 if ((str == NULL) || (*str == NULL)) return(0);
2129 ptr = *str;
2130 cur = *ptr;
2131 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2132 ptr += 3;
2133 cur = *ptr;
2134 while (cur != ';') { /* Non input consuming loop */
2135 if ((cur >= '0') && (cur <= '9'))
2136 val = val * 16 + (cur - '0');
2137 else if ((cur >= 'a') && (cur <= 'f'))
2138 val = val * 16 + (cur - 'a') + 10;
2139 else if ((cur >= 'A') && (cur <= 'F'))
2140 val = val * 16 + (cur - 'A') + 10;
2141 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002142 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002143 val = 0;
2144 break;
2145 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002146 if (val > 0x10FFFF)
2147 outofrange = val;
2148
Owen Taylor3473f882001-02-23 17:55:21 +00002149 ptr++;
2150 cur = *ptr;
2151 }
2152 if (cur == ';')
2153 ptr++;
2154 } else if ((cur == '&') && (ptr[1] == '#')){
2155 ptr += 2;
2156 cur = *ptr;
2157 while (cur != ';') { /* Non input consuming loops */
2158 if ((cur >= '0') && (cur <= '9'))
2159 val = val * 10 + (cur - '0');
2160 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002161 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002162 val = 0;
2163 break;
2164 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002165 if (val > 0x10FFFF)
2166 outofrange = val;
2167
Owen Taylor3473f882001-02-23 17:55:21 +00002168 ptr++;
2169 cur = *ptr;
2170 }
2171 if (cur == ';')
2172 ptr++;
2173 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002174 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002175 return(0);
2176 }
2177 *str = ptr;
2178
2179 /*
2180 * [ WFC: Legal Character ]
2181 * Characters referred to using character references must match the
2182 * production for Char.
2183 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002184 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002185 return(val);
2186 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002187 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2188 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2189 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002190 }
2191 return(0);
2192}
2193
2194/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002195 * xmlNewBlanksWrapperInputStream:
2196 * @ctxt: an XML parser context
2197 * @entity: an Entity pointer
2198 *
2199 * Create a new input stream for wrapping
2200 * blanks around a PEReference
2201 *
2202 * Returns the new input stream or NULL
2203 */
2204
2205static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2206
Daniel Veillardf4862f02002-09-10 11:13:43 +00002207static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002208xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2209 xmlParserInputPtr input;
2210 xmlChar *buffer;
2211 size_t length;
2212 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002213 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2214 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002215 return(NULL);
2216 }
2217 if (xmlParserDebugEntities)
2218 xmlGenericError(xmlGenericErrorContext,
2219 "new blanks wrapper for entity: %s\n", entity->name);
2220 input = xmlNewInputStream(ctxt);
2221 if (input == NULL) {
2222 return(NULL);
2223 }
2224 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002225 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002226 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002227 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002228 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002229 return(NULL);
2230 }
2231 buffer [0] = ' ';
2232 buffer [1] = '%';
2233 buffer [length-3] = ';';
2234 buffer [length-2] = ' ';
2235 buffer [length-1] = 0;
2236 memcpy(buffer + 2, entity->name, length - 5);
2237 input->free = deallocblankswrapper;
2238 input->base = buffer;
2239 input->cur = buffer;
2240 input->length = length;
2241 input->end = &buffer[length];
2242 return(input);
2243}
2244
2245/**
Owen Taylor3473f882001-02-23 17:55:21 +00002246 * xmlParserHandlePEReference:
2247 * @ctxt: the parser context
2248 *
2249 * [69] PEReference ::= '%' Name ';'
2250 *
2251 * [ WFC: No Recursion ]
2252 * A parsed entity must not contain a recursive
2253 * reference to itself, either directly or indirectly.
2254 *
2255 * [ WFC: Entity Declared ]
2256 * In a document without any DTD, a document with only an internal DTD
2257 * subset which contains no parameter entity references, or a document
2258 * with "standalone='yes'", ... ... The declaration of a parameter
2259 * entity must precede any reference to it...
2260 *
2261 * [ VC: Entity Declared ]
2262 * In a document with an external subset or external parameter entities
2263 * with "standalone='no'", ... ... The declaration of a parameter entity
2264 * must precede any reference to it...
2265 *
2266 * [ WFC: In DTD ]
2267 * Parameter-entity references may only appear in the DTD.
2268 * NOTE: misleading but this is handled.
2269 *
2270 * A PEReference may have been detected in the current input stream
2271 * the handling is done accordingly to
2272 * http://www.w3.org/TR/REC-xml#entproc
2273 * i.e.
2274 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002275 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002276 */
2277void
2278xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002279 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002280 xmlEntityPtr entity = NULL;
2281 xmlParserInputPtr input;
2282
Owen Taylor3473f882001-02-23 17:55:21 +00002283 if (RAW != '%') return;
2284 switch(ctxt->instate) {
2285 case XML_PARSER_CDATA_SECTION:
2286 return;
2287 case XML_PARSER_COMMENT:
2288 return;
2289 case XML_PARSER_START_TAG:
2290 return;
2291 case XML_PARSER_END_TAG:
2292 return;
2293 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002294 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002295 return;
2296 case XML_PARSER_PROLOG:
2297 case XML_PARSER_START:
2298 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002299 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002300 return;
2301 case XML_PARSER_ENTITY_DECL:
2302 case XML_PARSER_CONTENT:
2303 case XML_PARSER_ATTRIBUTE_VALUE:
2304 case XML_PARSER_PI:
2305 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002306 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002307 /* we just ignore it there */
2308 return;
2309 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002310 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002311 return;
2312 case XML_PARSER_ENTITY_VALUE:
2313 /*
2314 * NOTE: in the case of entity values, we don't do the
2315 * substitution here since we need the literal
2316 * entity value to be able to save the internal
2317 * subset of the document.
2318 * This will be handled by xmlStringDecodeEntities
2319 */
2320 return;
2321 case XML_PARSER_DTD:
2322 /*
2323 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2324 * In the internal DTD subset, parameter-entity references
2325 * can occur only where markup declarations can occur, not
2326 * within markup declarations.
2327 * In that case this is handled in xmlParseMarkupDecl
2328 */
2329 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2330 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002331 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002332 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002333 break;
2334 case XML_PARSER_IGNORE:
2335 return;
2336 }
2337
2338 NEXT;
2339 name = xmlParseName(ctxt);
2340 if (xmlParserDebugEntities)
2341 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002342 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002343 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002344 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002345 } else {
2346 if (RAW == ';') {
2347 NEXT;
2348 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2349 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2350 if (entity == NULL) {
2351
2352 /*
2353 * [ WFC: Entity Declared ]
2354 * In a document without any DTD, a document with only an
2355 * internal DTD subset which contains no parameter entity
2356 * references, or a document with "standalone='yes'", ...
2357 * ... The declaration of a parameter entity must precede
2358 * any reference to it...
2359 */
2360 if ((ctxt->standalone == 1) ||
2361 ((ctxt->hasExternalSubset == 0) &&
2362 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002363 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002364 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002365 } else {
2366 /*
2367 * [ VC: Entity Declared ]
2368 * In a document with an external subset or external
2369 * parameter entities with "standalone='no'", ...
2370 * ... The declaration of a parameter entity must precede
2371 * any reference to it...
2372 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002373 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2374 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2375 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002376 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002377 } else
2378 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2379 "PEReference: %%%s; not found\n",
2380 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002381 ctxt->valid = 0;
2382 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002383 } else if (ctxt->input->free != deallocblankswrapper) {
2384 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002385 if (xmlPushInput(ctxt, input) < 0)
2386 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002387 } else {
2388 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2389 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002390 xmlChar start[4];
2391 xmlCharEncoding enc;
2392
Owen Taylor3473f882001-02-23 17:55:21 +00002393 /*
2394 * handle the extra spaces added before and after
2395 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002396 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002397 */
2398 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002399 if (xmlPushInput(ctxt, input) < 0)
2400 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002401
2402 /*
2403 * Get the 4 first bytes and decode the charset
2404 * if enc != XML_CHAR_ENCODING_NONE
2405 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002406 * Note that, since we may have some non-UTF8
2407 * encoding (like UTF16, bug 135229), the 'length'
2408 * is not known, but we can calculate based upon
2409 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002410 */
2411 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002412 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002413 start[0] = RAW;
2414 start[1] = NXT(1);
2415 start[2] = NXT(2);
2416 start[3] = NXT(3);
2417 enc = xmlDetectCharEncoding(start, 4);
2418 if (enc != XML_CHAR_ENCODING_NONE) {
2419 xmlSwitchEncoding(ctxt, enc);
2420 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002421 }
2422
Owen Taylor3473f882001-02-23 17:55:21 +00002423 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002424 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2425 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002426 xmlParseTextDecl(ctxt);
2427 }
Owen Taylor3473f882001-02-23 17:55:21 +00002428 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002429 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2430 "PEReference: %s is not a parameter entity\n",
2431 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002432 }
2433 }
2434 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002435 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002436 }
Owen Taylor3473f882001-02-23 17:55:21 +00002437 }
2438}
2439
/*
 * Macro used to grow the current buffer.
 *
 * Expects an int variable named <buffer>_size next to @buffer and a
 * `mem_error` label in the enclosing function. The size is doubled and
 * then increased by @n. On reallocation failure @buffer is left
 * untouched (still owned by the caller) and control jumps to mem_error.
 * Wrapped in do/while(0) so that `growBuffer(buf, n);` is a single
 * statement in every context.
 */
#define growBuffer(buffer, n) do {					\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    buffer##_size += (n);						\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
} while (0)
2452
2453/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002454 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002455 * @ctxt: the parser context
2456 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002457 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002458 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2459 * @end: an end marker xmlChar, 0 if none
2460 * @end2: an end marker xmlChar, 0 if none
2461 * @end3: an end marker xmlChar, 0 if none
2462 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002463 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002464 *
2465 * [67] Reference ::= EntityRef | CharRef
2466 *
2467 * [69] PEReference ::= '%' Name ';'
2468 *
2469 * Returns A newly allocated string with the substitution done. The caller
2470 * must deallocate it !
2471 */
2472xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002473xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2474 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002475 xmlChar *buffer = NULL;
2476 int buffer_size = 0;
2477
2478 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002479 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002480 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002481 xmlEntityPtr ent;
2482 int c,l;
2483 int nbchars = 0;
2484
Daniel Veillarda82b1822004-11-08 16:24:57 +00002485 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002486 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002487 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002488
Daniel Veillard0161e632008-08-28 15:36:32 +00002489 if (((ctxt->depth > 40) &&
2490 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2491 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002492 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002493 return(NULL);
2494 }
2495
2496 /*
2497 * allocate a translation buffer.
2498 */
2499 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002500 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002501 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002502
2503 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002504 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002505 * we are operating on already parsed values.
2506 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002507 if (str < last)
2508 c = CUR_SCHAR(str, l);
2509 else
2510 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002511 while ((c != 0) && (c != end) && /* non input consuming loop */
2512 (c != end2) && (c != end3)) {
2513
2514 if (c == 0) break;
2515 if ((c == '&') && (str[1] == '#')) {
2516 int val = xmlParseStringCharRef(ctxt, &str);
2517 if (val != 0) {
2518 COPY_BUF(0,buffer,nbchars,val);
2519 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002520 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002521 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002522 }
Owen Taylor3473f882001-02-23 17:55:21 +00002523 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2524 if (xmlParserDebugEntities)
2525 xmlGenericError(xmlGenericErrorContext,
2526 "String decoding Entity Reference: %.30s\n",
2527 str);
2528 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002529 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2530 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002531 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002532 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002533 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002534 if ((ent != NULL) &&
2535 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2536 if (ent->content != NULL) {
2537 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002538 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002539 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002540 }
Owen Taylor3473f882001-02-23 17:55:21 +00002541 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002542 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2543 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002544 }
2545 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002546 ctxt->depth++;
2547 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2548 0, 0, 0);
2549 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002550
Owen Taylor3473f882001-02-23 17:55:21 +00002551 if (rep != NULL) {
2552 current = rep;
2553 while (*current != 0) { /* non input consuming loop */
2554 buffer[nbchars++] = *current++;
2555 if (nbchars >
2556 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002557 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2558 goto int_error;
2559 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002560 }
2561 }
2562 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002563 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002564 }
2565 } else if (ent != NULL) {
2566 int i = xmlStrlen(ent->name);
2567 const xmlChar *cur = ent->name;
2568
2569 buffer[nbchars++] = '&';
2570 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002571 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002572 }
2573 for (;i > 0;i--)
2574 buffer[nbchars++] = *cur++;
2575 buffer[nbchars++] = ';';
2576 }
2577 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2578 if (xmlParserDebugEntities)
2579 xmlGenericError(xmlGenericErrorContext,
2580 "String decoding PE Reference: %.30s\n", str);
2581 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002582 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2583 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002584 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002585 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002586 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002587 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002588 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002589 }
Owen Taylor3473f882001-02-23 17:55:21 +00002590 ctxt->depth++;
2591 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2592 0, 0, 0);
2593 ctxt->depth--;
2594 if (rep != NULL) {
2595 current = rep;
2596 while (*current != 0) { /* non input consuming loop */
2597 buffer[nbchars++] = *current++;
2598 if (nbchars >
2599 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002600 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2601 goto int_error;
2602 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002603 }
2604 }
2605 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002606 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002607 }
2608 }
2609 } else {
2610 COPY_BUF(l,buffer,nbchars,c);
2611 str += l;
2612 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002613 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002614 }
2615 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002616 if (str < last)
2617 c = CUR_SCHAR(str, l);
2618 else
2619 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002620 }
2621 buffer[nbchars++] = 0;
2622 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002623
2624mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002625 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002626int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002627 if (rep != NULL)
2628 xmlFree(rep);
2629 if (buffer != NULL)
2630 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002631 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002632}
2633
Daniel Veillarde57ec792003-09-10 10:50:59 +00002634/**
2635 * xmlStringDecodeEntities:
2636 * @ctxt: the parser context
2637 * @str: the input string
2638 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2639 * @end: an end marker xmlChar, 0 if none
2640 * @end2: an end marker xmlChar, 0 if none
2641 * @end3: an end marker xmlChar, 0 if none
2642 *
2643 * Takes a entity string content and process to do the adequate substitutions.
2644 *
2645 * [67] Reference ::= EntityRef | CharRef
2646 *
2647 * [69] PEReference ::= '%' Name ';'
2648 *
2649 * Returns A newly allocated string with the substitution done. The caller
2650 * must deallocate it !
2651 */
2652xmlChar *
2653xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2654 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002655 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002656 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2657 end, end2, end3));
2658}
Owen Taylor3473f882001-02-23 17:55:21 +00002659
2660/************************************************************************
2661 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002662 * Commodity functions, cleanup needed ? *
2663 * *
2664 ************************************************************************/
2665
2666/**
2667 * areBlanks:
2668 * @ctxt: an XML parser context
2669 * @str: a xmlChar *
2670 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002671 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002672 *
2673 * Is this a sequence of blank chars that one can ignore ?
2674 *
2675 * Returns 1 if ignorable 0 otherwise.
2676 */
2677
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002678static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2679 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002680 int i, ret;
2681 xmlNodePtr lastChild;
2682
Daniel Veillard05c13a22001-09-09 08:38:09 +00002683 /*
2684 * Don't spend time trying to differentiate them, the same callback is
2685 * used !
2686 */
2687 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002688 return(0);
2689
Owen Taylor3473f882001-02-23 17:55:21 +00002690 /*
2691 * Check for xml:space value.
2692 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002693 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2694 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002695 return(0);
2696
2697 /*
2698 * Check that the string is made of blanks
2699 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002700 if (blank_chars == 0) {
2701 for (i = 0;i < len;i++)
2702 if (!(IS_BLANK_CH(str[i]))) return(0);
2703 }
Owen Taylor3473f882001-02-23 17:55:21 +00002704
2705 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002706 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002707 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002708 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002709 if (ctxt->myDoc != NULL) {
2710 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2711 if (ret == 0) return(1);
2712 if (ret == 1) return(0);
2713 }
2714
2715 /*
2716 * Otherwise, heuristic :-\
2717 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002718 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002719 if ((ctxt->node->children == NULL) &&
2720 (RAW == '<') && (NXT(1) == '/')) return(0);
2721
2722 lastChild = xmlGetLastChild(ctxt->node);
2723 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002724 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2725 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002726 } else if (xmlNodeIsText(lastChild))
2727 return(0);
2728 else if ((ctxt->node->children != NULL) &&
2729 (xmlNodeIsText(ctxt->node->children)))
2730 return(0);
2731 return(1);
2732}
2733
Owen Taylor3473f882001-02-23 17:55:21 +00002734/************************************************************************
2735 * *
2736 * Extra stuff for namespace support *
2737 * Relates to http://www.w3.org/TR/WD-xml-names *
2738 * *
2739 ************************************************************************/
2740
2741/**
2742 * xmlSplitQName:
2743 * @ctxt: an XML parser context
2744 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002745 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002746 *
2747 * parse an UTF8 encoded XML qualified name string
2748 *
2749 * [NS 5] QName ::= (Prefix ':')? LocalPart
2750 *
2751 * [NS 6] Prefix ::= NCName
2752 *
2753 * [NS 7] LocalPart ::= NCName
2754 *
2755 * Returns the local part, and prefix is updated
2756 * to get the Prefix if any.
2757 */
2758
2759xmlChar *
2760xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2761 xmlChar buf[XML_MAX_NAMELEN + 5];
2762 xmlChar *buffer = NULL;
2763 int len = 0;
2764 int max = XML_MAX_NAMELEN;
2765 xmlChar *ret = NULL;
2766 const xmlChar *cur = name;
2767 int c;
2768
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002769 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002770 *prefix = NULL;
2771
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002772 if (cur == NULL) return(NULL);
2773
Owen Taylor3473f882001-02-23 17:55:21 +00002774#ifndef XML_XML_NAMESPACE
2775 /* xml: prefix is not really a namespace */
2776 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2777 (cur[2] == 'l') && (cur[3] == ':'))
2778 return(xmlStrdup(name));
2779#endif
2780
Daniel Veillard597bc482003-07-24 16:08:28 +00002781 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002782 if (cur[0] == ':')
2783 return(xmlStrdup(name));
2784
2785 c = *cur++;
2786 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2787 buf[len++] = c;
2788 c = *cur++;
2789 }
2790 if (len >= max) {
2791 /*
2792 * Okay someone managed to make a huge name, so he's ready to pay
2793 * for the processing speed.
2794 */
2795 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002796
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002797 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002798 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002799 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002800 return(NULL);
2801 }
2802 memcpy(buffer, buf, len);
2803 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2804 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002805 xmlChar *tmp;
2806
Owen Taylor3473f882001-02-23 17:55:21 +00002807 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002808 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002809 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002810 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002811 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002812 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002813 return(NULL);
2814 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002815 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002816 }
2817 buffer[len++] = c;
2818 c = *cur++;
2819 }
2820 buffer[len] = 0;
2821 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002822
Daniel Veillard597bc482003-07-24 16:08:28 +00002823 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002824 if (buffer != NULL)
2825 xmlFree(buffer);
2826 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002827 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002828 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002829
Owen Taylor3473f882001-02-23 17:55:21 +00002830 if (buffer == NULL)
2831 ret = xmlStrndup(buf, len);
2832 else {
2833 ret = buffer;
2834 buffer = NULL;
2835 max = XML_MAX_NAMELEN;
2836 }
2837
2838
2839 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002840 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002841 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002842 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002843 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002844 }
Owen Taylor3473f882001-02-23 17:55:21 +00002845 len = 0;
2846
Daniel Veillardbb284f42002-10-16 18:02:47 +00002847 /*
2848 * Check that the first character is proper to start
2849 * a new name
2850 */
2851 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2852 ((c >= 0x41) && (c <= 0x5A)) ||
2853 (c == '_') || (c == ':'))) {
2854 int l;
2855 int first = CUR_SCHAR(cur, l);
2856
2857 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002858 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002859 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002860 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002861 }
2862 }
2863 cur++;
2864
Owen Taylor3473f882001-02-23 17:55:21 +00002865 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2866 buf[len++] = c;
2867 c = *cur++;
2868 }
2869 if (len >= max) {
2870 /*
2871 * Okay someone managed to make a huge name, so he's ready to pay
2872 * for the processing speed.
2873 */
2874 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002875
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002876 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002877 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002878 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002879 return(NULL);
2880 }
2881 memcpy(buffer, buf, len);
2882 while (c != 0) { /* tested bigname2.xml */
2883 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002884 xmlChar *tmp;
2885
Owen Taylor3473f882001-02-23 17:55:21 +00002886 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002887 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002888 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002889 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002890 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002891 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002892 return(NULL);
2893 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002894 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002895 }
2896 buffer[len++] = c;
2897 c = *cur++;
2898 }
2899 buffer[len] = 0;
2900 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002901
Owen Taylor3473f882001-02-23 17:55:21 +00002902 if (buffer == NULL)
2903 ret = xmlStrndup(buf, len);
2904 else {
2905 ret = buffer;
2906 }
2907 }
2908
2909 return(ret);
2910}
2911
2912/************************************************************************
2913 * *
2914 * The parser itself *
2915 * Relates to http://www.w3.org/TR/REC-xml *
2916 * *
2917 ************************************************************************/
2918
Daniel Veillard34e3f642008-07-29 09:02:27 +00002919/************************************************************************
2920 * *
2921 * Routines to parse Name, NCName and NmToken *
2922 * *
2923 ************************************************************************/
#ifdef DEBUG
/*
 * Statistics counters, only compiled into DEBUG builds.
 * nbParseNameComplex is incremented on entry to xmlParseNameComplex();
 * the others are presumably bumped by the like-named parsing routines
 * (not visible from here -- confirm before relying on it).
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
2932
Daniel Veillard34e3f642008-07-29 09:02:27 +00002933/*
2934 * The two following functions are related to the change of accepted
2935 * characters for Name and NmToken in the Revision 5 of XML-1.0
2936 * They correspond to the modified production [4] and the new production [4a]
2937 * changes in that revision. Also note that the macros used for the
2938 * productions Letter, Digit, CombiningChar and Extender are not needed
2939 * anymore.
2940 * We still keep compatibility to pre-revision5 parsing semantic if the
2941 * new XML_PARSE_OLD10 option is given to the parser.
2942 */
2943static int
2944xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2945 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2946 /*
2947 * Use the new checks of production [4] [4a] amd [5] of the
2948 * Update 5 of XML-1.0
2949 */
2950 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2951 (((c >= 'a') && (c <= 'z')) ||
2952 ((c >= 'A') && (c <= 'Z')) ||
2953 (c == '_') || (c == ':') ||
2954 ((c >= 0xC0) && (c <= 0xD6)) ||
2955 ((c >= 0xD8) && (c <= 0xF6)) ||
2956 ((c >= 0xF8) && (c <= 0x2FF)) ||
2957 ((c >= 0x370) && (c <= 0x37D)) ||
2958 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2959 ((c >= 0x200C) && (c <= 0x200D)) ||
2960 ((c >= 0x2070) && (c <= 0x218F)) ||
2961 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2962 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2963 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2964 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2965 ((c >= 0x10000) && (c <= 0xEFFFF))))
2966 return(1);
2967 } else {
2968 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2969 return(1);
2970 }
2971 return(0);
2972}
2973
2974static int
2975xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2976 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2977 /*
2978 * Use the new checks of production [4] [4a] amd [5] of the
2979 * Update 5 of XML-1.0
2980 */
2981 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2982 (((c >= 'a') && (c <= 'z')) ||
2983 ((c >= 'A') && (c <= 'Z')) ||
2984 ((c >= '0') && (c <= '9')) || /* !start */
2985 (c == '_') || (c == ':') ||
2986 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2987 ((c >= 0xC0) && (c <= 0xD6)) ||
2988 ((c >= 0xD8) && (c <= 0xF6)) ||
2989 ((c >= 0xF8) && (c <= 0x2FF)) ||
2990 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2991 ((c >= 0x370) && (c <= 0x37D)) ||
2992 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2993 ((c >= 0x200C) && (c <= 0x200D)) ||
2994 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2995 ((c >= 0x2070) && (c <= 0x218F)) ||
2996 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2997 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2998 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2999 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3000 ((c >= 0x10000) && (c <= 0xEFFFF))))
3001 return(1);
3002 } else {
3003 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3004 (c == '.') || (c == '-') ||
3005 (c == '_') || (c == ':') ||
3006 (IS_COMBINING(c)) ||
3007 (IS_EXTENDER(c)))
3008 return(1);
3009 }
3010 return(0);
3011}
3012
Daniel Veillarde57ec792003-09-10 10:50:59 +00003013static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003014 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003015
Daniel Veillard34e3f642008-07-29 09:02:27 +00003016static const xmlChar *
3017xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3018 int len = 0, l;
3019 int c;
3020 int count = 0;
3021
Daniel Veillardc6561462009-03-25 10:22:31 +00003022#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003023 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003024#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003025
3026 /*
3027 * Handler for more complex cases
3028 */
3029 GROW;
3030 c = CUR_CHAR(l);
3031 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3032 /*
3033 * Use the new checks of production [4] [4a] amd [5] of the
3034 * Update 5 of XML-1.0
3035 */
3036 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3037 (!(((c >= 'a') && (c <= 'z')) ||
3038 ((c >= 'A') && (c <= 'Z')) ||
3039 (c == '_') || (c == ':') ||
3040 ((c >= 0xC0) && (c <= 0xD6)) ||
3041 ((c >= 0xD8) && (c <= 0xF6)) ||
3042 ((c >= 0xF8) && (c <= 0x2FF)) ||
3043 ((c >= 0x370) && (c <= 0x37D)) ||
3044 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3045 ((c >= 0x200C) && (c <= 0x200D)) ||
3046 ((c >= 0x2070) && (c <= 0x218F)) ||
3047 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3048 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3049 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3050 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3051 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3052 return(NULL);
3053 }
3054 len += l;
3055 NEXTL(l);
3056 c = CUR_CHAR(l);
3057 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3058 (((c >= 'a') && (c <= 'z')) ||
3059 ((c >= 'A') && (c <= 'Z')) ||
3060 ((c >= '0') && (c <= '9')) || /* !start */
3061 (c == '_') || (c == ':') ||
3062 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3063 ((c >= 0xC0) && (c <= 0xD6)) ||
3064 ((c >= 0xD8) && (c <= 0xF6)) ||
3065 ((c >= 0xF8) && (c <= 0x2FF)) ||
3066 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3067 ((c >= 0x370) && (c <= 0x37D)) ||
3068 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3069 ((c >= 0x200C) && (c <= 0x200D)) ||
3070 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3071 ((c >= 0x2070) && (c <= 0x218F)) ||
3072 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3073 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3074 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3075 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3076 ((c >= 0x10000) && (c <= 0xEFFFF))
3077 )) {
3078 if (count++ > 100) {
3079 count = 0;
3080 GROW;
3081 }
3082 len += l;
3083 NEXTL(l);
3084 c = CUR_CHAR(l);
3085 }
3086 } else {
3087 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3088 (!IS_LETTER(c) && (c != '_') &&
3089 (c != ':'))) {
3090 return(NULL);
3091 }
3092 len += l;
3093 NEXTL(l);
3094 c = CUR_CHAR(l);
3095
3096 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3097 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3098 (c == '.') || (c == '-') ||
3099 (c == '_') || (c == ':') ||
3100 (IS_COMBINING(c)) ||
3101 (IS_EXTENDER(c)))) {
3102 if (count++ > 100) {
3103 count = 0;
3104 GROW;
3105 }
3106 len += l;
3107 NEXTL(l);
3108 c = CUR_CHAR(l);
3109 }
3110 }
3111 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3112 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3113 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3114}
3115
Owen Taylor3473f882001-02-23 17:55:21 +00003116/**
3117 * xmlParseName:
3118 * @ctxt: an XML parser context
3119 *
3120 * parse an XML name.
3121 *
3122 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3123 * CombiningChar | Extender
3124 *
3125 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3126 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003127 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003128 *
3129 * Returns the Name parsed or NULL
3130 */
3131
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003132const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003133xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003134 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003135 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003136 int count = 0;
3137
3138 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003139
Daniel Veillardc6561462009-03-25 10:22:31 +00003140#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003141 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003142#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003143
Daniel Veillard48b2f892001-02-25 16:11:03 +00003144 /*
3145 * Accelerator for simple ASCII names
3146 */
3147 in = ctxt->input->cur;
3148 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3149 ((*in >= 0x41) && (*in <= 0x5A)) ||
3150 (*in == '_') || (*in == ':')) {
3151 in++;
3152 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3153 ((*in >= 0x41) && (*in <= 0x5A)) ||
3154 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003155 (*in == '_') || (*in == '-') ||
3156 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003157 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003158 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003159 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003160 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003161 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003162 ctxt->nbChars += count;
3163 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003164 if (ret == NULL)
3165 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003166 return(ret);
3167 }
3168 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003169 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003170 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003171}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003172
Daniel Veillard34e3f642008-07-29 09:02:27 +00003173static const xmlChar *
3174xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3175 int len = 0, l;
3176 int c;
3177 int count = 0;
3178
Daniel Veillardc6561462009-03-25 10:22:31 +00003179#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003180 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003181#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003182
3183 /*
3184 * Handler for more complex cases
3185 */
3186 GROW;
3187 c = CUR_CHAR(l);
3188 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3189 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3190 return(NULL);
3191 }
3192
3193 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3194 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3195 if (count++ > 100) {
3196 count = 0;
3197 GROW;
3198 }
3199 len += l;
3200 NEXTL(l);
3201 c = CUR_CHAR(l);
3202 }
3203 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3204}
3205
3206/**
3207 * xmlParseNCName:
3208 * @ctxt: an XML parser context
3209 * @len: lenght of the string parsed
3210 *
3211 * parse an XML name.
3212 *
3213 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3214 * CombiningChar | Extender
3215 *
3216 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3217 *
3218 * Returns the Name parsed or NULL
3219 */
3220
3221static const xmlChar *
3222xmlParseNCName(xmlParserCtxtPtr ctxt) {
3223 const xmlChar *in;
3224 const xmlChar *ret;
3225 int count = 0;
3226
Daniel Veillardc6561462009-03-25 10:22:31 +00003227#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003228 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003229#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003230
3231 /*
3232 * Accelerator for simple ASCII names
3233 */
3234 in = ctxt->input->cur;
3235 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3236 ((*in >= 0x41) && (*in <= 0x5A)) ||
3237 (*in == '_')) {
3238 in++;
3239 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3240 ((*in >= 0x41) && (*in <= 0x5A)) ||
3241 ((*in >= 0x30) && (*in <= 0x39)) ||
3242 (*in == '_') || (*in == '-') ||
3243 (*in == '.'))
3244 in++;
3245 if ((*in > 0) && (*in < 0x80)) {
3246 count = in - ctxt->input->cur;
3247 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3248 ctxt->input->cur = in;
3249 ctxt->nbChars += count;
3250 ctxt->input->col += count;
3251 if (ret == NULL) {
3252 xmlErrMemory(ctxt, NULL);
3253 }
3254 return(ret);
3255 }
3256 }
3257 return(xmlParseNCNameComplex(ctxt));
3258}
3259
Daniel Veillard46de64e2002-05-29 08:21:33 +00003260/**
3261 * xmlParseNameAndCompare:
3262 * @ctxt: an XML parser context
3263 *
3264 * parse an XML name and compares for match
3265 * (specialized for endtag parsing)
3266 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003267 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3268 * and the name for mismatch
3269 */
3270
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003271static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003272xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003273 register const xmlChar *cmp = other;
3274 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003275 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003276
3277 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003278
Daniel Veillard46de64e2002-05-29 08:21:33 +00003279 in = ctxt->input->cur;
3280 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003281 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003282 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003283 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003284 }
William M. Brack76e95df2003-10-18 16:20:14 +00003285 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003286 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003287 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003288 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003289 }
3290 /* failure (or end of input buffer), check with full function */
3291 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003292 /* strings coming from the dictionnary direct compare possible */
3293 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003294 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003295 }
3296 return ret;
3297}
3298
Owen Taylor3473f882001-02-23 17:55:21 +00003299/**
3300 * xmlParseStringName:
3301 * @ctxt: an XML parser context
3302 * @str: a pointer to the string pointer (IN/OUT)
3303 *
3304 * parse an XML name.
3305 *
3306 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3307 * CombiningChar | Extender
3308 *
3309 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3310 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003311 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003312 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003313 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003314 * is updated to the current location in the string.
3315 */
3316
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003317static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003318xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3319 xmlChar buf[XML_MAX_NAMELEN + 5];
3320 const xmlChar *cur = *str;
3321 int len = 0, l;
3322 int c;
3323
Daniel Veillardc6561462009-03-25 10:22:31 +00003324#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003325 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003326#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003327
Owen Taylor3473f882001-02-23 17:55:21 +00003328 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003329 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003330 return(NULL);
3331 }
3332
Daniel Veillard34e3f642008-07-29 09:02:27 +00003333 COPY_BUF(l,buf,len,c);
3334 cur += l;
3335 c = CUR_SCHAR(cur, l);
3336 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003337 COPY_BUF(l,buf,len,c);
3338 cur += l;
3339 c = CUR_SCHAR(cur, l);
3340 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3341 /*
3342 * Okay someone managed to make a huge name, so he's ready to pay
3343 * for the processing speed.
3344 */
3345 xmlChar *buffer;
3346 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003347
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003348 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003349 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003350 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003351 return(NULL);
3352 }
3353 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003354 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003355 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003356 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003357 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003358 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003359 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003360 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003361 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003362 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003363 return(NULL);
3364 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003365 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003366 }
3367 COPY_BUF(l,buffer,len,c);
3368 cur += l;
3369 c = CUR_SCHAR(cur, l);
3370 }
3371 buffer[len] = 0;
3372 *str = cur;
3373 return(buffer);
3374 }
3375 }
3376 *str = cur;
3377 return(xmlStrndup(buf, len));
3378}
3379
3380/**
3381 * xmlParseNmtoken:
3382 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003383 *
Owen Taylor3473f882001-02-23 17:55:21 +00003384 * parse an XML Nmtoken.
3385 *
3386 * [7] Nmtoken ::= (NameChar)+
3387 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003388 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003389 *
3390 * Returns the Nmtoken parsed or NULL
3391 */
3392
3393xmlChar *
3394xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3395 xmlChar buf[XML_MAX_NAMELEN + 5];
3396 int len = 0, l;
3397 int c;
3398 int count = 0;
3399
Daniel Veillardc6561462009-03-25 10:22:31 +00003400#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003401 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003402#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003403
Owen Taylor3473f882001-02-23 17:55:21 +00003404 GROW;
3405 c = CUR_CHAR(l);
3406
Daniel Veillard34e3f642008-07-29 09:02:27 +00003407 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003408 if (count++ > 100) {
3409 count = 0;
3410 GROW;
3411 }
3412 COPY_BUF(l,buf,len,c);
3413 NEXTL(l);
3414 c = CUR_CHAR(l);
3415 if (len >= XML_MAX_NAMELEN) {
3416 /*
3417 * Okay someone managed to make a huge token, so he's ready to pay
3418 * for the processing speed.
3419 */
3420 xmlChar *buffer;
3421 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003422
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003423 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003424 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003425 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003426 return(NULL);
3427 }
3428 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003429 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003430 if (count++ > 100) {
3431 count = 0;
3432 GROW;
3433 }
3434 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003435 xmlChar *tmp;
3436
Owen Taylor3473f882001-02-23 17:55:21 +00003437 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003438 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003439 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003440 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003441 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003442 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003443 return(NULL);
3444 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003445 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003446 }
3447 COPY_BUF(l,buffer,len,c);
3448 NEXTL(l);
3449 c = CUR_CHAR(l);
3450 }
3451 buffer[len] = 0;
3452 return(buffer);
3453 }
3454 }
3455 if (len == 0)
3456 return(NULL);
3457 return(xmlStrndup(buf, len));
3458}
3459
3460/**
3461 * xmlParseEntityValue:
3462 * @ctxt: an XML parser context
3463 * @orig: if non-NULL store a copy of the original entity value
3464 *
3465 * parse a value for ENTITY declarations
3466 *
3467 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3468 * "'" ([^%&'] | PEReference | Reference)* "'"
3469 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003470 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003471 */
3472
3473xmlChar *
3474xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3475 xmlChar *buf = NULL;
3476 int len = 0;
3477 int size = XML_PARSER_BUFFER_SIZE;
3478 int c, l;
3479 xmlChar stop;
3480 xmlChar *ret = NULL;
3481 const xmlChar *cur = NULL;
3482 xmlParserInputPtr input;
3483
3484 if (RAW == '"') stop = '"';
3485 else if (RAW == '\'') stop = '\'';
3486 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003487 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003488 return(NULL);
3489 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003490 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003491 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003492 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003493 return(NULL);
3494 }
3495
3496 /*
3497 * The content of the entity definition is copied in a buffer.
3498 */
3499
3500 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3501 input = ctxt->input;
3502 GROW;
3503 NEXT;
3504 c = CUR_CHAR(l);
3505 /*
3506 * NOTE: 4.4.5 Included in Literal
3507 * When a parameter entity reference appears in a literal entity
3508 * value, ... a single or double quote character in the replacement
3509 * text is always treated as a normal data character and will not
3510 * terminate the literal.
3511 * In practice it means we stop the loop only when back at parsing
3512 * the initial entity and the quote is found
3513 */
William M. Brack871611b2003-10-18 04:53:14 +00003514 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003515 (ctxt->input != input))) {
3516 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003517 xmlChar *tmp;
3518
Owen Taylor3473f882001-02-23 17:55:21 +00003519 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003520 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3521 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003522 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003523 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003524 return(NULL);
3525 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003526 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003527 }
3528 COPY_BUF(l,buf,len,c);
3529 NEXTL(l);
3530 /*
3531 * Pop-up of finished entities.
3532 */
3533 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3534 xmlPopInput(ctxt);
3535
3536 GROW;
3537 c = CUR_CHAR(l);
3538 if (c == 0) {
3539 GROW;
3540 c = CUR_CHAR(l);
3541 }
3542 }
3543 buf[len] = 0;
3544
3545 /*
3546 * Raise problem w.r.t. '&' and '%' being used in non-entities
3547 * reference constructs. Note Charref will be handled in
3548 * xmlStringDecodeEntities()
3549 */
3550 cur = buf;
3551 while (*cur != 0) { /* non input consuming */
3552 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3553 xmlChar *name;
3554 xmlChar tmp = *cur;
3555
3556 cur++;
3557 name = xmlParseStringName(ctxt, &cur);
3558 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003559 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003560 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003561 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003562 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003563 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3564 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003565 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003566 }
3567 if (name != NULL)
3568 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003569 if (*cur == 0)
3570 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003571 }
3572 cur++;
3573 }
3574
3575 /*
3576 * Then PEReference entities are substituted.
3577 */
3578 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003579 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003580 xmlFree(buf);
3581 } else {
3582 NEXT;
3583 /*
3584 * NOTE: 4.4.7 Bypassed
3585 * When a general entity reference appears in the EntityValue in
3586 * an entity declaration, it is bypassed and left as is.
3587 * so XML_SUBSTITUTE_REF is not set here.
3588 */
3589 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3590 0, 0, 0);
3591 if (orig != NULL)
3592 *orig = buf;
3593 else
3594 xmlFree(buf);
3595 }
3596
3597 return(ret);
3598}
3599
3600/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003601 * xmlParseAttValueComplex:
3602 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003603 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003604 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003605 *
3606 * parse a value for an attribute, this is the fallback function
3607 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003608 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003609 *
3610 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3611 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003612static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003613xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003614 xmlChar limit = 0;
3615 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003616 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003617 int len = 0;
3618 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003619 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003620 xmlChar *current = NULL;
3621 xmlEntityPtr ent;
3622
Owen Taylor3473f882001-02-23 17:55:21 +00003623 if (NXT(0) == '"') {
3624 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3625 limit = '"';
3626 NEXT;
3627 } else if (NXT(0) == '\'') {
3628 limit = '\'';
3629 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3630 NEXT;
3631 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003632 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003633 return(NULL);
3634 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003635
Owen Taylor3473f882001-02-23 17:55:21 +00003636 /*
3637 * allocate a translation buffer.
3638 */
3639 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003640 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003641 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003642
3643 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003644 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003645 */
3646 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003647 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003648 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003649 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003650 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003651 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003652 if (NXT(1) == '#') {
3653 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003654
Owen Taylor3473f882001-02-23 17:55:21 +00003655 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003656 if (ctxt->replaceEntities) {
3657 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003658 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003659 }
3660 buf[len++] = '&';
3661 } else {
3662 /*
3663 * The reparsing will be done in xmlStringGetNodeList()
3664 * called by the attribute() function in SAX.c
3665 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003666 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003667 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003668 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003669 buf[len++] = '&';
3670 buf[len++] = '#';
3671 buf[len++] = '3';
3672 buf[len++] = '8';
3673 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003674 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003675 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003676 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003677 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003678 }
Owen Taylor3473f882001-02-23 17:55:21 +00003679 len += xmlCopyChar(0, &buf[len], val);
3680 }
3681 } else {
3682 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003683 ctxt->nbentities++;
3684 if (ent != NULL)
3685 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003686 if ((ent != NULL) &&
3687 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3688 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003689 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003690 }
3691 if ((ctxt->replaceEntities == 0) &&
3692 (ent->content[0] == '&')) {
3693 buf[len++] = '&';
3694 buf[len++] = '#';
3695 buf[len++] = '3';
3696 buf[len++] = '8';
3697 buf[len++] = ';';
3698 } else {
3699 buf[len++] = ent->content[0];
3700 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003701 } else if ((ent != NULL) &&
3702 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003703 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3704 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003705 XML_SUBSTITUTE_REF,
3706 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003707 if (rep != NULL) {
3708 current = rep;
3709 while (*current != 0) { /* non input consuming */
3710 buf[len++] = *current++;
3711 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003712 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003713 }
3714 }
3715 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003716 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003717 }
3718 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003719 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003720 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003721 }
Owen Taylor3473f882001-02-23 17:55:21 +00003722 if (ent->content != NULL)
3723 buf[len++] = ent->content[0];
3724 }
3725 } else if (ent != NULL) {
3726 int i = xmlStrlen(ent->name);
3727 const xmlChar *cur = ent->name;
3728
3729 /*
3730 * This may look absurd but is needed to detect
3731 * entities problems
3732 */
3733 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3734 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003735 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003736 XML_SUBSTITUTE_REF, 0, 0, 0);
3737 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003738 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003739 rep = NULL;
3740 }
Owen Taylor3473f882001-02-23 17:55:21 +00003741 }
3742
3743 /*
3744 * Just output the reference
3745 */
3746 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003747 while (len > buf_size - i - 10) {
3748 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003749 }
3750 for (;i > 0;i--)
3751 buf[len++] = *cur++;
3752 buf[len++] = ';';
3753 }
3754 }
3755 } else {
3756 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003757 if ((len != 0) || (!normalize)) {
3758 if ((!normalize) || (!in_space)) {
3759 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003760 while (len > buf_size - 10) {
3761 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003762 }
3763 }
3764 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003765 }
3766 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003767 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003768 COPY_BUF(l,buf,len,c);
3769 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003770 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003771 }
3772 }
3773 NEXTL(l);
3774 }
3775 GROW;
3776 c = CUR_CHAR(l);
3777 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003778 if ((in_space) && (normalize)) {
3779 while (buf[len - 1] == 0x20) len--;
3780 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003781 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003782 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003783 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003784 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003785 if ((c != 0) && (!IS_CHAR(c))) {
3786 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3787 "invalid character in attribute value\n");
3788 } else {
3789 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3790 "AttValue: ' expected\n");
3791 }
Owen Taylor3473f882001-02-23 17:55:21 +00003792 } else
3793 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003794 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003795 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003796
3797mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003798 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003799 if (buf != NULL)
3800 xmlFree(buf);
3801 if (rep != NULL)
3802 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003803 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003804}
3805
3806/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003807 * xmlParseAttValue:
3808 * @ctxt: an XML parser context
3809 *
3810 * parse a value for an attribute
3811 * Note: the parser won't do substitution of entities here, this
3812 * will be handled later in xmlStringGetNodeList
3813 *
3814 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3815 * "'" ([^<&'] | Reference)* "'"
3816 *
3817 * 3.3.3 Attribute-Value Normalization:
3818 * Before the value of an attribute is passed to the application or
3819 * checked for validity, the XML processor must normalize it as follows:
3820 * - a character reference is processed by appending the referenced
3821 * character to the attribute value
3822 * - an entity reference is processed by recursively processing the
3823 * replacement text of the entity
3824 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3825 * appending #x20 to the normalized value, except that only a single
3826 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3827 * parsed entity or the literal entity value of an internal parsed entity
3828 * - other characters are processed by appending them to the normalized value
3829 * If the declared value is not CDATA, then the XML processor must further
3830 * process the normalized attribute value by discarding any leading and
3831 * trailing space (#x20) characters, and by replacing sequences of space
3832 * (#x20) characters by a single space (#x20) character.
3833 * All attributes for which no declaration has been read should be treated
3834 * by a non-validating parser as if declared CDATA.
3835 *
3836 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3837 */
3838
3839
3840xmlChar *
3841xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003842 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003843 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003844}
3845
3846/**
Owen Taylor3473f882001-02-23 17:55:21 +00003847 * xmlParseSystemLiteral:
3848 * @ctxt: an XML parser context
3849 *
3850 * parse an XML Literal
3851 *
3852 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3853 *
3854 * Returns the SystemLiteral parsed or NULL
3855 */
3856
3857xmlChar *
3858xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3859 xmlChar *buf = NULL;
3860 int len = 0;
3861 int size = XML_PARSER_BUFFER_SIZE;
3862 int cur, l;
3863 xmlChar stop;
3864 int state = ctxt->instate;
3865 int count = 0;
3866
3867 SHRINK;
3868 if (RAW == '"') {
3869 NEXT;
3870 stop = '"';
3871 } else if (RAW == '\'') {
3872 NEXT;
3873 stop = '\'';
3874 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003875 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003876 return(NULL);
3877 }
3878
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003879 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003880 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003881 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003882 return(NULL);
3883 }
3884 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3885 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003886 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003887 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003888 xmlChar *tmp;
3889
Owen Taylor3473f882001-02-23 17:55:21 +00003890 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003891 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3892 if (tmp == NULL) {
3893 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003894 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003895 ctxt->instate = (xmlParserInputState) state;
3896 return(NULL);
3897 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003898 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003899 }
3900 count++;
3901 if (count > 50) {
3902 GROW;
3903 count = 0;
3904 }
3905 COPY_BUF(l,buf,len,cur);
3906 NEXTL(l);
3907 cur = CUR_CHAR(l);
3908 if (cur == 0) {
3909 GROW;
3910 SHRINK;
3911 cur = CUR_CHAR(l);
3912 }
3913 }
3914 buf[len] = 0;
3915 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003916 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003917 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003918 } else {
3919 NEXT;
3920 }
3921 return(buf);
3922}
3923
3924/**
3925 * xmlParsePubidLiteral:
3926 * @ctxt: an XML parser context
3927 *
3928 * parse an XML public literal
3929 *
3930 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3931 *
3932 * Returns the PubidLiteral parsed or NULL.
3933 */
3934
3935xmlChar *
3936xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3937 xmlChar *buf = NULL;
3938 int len = 0;
3939 int size = XML_PARSER_BUFFER_SIZE;
3940 xmlChar cur;
3941 xmlChar stop;
3942 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003943 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003944
3945 SHRINK;
3946 if (RAW == '"') {
3947 NEXT;
3948 stop = '"';
3949 } else if (RAW == '\'') {
3950 NEXT;
3951 stop = '\'';
3952 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003953 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003954 return(NULL);
3955 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003956 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003957 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003958 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003959 return(NULL);
3960 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003961 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003962 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003963 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003964 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003965 xmlChar *tmp;
3966
Owen Taylor3473f882001-02-23 17:55:21 +00003967 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003968 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3969 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003970 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003971 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003972 return(NULL);
3973 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003974 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003975 }
3976 buf[len++] = cur;
3977 count++;
3978 if (count > 50) {
3979 GROW;
3980 count = 0;
3981 }
3982 NEXT;
3983 cur = CUR;
3984 if (cur == 0) {
3985 GROW;
3986 SHRINK;
3987 cur = CUR;
3988 }
3989 }
3990 buf[len] = 0;
3991 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003992 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003993 } else {
3994 NEXT;
3995 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003996 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003997 return(buf);
3998}
3999
Daniel Veillard48b2f892001-02-25 16:11:03 +00004000void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004001
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value. An entry is non-zero (equal to its own index)
 * for the bytes the inner loop may step over directly: 0x9 and the ASCII
 * range 0x20..0x7F except '&', '<' and ']'. Every other entry is zero,
 * which ends the inner loop.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4039
Owen Taylor3473f882001-02-23 17:55:21 +00004040/**
4041 * xmlParseCharData:
4042 * @ctxt: an XML parser context
4043 * @cdata: int indicating whether we are within a CDATA section
4044 *
4045 * parse a CharData section.
4046 * if we are within a CDATA section ']]>' marks an end of section.
4047 *
4048 * The right angle bracket (>) may be represented using the string "&gt;",
4049 * and must, for compatibility, be escaped using "&gt;" or a character
4050 * reference when it appears in the string "]]>" in content, when that
4051 * string is not marking the end of a CDATA section.
4052 *
4053 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4054 */
4055
4056void
4057xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004058 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004059 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004060 int line = ctxt->input->line;
4061 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004062 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004063
4064 SHRINK;
4065 GROW;
4066 /*
4067 * Accelerated common case where input don't need to be
4068 * modified before passing it to the handler.
4069 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004070 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004071 in = ctxt->input->cur;
4072 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004073get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004074 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004075 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004076 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004077 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004078 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004079 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004080 goto get_more_space;
4081 }
4082 if (*in == '<') {
4083 nbchar = in - ctxt->input->cur;
4084 if (nbchar > 0) {
4085 const xmlChar *tmp = ctxt->input->cur;
4086 ctxt->input->cur = in;
4087
Daniel Veillard34099b42004-11-04 17:34:35 +00004088 if ((ctxt->sax != NULL) &&
4089 (ctxt->sax->ignorableWhitespace !=
4090 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004091 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004092 if (ctxt->sax->ignorableWhitespace != NULL)
4093 ctxt->sax->ignorableWhitespace(ctxt->userData,
4094 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004095 } else {
4096 if (ctxt->sax->characters != NULL)
4097 ctxt->sax->characters(ctxt->userData,
4098 tmp, nbchar);
4099 if (*ctxt->space == -1)
4100 *ctxt->space = -2;
4101 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004102 } else if ((ctxt->sax != NULL) &&
4103 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004104 ctxt->sax->characters(ctxt->userData,
4105 tmp, nbchar);
4106 }
4107 }
4108 return;
4109 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004110
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004111get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004112 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004113 while (test_char_data[*in]) {
4114 in++;
4115 ccol++;
4116 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004117 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004118 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004119 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004120 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004121 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004122 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004123 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004124 }
4125 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004126 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004127 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004128 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004129 return;
4130 }
4131 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004132 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004133 goto get_more;
4134 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004135 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004136 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004137 if ((ctxt->sax != NULL) &&
4138 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004139 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004140 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004141 const xmlChar *tmp = ctxt->input->cur;
4142 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004143
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004144 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004145 if (ctxt->sax->ignorableWhitespace != NULL)
4146 ctxt->sax->ignorableWhitespace(ctxt->userData,
4147 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004148 } else {
4149 if (ctxt->sax->characters != NULL)
4150 ctxt->sax->characters(ctxt->userData,
4151 tmp, nbchar);
4152 if (*ctxt->space == -1)
4153 *ctxt->space = -2;
4154 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004155 line = ctxt->input->line;
4156 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004157 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004158 if (ctxt->sax->characters != NULL)
4159 ctxt->sax->characters(ctxt->userData,
4160 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004161 line = ctxt->input->line;
4162 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004163 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004164 /* something really bad happened in the SAX callback */
4165 if (ctxt->instate != XML_PARSER_CONTENT)
4166 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004167 }
4168 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004169 if (*in == 0xD) {
4170 in++;
4171 if (*in == 0xA) {
4172 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004173 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004174 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004175 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004176 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004177 in--;
4178 }
4179 if (*in == '<') {
4180 return;
4181 }
4182 if (*in == '&') {
4183 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004184 }
4185 SHRINK;
4186 GROW;
4187 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004188 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004189 nbchar = 0;
4190 }
Daniel Veillard50582112001-03-26 22:52:16 +00004191 ctxt->input->line = line;
4192 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004193 xmlParseCharDataComplex(ctxt, cdata);
4194}
4195
Daniel Veillard01c13b52002-12-10 15:19:08 +00004196/**
4197 * xmlParseCharDataComplex:
4198 * @ctxt: an XML parser context
4199 * @cdata: int indicating whether we are within a CDATA section
4200 *
4201 * parse a CharData section.this is the fallback function
4202 * of xmlParseCharData() when the parsing requires handling
4203 * of non-ASCII characters.
4204 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004205void
4206xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004207 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4208 int nbchar = 0;
4209 int cur, l;
4210 int count = 0;
4211
4212 SHRINK;
4213 GROW;
4214 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004215 while ((cur != '<') && /* checked */
4216 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004217 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004218 if ((cur == ']') && (NXT(1) == ']') &&
4219 (NXT(2) == '>')) {
4220 if (cdata) break;
4221 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004222 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004223 }
4224 }
4225 COPY_BUF(l,buf,nbchar,cur);
4226 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004227 buf[nbchar] = 0;
4228
Owen Taylor3473f882001-02-23 17:55:21 +00004229 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004230 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004231 */
4232 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004233 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004234 if (ctxt->sax->ignorableWhitespace != NULL)
4235 ctxt->sax->ignorableWhitespace(ctxt->userData,
4236 buf, nbchar);
4237 } else {
4238 if (ctxt->sax->characters != NULL)
4239 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004240 if ((ctxt->sax->characters !=
4241 ctxt->sax->ignorableWhitespace) &&
4242 (*ctxt->space == -1))
4243 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004244 }
4245 }
4246 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004247 /* something really bad happened in the SAX callback */
4248 if (ctxt->instate != XML_PARSER_CONTENT)
4249 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004250 }
4251 count++;
4252 if (count > 50) {
4253 GROW;
4254 count = 0;
4255 }
4256 NEXTL(l);
4257 cur = CUR_CHAR(l);
4258 }
4259 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004260 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004261 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004262 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004263 */
4264 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004265 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004266 if (ctxt->sax->ignorableWhitespace != NULL)
4267 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4268 } else {
4269 if (ctxt->sax->characters != NULL)
4270 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004271 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4272 (*ctxt->space == -1))
4273 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004274 }
4275 }
4276 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004277 if ((cur != 0) && (!IS_CHAR(cur))) {
4278 /* Generate the error and skip the offending character */
4279 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4280 "PCDATA invalid Char value %d\n",
4281 cur);
4282 NEXTL(l);
4283 }
Owen Taylor3473f882001-02-23 17:55:21 +00004284}
4285
4286/**
4287 * xmlParseExternalID:
4288 * @ctxt: an XML parser context
4289 * @publicID: a xmlChar** receiving PubidLiteral
4290 * @strict: indicate whether we should restrict parsing to only
4291 * production [75], see NOTE below
4292 *
4293 * Parse an External ID or a Public ID
4294 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004295 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004296 * 'PUBLIC' S PubidLiteral S SystemLiteral
4297 *
4298 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4299 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4300 *
4301 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4302 *
4303 * Returns the function returns SystemLiteral and in the second
4304 * case publicID receives PubidLiteral, is strict is off
4305 * it is possible to return NULL and have publicID set.
4306 */
4307
4308xmlChar *
4309xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4310 xmlChar *URI = NULL;
4311
4312 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004313
4314 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004315 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004316 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004317 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004318 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4319 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004320 }
4321 SKIP_BLANKS;
4322 URI = xmlParseSystemLiteral(ctxt);
4323 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004324 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004325 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004326 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004327 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004328 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004329 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004330 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004331 }
4332 SKIP_BLANKS;
4333 *publicID = xmlParsePubidLiteral(ctxt);
4334 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004335 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004336 }
4337 if (strict) {
4338 /*
4339 * We don't handle [83] so "S SystemLiteral" is required.
4340 */
William M. Brack76e95df2003-10-18 16:20:14 +00004341 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004342 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004343 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004344 }
4345 } else {
4346 /*
4347 * We handle [83] so we return immediately, if
4348 * "S SystemLiteral" is not detected. From a purely parsing
4349 * point of view that's a nice mess.
4350 */
4351 const xmlChar *ptr;
4352 GROW;
4353
4354 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004355 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004356
William M. Brack76e95df2003-10-18 16:20:14 +00004357 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004358 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4359 }
4360 SKIP_BLANKS;
4361 URI = xmlParseSystemLiteral(ctxt);
4362 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004363 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004364 }
4365 }
4366 return(URI);
4367}
4368
4369/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004370 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004371 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004372 * @buf: the already parsed part of the buffer
4373 * @len: number of bytes filles in the buffer
4374 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004375 *
4376 * Skip an XML (SGML) comment <!-- .... -->
4377 * The spec says that "For compatibility, the string "--" (double-hyphen)
4378 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004379 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004380 *
4381 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4382 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004383static void
4384xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004385 int q, ql;
4386 int r, rl;
4387 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004388 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004389 int inputid;
4390
4391 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004392
Owen Taylor3473f882001-02-23 17:55:21 +00004393 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004394 len = 0;
4395 size = XML_PARSER_BUFFER_SIZE;
4396 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4397 if (buf == NULL) {
4398 xmlErrMemory(ctxt, NULL);
4399 return;
4400 }
Owen Taylor3473f882001-02-23 17:55:21 +00004401 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004402 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004403 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004404 if (q == 0)
4405 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004406 if (!IS_CHAR(q)) {
4407 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4408 "xmlParseComment: invalid xmlChar value %d\n",
4409 q);
4410 xmlFree (buf);
4411 return;
4412 }
Owen Taylor3473f882001-02-23 17:55:21 +00004413 NEXTL(ql);
4414 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004415 if (r == 0)
4416 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004417 if (!IS_CHAR(r)) {
4418 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4419 "xmlParseComment: invalid xmlChar value %d\n",
4420 q);
4421 xmlFree (buf);
4422 return;
4423 }
Owen Taylor3473f882001-02-23 17:55:21 +00004424 NEXTL(rl);
4425 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004426 if (cur == 0)
4427 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004428 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004429 ((cur != '>') ||
4430 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004431 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004432 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004433 }
4434 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004435 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004436 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004437 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4438 if (new_buf == NULL) {
4439 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004440 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004441 return;
4442 }
William M. Bracka3215c72004-07-31 16:24:01 +00004443 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004444 }
4445 COPY_BUF(ql,buf,len,q);
4446 q = r;
4447 ql = rl;
4448 r = cur;
4449 rl = l;
4450
4451 count++;
4452 if (count > 50) {
4453 GROW;
4454 count = 0;
4455 }
4456 NEXTL(l);
4457 cur = CUR_CHAR(l);
4458 if (cur == 0) {
4459 SHRINK;
4460 GROW;
4461 cur = CUR_CHAR(l);
4462 }
4463 }
4464 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004465 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004466 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004467 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004468 } else if (!IS_CHAR(cur)) {
4469 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4470 "xmlParseComment: invalid xmlChar value %d\n",
4471 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004472 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004473 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004474 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4475 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004476 }
4477 NEXT;
4478 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4479 (!ctxt->disableSAX))
4480 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004481 }
Daniel Veillardda629342007-08-01 07:49:06 +00004482 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004483 return;
4484not_terminated:
4485 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4486 "Comment not terminated\n", NULL);
4487 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004488 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004489}
Daniel Veillardda629342007-08-01 07:49:06 +00004490
Daniel Veillard4c778d82005-01-23 17:37:44 +00004491/**
4492 * xmlParseComment:
4493 * @ctxt: an XML parser context
4494 *
4495 * Skip an XML (SGML) comment <!-- .... -->
4496 * The spec says that "For compatibility, the string "--" (double-hyphen)
4497 * must not occur within comments. "
4498 *
4499 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4500 */
4501void
4502xmlParseComment(xmlParserCtxtPtr ctxt) {
4503 xmlChar *buf = NULL;
4504 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004505 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004506 xmlParserInputState state;
4507 const xmlChar *in;
4508 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004509 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004510
4511 /*
4512 * Check that there is a comment right here.
4513 */
4514 if ((RAW != '<') || (NXT(1) != '!') ||
4515 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004516 state = ctxt->instate;
4517 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004518 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004519 SKIP(4);
4520 SHRINK;
4521 GROW;
4522
4523 /*
4524 * Accelerated common case where input don't need to be
4525 * modified before passing it to the handler.
4526 */
4527 in = ctxt->input->cur;
4528 do {
4529 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004530 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004531 ctxt->input->line++; ctxt->input->col = 1;
4532 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004533 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004534 }
4535get_more:
4536 ccol = ctxt->input->col;
4537 while (((*in > '-') && (*in <= 0x7F)) ||
4538 ((*in >= 0x20) && (*in < '-')) ||
4539 (*in == 0x09)) {
4540 in++;
4541 ccol++;
4542 }
4543 ctxt->input->col = ccol;
4544 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004545 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004546 ctxt->input->line++; ctxt->input->col = 1;
4547 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004548 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004549 goto get_more;
4550 }
4551 nbchar = in - ctxt->input->cur;
4552 /*
4553 * save current set of data
4554 */
4555 if (nbchar > 0) {
4556 if ((ctxt->sax != NULL) &&
4557 (ctxt->sax->comment != NULL)) {
4558 if (buf == NULL) {
4559 if ((*in == '-') && (in[1] == '-'))
4560 size = nbchar + 1;
4561 else
4562 size = XML_PARSER_BUFFER_SIZE + nbchar;
4563 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4564 if (buf == NULL) {
4565 xmlErrMemory(ctxt, NULL);
4566 ctxt->instate = state;
4567 return;
4568 }
4569 len = 0;
4570 } else if (len + nbchar + 1 >= size) {
4571 xmlChar *new_buf;
4572 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4573 new_buf = (xmlChar *) xmlRealloc(buf,
4574 size * sizeof(xmlChar));
4575 if (new_buf == NULL) {
4576 xmlFree (buf);
4577 xmlErrMemory(ctxt, NULL);
4578 ctxt->instate = state;
4579 return;
4580 }
4581 buf = new_buf;
4582 }
4583 memcpy(&buf[len], ctxt->input->cur, nbchar);
4584 len += nbchar;
4585 buf[len] = 0;
4586 }
4587 }
4588 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004589 if (*in == 0xA) {
4590 in++;
4591 ctxt->input->line++; ctxt->input->col = 1;
4592 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004593 if (*in == 0xD) {
4594 in++;
4595 if (*in == 0xA) {
4596 ctxt->input->cur = in;
4597 in++;
4598 ctxt->input->line++; ctxt->input->col = 1;
4599 continue; /* while */
4600 }
4601 in--;
4602 }
4603 SHRINK;
4604 GROW;
4605 in = ctxt->input->cur;
4606 if (*in == '-') {
4607 if (in[1] == '-') {
4608 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004609 if (ctxt->input->id != inputid) {
4610 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4611 "comment doesn't start and stop in the same entity\n");
4612 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004613 SKIP(3);
4614 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4615 (!ctxt->disableSAX)) {
4616 if (buf != NULL)
4617 ctxt->sax->comment(ctxt->userData, buf);
4618 else
4619 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4620 }
4621 if (buf != NULL)
4622 xmlFree(buf);
4623 ctxt->instate = state;
4624 return;
4625 }
4626 if (buf != NULL)
4627 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4628 "Comment not terminated \n<!--%.50s\n",
4629 buf);
4630 else
4631 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4632 "Comment not terminated \n", NULL);
4633 in++;
4634 ctxt->input->col++;
4635 }
4636 in++;
4637 ctxt->input->col++;
4638 goto get_more;
4639 }
4640 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4641 xmlParseCommentComplex(ctxt, buf, len, size);
4642 ctxt->instate = state;
4643 return;
4644}
4645
Owen Taylor3473f882001-02-23 17:55:21 +00004646
4647/**
4648 * xmlParsePITarget:
4649 * @ctxt: an XML parser context
4650 *
4651 * parse the name of a PI
4652 *
4653 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4654 *
4655 * Returns the PITarget name or NULL
4656 */
4657
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004658const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004659xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004660 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004661
4662 name = xmlParseName(ctxt);
4663 if ((name != NULL) &&
4664 ((name[0] == 'x') || (name[0] == 'X')) &&
4665 ((name[1] == 'm') || (name[1] == 'M')) &&
4666 ((name[2] == 'l') || (name[2] == 'L'))) {
4667 int i;
4668 if ((name[0] == 'x') && (name[1] == 'm') &&
4669 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004670 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004671 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004672 return(name);
4673 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004674 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004675 return(name);
4676 }
4677 for (i = 0;;i++) {
4678 if (xmlW3CPIs[i] == NULL) break;
4679 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4680 return(name);
4681 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004682 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4683 "xmlParsePITarget: invalid name prefix 'xml'\n",
4684 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004685 }
Daniel Veillard37334572008-07-31 08:20:02 +00004686 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4687 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4688 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4689 }
Owen Taylor3473f882001-02-23 17:55:21 +00004690 return(name);
4691}
4692
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction, e.g.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected content is the word "catalog", an '=' sign and a value
 * quoted with either ' or ", each optionally surrounded by blanks, with
 * nothing but blanks after the closing quote.  On success the quoted URL
 * is added to the context catalogs with xmlCatalogAddLocal().
 * Syntax errors are reported as an XML_WAR_CATALOG_PI warning, except a
 * missing '=' which makes the function return silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* leading blanks, then the "catalog" keyword */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* optional blanks around the '=' sign */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;
    while (IS_BLANK_CH(*p))
        p++;

    /* the value is delimited by a matching pair of quotes */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4754
Owen Taylor3473f882001-02-23 17:55:21 +00004755/**
4756 * xmlParsePI:
4757 * @ctxt: an XML parser context
4758 *
4759 * parse an XML Processing Instruction.
4760 *
4761 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4762 *
4763 * The processing is transfered to SAX once parsed.
4764 */
4765
4766void
4767xmlParsePI(xmlParserCtxtPtr ctxt) {
4768 xmlChar *buf = NULL;
4769 int len = 0;
4770 int size = XML_PARSER_BUFFER_SIZE;
4771 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004772 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004773 xmlParserInputState state;
4774 int count = 0;
4775
4776 if ((RAW == '<') && (NXT(1) == '?')) {
4777 xmlParserInputPtr input = ctxt->input;
4778 state = ctxt->instate;
4779 ctxt->instate = XML_PARSER_PI;
4780 /*
4781 * this is a Processing Instruction.
4782 */
4783 SKIP(2);
4784 SHRINK;
4785
4786 /*
4787 * Parse the target name and check for special support like
4788 * namespace.
4789 */
4790 target = xmlParsePITarget(ctxt);
4791 if (target != NULL) {
4792 if ((RAW == '?') && (NXT(1) == '>')) {
4793 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004794 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4795 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004796 }
4797 SKIP(2);
4798
4799 /*
4800 * SAX: PI detected.
4801 */
4802 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4803 (ctxt->sax->processingInstruction != NULL))
4804 ctxt->sax->processingInstruction(ctxt->userData,
4805 target, NULL);
4806 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004807 return;
4808 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004809 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004810 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004811 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004812 ctxt->instate = state;
4813 return;
4814 }
4815 cur = CUR;
4816 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004817 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4818 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004819 }
4820 SKIP_BLANKS;
4821 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004822 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004823 ((cur != '?') || (NXT(1) != '>'))) {
4824 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004825 xmlChar *tmp;
4826
Owen Taylor3473f882001-02-23 17:55:21 +00004827 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004828 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4829 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004830 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004831 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004832 ctxt->instate = state;
4833 return;
4834 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004835 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004836 }
4837 count++;
4838 if (count > 50) {
4839 GROW;
4840 count = 0;
4841 }
4842 COPY_BUF(l,buf,len,cur);
4843 NEXTL(l);
4844 cur = CUR_CHAR(l);
4845 if (cur == 0) {
4846 SHRINK;
4847 GROW;
4848 cur = CUR_CHAR(l);
4849 }
4850 }
4851 buf[len] = 0;
4852 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004853 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4854 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004855 } else {
4856 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004857 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4858 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004859 }
4860 SKIP(2);
4861
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004862#ifdef LIBXML_CATALOG_ENABLED
4863 if (((state == XML_PARSER_MISC) ||
4864 (state == XML_PARSER_START)) &&
4865 (xmlStrEqual(target, XML_CATALOG_PI))) {
4866 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4867 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4868 (allow == XML_CATA_ALLOW_ALL))
4869 xmlParseCatalogPI(ctxt, buf);
4870 }
4871#endif
4872
4873
Owen Taylor3473f882001-02-23 17:55:21 +00004874 /*
4875 * SAX: PI detected.
4876 */
4877 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4878 (ctxt->sax->processingInstruction != NULL))
4879 ctxt->sax->processingInstruction(ctxt->userData,
4880 target, buf);
4881 }
4882 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004883 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004884 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004885 }
4886 ctxt->instate = state;
4887 }
4888}
4889
4890/**
4891 * xmlParseNotationDecl:
4892 * @ctxt: an XML parser context
4893 *
4894 * parse a notation declaration
4895 *
4896 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4897 *
4898 * Hence there is actually 3 choices:
4899 * 'PUBLIC' S PubidLiteral
4900 * 'PUBLIC' S PubidLiteral S SystemLiteral
4901 * and 'SYSTEM' S SystemLiteral
4902 *
4903 * See the NOTE on xmlParseExternalID().
4904 */
4905
4906void
4907xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004908 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004909 xmlChar *Pubid;
4910 xmlChar *Systemid;
4911
Daniel Veillarda07050d2003-10-19 14:46:32 +00004912 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004913 xmlParserInputPtr input = ctxt->input;
4914 SHRINK;
4915 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004916 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004917 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4918 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004919 return;
4920 }
4921 SKIP_BLANKS;
4922
Daniel Veillard76d66f42001-05-16 21:05:17 +00004923 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004924 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004925 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004926 return;
4927 }
William M. Brack76e95df2003-10-18 16:20:14 +00004928 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004929 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004930 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004931 return;
4932 }
Daniel Veillard37334572008-07-31 08:20:02 +00004933 if (xmlStrchr(name, ':') != NULL) {
4934 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4935 "colon are forbidden from notation names '%s'\n",
4936 name, NULL, NULL);
4937 }
Owen Taylor3473f882001-02-23 17:55:21 +00004938 SKIP_BLANKS;
4939
4940 /*
4941 * Parse the IDs.
4942 */
4943 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4944 SKIP_BLANKS;
4945
4946 if (RAW == '>') {
4947 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004948 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4949 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004950 }
4951 NEXT;
4952 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4953 (ctxt->sax->notationDecl != NULL))
4954 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4955 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004956 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004957 }
Owen Taylor3473f882001-02-23 17:55:21 +00004958 if (Systemid != NULL) xmlFree(Systemid);
4959 if (Pubid != NULL) xmlFree(Pubid);
4960 }
4961}
4962
4963/**
4964 * xmlParseEntityDecl:
4965 * @ctxt: an XML parser context
4966 *
4967 * parse <!ENTITY declarations
4968 *
4969 * [70] EntityDecl ::= GEDecl | PEDecl
4970 *
4971 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4972 *
4973 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4974 *
4975 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4976 *
4977 * [74] PEDef ::= EntityValue | ExternalID
4978 *
4979 * [76] NDataDecl ::= S 'NDATA' S Name
4980 *
4981 * [ VC: Notation Declared ]
4982 * The Name must match the declared name of a notation.
4983 */
4984
4985void
4986xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004987 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004988 xmlChar *value = NULL;
4989 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004990 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004991 int isParameter = 0;
4992 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004993 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004994
Daniel Veillard4c778d82005-01-23 17:37:44 +00004995 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004996 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004997 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004998 SHRINK;
4999 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005000 skipped = SKIP_BLANKS;
5001 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005002 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5003 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005004 }
Owen Taylor3473f882001-02-23 17:55:21 +00005005
5006 if (RAW == '%') {
5007 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005008 skipped = SKIP_BLANKS;
5009 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005010 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5011 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005012 }
Owen Taylor3473f882001-02-23 17:55:21 +00005013 isParameter = 1;
5014 }
5015
Daniel Veillard76d66f42001-05-16 21:05:17 +00005016 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005017 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005018 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5019 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005020 return;
5021 }
Daniel Veillard37334572008-07-31 08:20:02 +00005022 if (xmlStrchr(name, ':') != NULL) {
5023 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5024 "colon are forbidden from entities names '%s'\n",
5025 name, NULL, NULL);
5026 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005027 skipped = SKIP_BLANKS;
5028 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005029 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5030 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005031 }
Owen Taylor3473f882001-02-23 17:55:21 +00005032
Daniel Veillardf5582f12002-06-11 10:08:16 +00005033 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005034 /*
5035 * handle the various case of definitions...
5036 */
5037 if (isParameter) {
5038 if ((RAW == '"') || (RAW == '\'')) {
5039 value = xmlParseEntityValue(ctxt, &orig);
5040 if (value) {
5041 if ((ctxt->sax != NULL) &&
5042 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5043 ctxt->sax->entityDecl(ctxt->userData, name,
5044 XML_INTERNAL_PARAMETER_ENTITY,
5045 NULL, NULL, value);
5046 }
5047 } else {
5048 URI = xmlParseExternalID(ctxt, &literal, 1);
5049 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005050 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005051 }
5052 if (URI) {
5053 xmlURIPtr uri;
5054
5055 uri = xmlParseURI((const char *) URI);
5056 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005057 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5058 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005059 /*
5060 * This really ought to be a well formedness error
5061 * but the XML Core WG decided otherwise c.f. issue
5062 * E26 of the XML erratas.
5063 */
Owen Taylor3473f882001-02-23 17:55:21 +00005064 } else {
5065 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005066 /*
5067 * Okay this is foolish to block those but not
5068 * invalid URIs.
5069 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005070 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005071 } else {
5072 if ((ctxt->sax != NULL) &&
5073 (!ctxt->disableSAX) &&
5074 (ctxt->sax->entityDecl != NULL))
5075 ctxt->sax->entityDecl(ctxt->userData, name,
5076 XML_EXTERNAL_PARAMETER_ENTITY,
5077 literal, URI, NULL);
5078 }
5079 xmlFreeURI(uri);
5080 }
5081 }
5082 }
5083 } else {
5084 if ((RAW == '"') || (RAW == '\'')) {
5085 value = xmlParseEntityValue(ctxt, &orig);
5086 if ((ctxt->sax != NULL) &&
5087 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5088 ctxt->sax->entityDecl(ctxt->userData, name,
5089 XML_INTERNAL_GENERAL_ENTITY,
5090 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005091 /*
5092 * For expat compatibility in SAX mode.
5093 */
5094 if ((ctxt->myDoc == NULL) ||
5095 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5096 if (ctxt->myDoc == NULL) {
5097 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005098 if (ctxt->myDoc == NULL) {
5099 xmlErrMemory(ctxt, "New Doc failed");
5100 return;
5101 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005102 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005103 }
5104 if (ctxt->myDoc->intSubset == NULL)
5105 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5106 BAD_CAST "fake", NULL, NULL);
5107
Daniel Veillard1af9a412003-08-20 22:54:39 +00005108 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5109 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005110 }
Owen Taylor3473f882001-02-23 17:55:21 +00005111 } else {
5112 URI = xmlParseExternalID(ctxt, &literal, 1);
5113 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005114 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005115 }
5116 if (URI) {
5117 xmlURIPtr uri;
5118
5119 uri = xmlParseURI((const char *)URI);
5120 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005121 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5122 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005123 /*
5124 * This really ought to be a well formedness error
5125 * but the XML Core WG decided otherwise c.f. issue
5126 * E26 of the XML erratas.
5127 */
Owen Taylor3473f882001-02-23 17:55:21 +00005128 } else {
5129 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005130 /*
5131 * Okay this is foolish to block those but not
5132 * invalid URIs.
5133 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005134 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005135 }
5136 xmlFreeURI(uri);
5137 }
5138 }
William M. Brack76e95df2003-10-18 16:20:14 +00005139 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005140 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5141 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005142 }
5143 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005144 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005145 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005146 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005147 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5148 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005149 }
5150 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005151 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005152 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5153 (ctxt->sax->unparsedEntityDecl != NULL))
5154 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5155 literal, URI, ndata);
5156 } else {
5157 if ((ctxt->sax != NULL) &&
5158 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5159 ctxt->sax->entityDecl(ctxt->userData, name,
5160 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5161 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005162 /*
5163 * For expat compatibility in SAX mode.
5164 * assuming the entity repalcement was asked for
5165 */
5166 if ((ctxt->replaceEntities != 0) &&
5167 ((ctxt->myDoc == NULL) ||
5168 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5169 if (ctxt->myDoc == NULL) {
5170 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005171 if (ctxt->myDoc == NULL) {
5172 xmlErrMemory(ctxt, "New Doc failed");
5173 return;
5174 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005175 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005176 }
5177
5178 if (ctxt->myDoc->intSubset == NULL)
5179 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5180 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005181 xmlSAX2EntityDecl(ctxt, name,
5182 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5183 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005184 }
Owen Taylor3473f882001-02-23 17:55:21 +00005185 }
5186 }
5187 }
5188 SKIP_BLANKS;
5189 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005190 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005191 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005192 } else {
5193 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005194 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5195 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005196 }
5197 NEXT;
5198 }
5199 if (orig != NULL) {
5200 /*
5201 * Ugly mechanism to save the raw entity value.
5202 */
5203 xmlEntityPtr cur = NULL;
5204
5205 if (isParameter) {
5206 if ((ctxt->sax != NULL) &&
5207 (ctxt->sax->getParameterEntity != NULL))
5208 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5209 } else {
5210 if ((ctxt->sax != NULL) &&
5211 (ctxt->sax->getEntity != NULL))
5212 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005213 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005214 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005215 }
Owen Taylor3473f882001-02-23 17:55:21 +00005216 }
5217 if (cur != NULL) {
5218 if (cur->orig != NULL)
5219 xmlFree(orig);
5220 else
5221 cur->orig = orig;
5222 } else
5223 xmlFree(orig);
5224 }
Owen Taylor3473f882001-02-23 17:55:21 +00005225 if (value != NULL) xmlFree(value);
5226 if (URI != NULL) xmlFree(URI);
5227 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005228 }
5229}
5230
5231/**
5232 * xmlParseDefaultDecl:
5233 * @ctxt: an XML parser context
5234 * @value: Receive a possible fixed default value for the attribute
5235 *
5236 * Parse an attribute default declaration
5237 *
5238 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5239 *
5240 * [ VC: Required Attribute ]
5241 * if the default declaration is the keyword #REQUIRED, then the
5242 * attribute must be specified for all elements of the type in the
5243 * attribute-list declaration.
5244 *
5245 * [ VC: Attribute Default Legal ]
5246 * The declared default value must meet the lexical constraints of
5247 * the declared attribute type c.f. xmlValidateAttributeDecl()
5248 *
5249 * [ VC: Fixed Attribute Default ]
5250 * if an attribute has a default value declared with the #FIXED
5251 * keyword, instances of that attribute must match the default value.
5252 *
5253 * [ WFC: No < in Attribute Values ]
5254 * handled in xmlParseAttValue()
5255 *
5256 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5257 * or XML_ATTRIBUTE_FIXED.
5258 */
5259
5260int
5261xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5262 int val;
5263 xmlChar *ret;
5264
5265 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005266 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005267 SKIP(9);
5268 return(XML_ATTRIBUTE_REQUIRED);
5269 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005270 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005271 SKIP(8);
5272 return(XML_ATTRIBUTE_IMPLIED);
5273 }
5274 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005275 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005276 SKIP(6);
5277 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005278 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005279 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5280 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005281 }
5282 SKIP_BLANKS;
5283 }
5284 ret = xmlParseAttValue(ctxt);
5285 ctxt->instate = XML_PARSER_DTD;
5286 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005287 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005288 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005289 } else
5290 *value = ret;
5291 return(val);
5292}
5293
5294/**
5295 * xmlParseNotationType:
5296 * @ctxt: an XML parser context
5297 *
5298 * parse an Notation attribute type.
5299 *
5300 * Note: the leading 'NOTATION' S part has already being parsed...
5301 *
5302 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5303 *
5304 * [ VC: Notation Attributes ]
5305 * Values of this type must match one of the notation names included
5306 * in the declaration; all notation names in the declaration must be declared.
5307 *
5308 * Returns: the notation attribute tree built while parsing
5309 */
5310
5311xmlEnumerationPtr
5312xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005313 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005314 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005315
5316 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005317 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005318 return(NULL);
5319 }
5320 SHRINK;
5321 do {
5322 NEXT;
5323 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005324 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005325 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005326 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5327 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005328 return(ret);
5329 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005330 tmp = ret;
5331 while (tmp != NULL) {
5332 if (xmlStrEqual(name, tmp->name)) {
5333 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5334 "standalone: attribute notation value token %s duplicated\n",
5335 name, NULL);
5336 if (!xmlDictOwns(ctxt->dict, name))
5337 xmlFree((xmlChar *) name);
5338 break;
5339 }
5340 tmp = tmp->next;
5341 }
5342 if (tmp == NULL) {
5343 cur = xmlCreateEnumeration(name);
5344 if (cur == NULL) return(ret);
5345 if (last == NULL) ret = last = cur;
5346 else {
5347 last->next = cur;
5348 last = cur;
5349 }
Owen Taylor3473f882001-02-23 17:55:21 +00005350 }
5351 SKIP_BLANKS;
5352 } while (RAW == '|');
5353 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005354 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005355 if ((last != NULL) && (last != ret))
5356 xmlFreeEnumeration(last);
5357 return(ret);
5358 }
5359 NEXT;
5360 return(ret);
5361}
5362
5363/**
5364 * xmlParseEnumerationType:
5365 * @ctxt: an XML parser context
5366 *
5367 * parse an Enumeration attribute type.
5368 *
5369 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5370 *
5371 * [ VC: Enumeration ]
5372 * Values of this type must match one of the Nmtoken tokens in
5373 * the declaration
5374 *
5375 * Returns: the enumeration attribute tree built while parsing
5376 */
5377
5378xmlEnumerationPtr
5379xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5380 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005381 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005382
5383 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005384 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005385 return(NULL);
5386 }
5387 SHRINK;
5388 do {
5389 NEXT;
5390 SKIP_BLANKS;
5391 name = xmlParseNmtoken(ctxt);
5392 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005393 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005394 return(ret);
5395 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005396 tmp = ret;
5397 while (tmp != NULL) {
5398 if (xmlStrEqual(name, tmp->name)) {
5399 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5400 "standalone: attribute enumeration value token %s duplicated\n",
5401 name, NULL);
5402 if (!xmlDictOwns(ctxt->dict, name))
5403 xmlFree(name);
5404 break;
5405 }
5406 tmp = tmp->next;
5407 }
5408 if (tmp == NULL) {
5409 cur = xmlCreateEnumeration(name);
5410 if (!xmlDictOwns(ctxt->dict, name))
5411 xmlFree(name);
5412 if (cur == NULL) return(ret);
5413 if (last == NULL) ret = last = cur;
5414 else {
5415 last->next = cur;
5416 last = cur;
5417 }
Owen Taylor3473f882001-02-23 17:55:21 +00005418 }
5419 SKIP_BLANKS;
5420 } while (RAW == '|');
5421 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005422 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005423 return(ret);
5424 }
5425 NEXT;
5426 return(ret);
5427}
5428
5429/**
5430 * xmlParseEnumeratedType:
5431 * @ctxt: an XML parser context
5432 * @tree: the enumeration tree built while parsing
5433 *
5434 * parse an Enumerated attribute type.
5435 *
5436 * [57] EnumeratedType ::= NotationType | Enumeration
5437 *
5438 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5439 *
5440 *
5441 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5442 */
5443
5444int
5445xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005446 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005447 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005448 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005449 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5450 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005451 return(0);
5452 }
5453 SKIP_BLANKS;
5454 *tree = xmlParseNotationType(ctxt);
5455 if (*tree == NULL) return(0);
5456 return(XML_ATTRIBUTE_NOTATION);
5457 }
5458 *tree = xmlParseEnumerationType(ctxt);
5459 if (*tree == NULL) return(0);
5460 return(XML_ATTRIBUTE_ENUMERATION);
5461}
5462
5463/**
5464 * xmlParseAttributeType:
5465 * @ctxt: an XML parser context
5466 * @tree: the enumeration tree built while parsing
5467 *
5468 * parse the Attribute list def for an element
5469 *
5470 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5471 *
5472 * [55] StringType ::= 'CDATA'
5473 *
5474 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5475 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5476 *
5477 * Validity constraints for attribute values syntax are checked in
5478 * xmlValidateAttributeValue()
5479 *
5480 * [ VC: ID ]
5481 * Values of type ID must match the Name production. A name must not
5482 * appear more than once in an XML document as a value of this type;
5483 * i.e., ID values must uniquely identify the elements which bear them.
5484 *
5485 * [ VC: One ID per Element Type ]
5486 * No element type may have more than one ID attribute specified.
5487 *
5488 * [ VC: ID Attribute Default ]
5489 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5490 *
5491 * [ VC: IDREF ]
5492 * Values of type IDREF must match the Name production, and values
5493 * of type IDREFS must match Names; each IDREF Name must match the value
5494 * of an ID attribute on some element in the XML document; i.e. IDREF
5495 * values must match the value of some ID attribute.
5496 *
5497 * [ VC: Entity Name ]
5498 * Values of type ENTITY must match the Name production, values
5499 * of type ENTITIES must match Names; each Entity Name must match the
5500 * name of an unparsed entity declared in the DTD.
5501 *
5502 * [ VC: Name Token ]
5503 * Values of type NMTOKEN must match the Nmtoken production; values
5504 * of type NMTOKENS must match Nmtokens.
5505 *
5506 * Returns the attribute type
5507 */
5508int
5509xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5510 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005511 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005512 SKIP(5);
5513 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005514 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005515 SKIP(6);
5516 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005517 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005518 SKIP(5);
5519 return(XML_ATTRIBUTE_IDREF);
5520 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5521 SKIP(2);
5522 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005523 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005524 SKIP(6);
5525 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005526 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005527 SKIP(8);
5528 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005529 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005530 SKIP(8);
5531 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005532 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005533 SKIP(7);
5534 return(XML_ATTRIBUTE_NMTOKEN);
5535 }
5536 return(xmlParseEnumeratedType(ctxt, tree));
5537}
5538
5539/**
5540 * xmlParseAttributeListDecl:
5541 * @ctxt: an XML parser context
5542 *
5543 * : parse the Attribute list def for an element
5544 *
5545 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5546 *
5547 * [53] AttDef ::= S Name S AttType S DefaultDecl
5548 *
5549 */
5550void
5551xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005552 const xmlChar *elemName;
5553 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005554 xmlEnumerationPtr tree;
5555
Daniel Veillarda07050d2003-10-19 14:46:32 +00005556 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005557 xmlParserInputPtr input = ctxt->input;
5558
5559 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005560 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005561 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005562 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005563 }
5564 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005565 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005566 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005567 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5568 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005569 return;
5570 }
5571 SKIP_BLANKS;
5572 GROW;
5573 while (RAW != '>') {
5574 const xmlChar *check = CUR_PTR;
5575 int type;
5576 int def;
5577 xmlChar *defaultValue = NULL;
5578
5579 GROW;
5580 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005581 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005582 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005583 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5584 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005585 break;
5586 }
5587 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005588 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005589 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005590 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005591 break;
5592 }
5593 SKIP_BLANKS;
5594
5595 type = xmlParseAttributeType(ctxt, &tree);
5596 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005597 break;
5598 }
5599
5600 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005601 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005602 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5603 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005604 if (tree != NULL)
5605 xmlFreeEnumeration(tree);
5606 break;
5607 }
5608 SKIP_BLANKS;
5609
5610 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5611 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005612 if (defaultValue != NULL)
5613 xmlFree(defaultValue);
5614 if (tree != NULL)
5615 xmlFreeEnumeration(tree);
5616 break;
5617 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005618 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5619 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005620
5621 GROW;
5622 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005623 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005624 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005625 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005626 if (defaultValue != NULL)
5627 xmlFree(defaultValue);
5628 if (tree != NULL)
5629 xmlFreeEnumeration(tree);
5630 break;
5631 }
5632 SKIP_BLANKS;
5633 }
5634 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005635 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5636 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005637 if (defaultValue != NULL)
5638 xmlFree(defaultValue);
5639 if (tree != NULL)
5640 xmlFreeEnumeration(tree);
5641 break;
5642 }
5643 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5644 (ctxt->sax->attributeDecl != NULL))
5645 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5646 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005647 else if (tree != NULL)
5648 xmlFreeEnumeration(tree);
5649
5650 if ((ctxt->sax2) && (defaultValue != NULL) &&
5651 (def != XML_ATTRIBUTE_IMPLIED) &&
5652 (def != XML_ATTRIBUTE_REQUIRED)) {
5653 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5654 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005655 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005656 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5657 }
Owen Taylor3473f882001-02-23 17:55:21 +00005658 if (defaultValue != NULL)
5659 xmlFree(defaultValue);
5660 GROW;
5661 }
5662 if (RAW == '>') {
5663 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005664 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5665 "Attribute list declaration doesn't start and stop in the same entity\n",
5666 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005667 }
5668 NEXT;
5669 }
Owen Taylor3473f882001-02-23 17:55:21 +00005670 }
5671}
5672
5673/**
5674 * xmlParseElementMixedContentDecl:
5675 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005676 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005677 *
5678 * parse the declaration for a Mixed Element content
5679 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5680 *
5681 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5682 * '(' S? '#PCDATA' S? ')'
5683 *
5684 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5685 *
5686 * [ VC: No Duplicate Types ]
5687 * The same name must not appear more than once in a single
5688 * mixed-content declaration.
5689 *
5690 * returns: the list of the xmlElementContentPtr describing the element choices
5691 */
5692xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005693xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005694 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005695 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005696
5697 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005698 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005699 SKIP(7);
5700 SKIP_BLANKS;
5701 SHRINK;
5702 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005703 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005704 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5705"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005706 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005707 }
Owen Taylor3473f882001-02-23 17:55:21 +00005708 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005709 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005710 if (ret == NULL)
5711 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005712 if (RAW == '*') {
5713 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5714 NEXT;
5715 }
5716 return(ret);
5717 }
5718 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005719 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005720 if (ret == NULL) return(NULL);
5721 }
5722 while (RAW == '|') {
5723 NEXT;
5724 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005725 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005726 if (ret == NULL) return(NULL);
5727 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005728 if (cur != NULL)
5729 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005730 cur = ret;
5731 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005732 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005733 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005734 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005735 if (n->c1 != NULL)
5736 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005737 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005738 if (n != NULL)
5739 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005740 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005741 }
5742 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005743 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005744 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005745 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005746 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005747 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005748 return(NULL);
5749 }
5750 SKIP_BLANKS;
5751 GROW;
5752 }
5753 if ((RAW == ')') && (NXT(1) == '*')) {
5754 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005755 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005756 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005757 if (cur->c2 != NULL)
5758 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005759 }
5760 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005761 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005762 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5763"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005764 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005765 }
Owen Taylor3473f882001-02-23 17:55:21 +00005766 SKIP(2);
5767 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005768 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005769 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005770 return(NULL);
5771 }
5772
5773 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005774 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005775 }
5776 return(ret);
5777}
5778
5779/**
5780 * xmlParseElementChildrenContentDecl:
5781 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005782 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005783 *
5784 * parse the declaration for a Mixed Element content
5785 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5786 *
5787 *
5788 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5789 *
5790 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5791 *
5792 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5793 *
5794 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5795 *
5796 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5797 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005798 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005799 * opening or closing parentheses in a choice, seq, or Mixed
5800 * construct is contained in the replacement text for a parameter
5801 * entity, both must be contained in the same replacement text. For
5802 * interoperability, if a parameter-entity reference appears in a
5803 * choice, seq, or Mixed construct, its replacement text should not
5804 * be empty, and neither the first nor last non-blank character of
5805 * the replacement text should be a connector (| or ,).
5806 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005807 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005808 * hierarchy.
5809 */
5810xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005811xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005812 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005813 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005814 xmlChar type = 0;
5815
5816 SKIP_BLANKS;
5817 GROW;
5818 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005819 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005820
Owen Taylor3473f882001-02-23 17:55:21 +00005821 /* Recurse on first child */
5822 NEXT;
5823 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005824 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005825 SKIP_BLANKS;
5826 GROW;
5827 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005828 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005829 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005830 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005831 return(NULL);
5832 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005833 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005834 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005835 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005836 return(NULL);
5837 }
Owen Taylor3473f882001-02-23 17:55:21 +00005838 GROW;
5839 if (RAW == '?') {
5840 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5841 NEXT;
5842 } else if (RAW == '*') {
5843 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5844 NEXT;
5845 } else if (RAW == '+') {
5846 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5847 NEXT;
5848 } else {
5849 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5850 }
Owen Taylor3473f882001-02-23 17:55:21 +00005851 GROW;
5852 }
5853 SKIP_BLANKS;
5854 SHRINK;
5855 while (RAW != ')') {
5856 /*
5857 * Each loop we parse one separator and one element.
5858 */
5859 if (RAW == ',') {
5860 if (type == 0) type = CUR;
5861
5862 /*
5863 * Detect "Name | Name , Name" error
5864 */
5865 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005866 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005867 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005868 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005869 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005870 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005871 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005872 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005873 return(NULL);
5874 }
5875 NEXT;
5876
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005877 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005878 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005879 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005880 xmlFreeDocElementContent(ctxt->myDoc, last);
5881 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005882 return(NULL);
5883 }
5884 if (last == NULL) {
5885 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005886 if (ret != NULL)
5887 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005888 ret = cur = op;
5889 } else {
5890 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005891 if (op != NULL)
5892 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005893 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005894 if (last != NULL)
5895 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005896 cur =op;
5897 last = NULL;
5898 }
5899 } else if (RAW == '|') {
5900 if (type == 0) type = CUR;
5901
5902 /*
5903 * Detect "Name , Name | Name" error
5904 */
5905 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005906 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005907 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005908 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005909 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005910 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005911 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005912 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005913 return(NULL);
5914 }
5915 NEXT;
5916
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005917 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005918 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005919 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005920 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005921 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005922 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005923 return(NULL);
5924 }
5925 if (last == NULL) {
5926 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005927 if (ret != NULL)
5928 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005929 ret = cur = op;
5930 } else {
5931 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005932 if (op != NULL)
5933 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005934 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005935 if (last != NULL)
5936 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005937 cur =op;
5938 last = NULL;
5939 }
5940 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005941 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005942 if ((last != NULL) && (last != ret))
5943 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005944 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005945 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005946 return(NULL);
5947 }
5948 GROW;
5949 SKIP_BLANKS;
5950 GROW;
5951 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005952 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005953 /* Recurse on second child */
5954 NEXT;
5955 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005956 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005957 SKIP_BLANKS;
5958 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005959 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005960 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005961 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005962 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005963 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005964 return(NULL);
5965 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005966 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005967 if (last == NULL) {
5968 if (ret != NULL)
5969 xmlFreeDocElementContent(ctxt->myDoc, ret);
5970 return(NULL);
5971 }
Owen Taylor3473f882001-02-23 17:55:21 +00005972 if (RAW == '?') {
5973 last->ocur = XML_ELEMENT_CONTENT_OPT;
5974 NEXT;
5975 } else if (RAW == '*') {
5976 last->ocur = XML_ELEMENT_CONTENT_MULT;
5977 NEXT;
5978 } else if (RAW == '+') {
5979 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5980 NEXT;
5981 } else {
5982 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5983 }
5984 }
5985 SKIP_BLANKS;
5986 GROW;
5987 }
5988 if ((cur != NULL) && (last != NULL)) {
5989 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005990 if (last != NULL)
5991 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005992 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005993 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005994 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5995"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005996 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005997 }
Owen Taylor3473f882001-02-23 17:55:21 +00005998 NEXT;
5999 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006000 if (ret != NULL) {
6001 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6002 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6003 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6004 else
6005 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6006 }
Owen Taylor3473f882001-02-23 17:55:21 +00006007 NEXT;
6008 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006009 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006010 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006011 cur = ret;
6012 /*
6013 * Some normalization:
6014 * (a | b* | c?)* == (a | b | c)*
6015 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006016 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006017 if ((cur->c1 != NULL) &&
6018 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6019 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6020 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6021 if ((cur->c2 != NULL) &&
6022 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6023 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6024 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6025 cur = cur->c2;
6026 }
6027 }
Owen Taylor3473f882001-02-23 17:55:21 +00006028 NEXT;
6029 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006030 if (ret != NULL) {
6031 int found = 0;
6032
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006033 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6034 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6035 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006036 else
6037 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006038 /*
6039 * Some normalization:
6040 * (a | b*)+ == (a | b)*
6041 * (a | b?)+ == (a | b)*
6042 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006043 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006044 if ((cur->c1 != NULL) &&
6045 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6046 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6047 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6048 found = 1;
6049 }
6050 if ((cur->c2 != NULL) &&
6051 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6052 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6053 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6054 found = 1;
6055 }
6056 cur = cur->c2;
6057 }
6058 if (found)
6059 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6060 }
Owen Taylor3473f882001-02-23 17:55:21 +00006061 NEXT;
6062 }
6063 return(ret);
6064}
6065
6066/**
6067 * xmlParseElementContentDecl:
6068 * @ctxt: an XML parser context
6069 * @name: the name of the element being defined.
6070 * @result: the Element Content pointer will be stored here if any
6071 *
6072 * parse the declaration for an Element content either Mixed or Children,
6073 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6074 *
6075 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6076 *
6077 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6078 */
6079
6080int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006081xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006082 xmlElementContentPtr *result) {
6083
6084 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006085 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006086 int res;
6087
6088 *result = NULL;
6089
6090 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006091 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006092 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006093 return(-1);
6094 }
6095 NEXT;
6096 GROW;
6097 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006098 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006099 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006100 res = XML_ELEMENT_TYPE_MIXED;
6101 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006102 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006103 res = XML_ELEMENT_TYPE_ELEMENT;
6104 }
Owen Taylor3473f882001-02-23 17:55:21 +00006105 SKIP_BLANKS;
6106 *result = tree;
6107 return(res);
6108}
6109
6110/**
6111 * xmlParseElementDecl:
6112 * @ctxt: an XML parser context
6113 *
6114 * parse an Element declaration.
6115 *
6116 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6117 *
6118 * [ VC: Unique Element Type Declaration ]
6119 * No element type may be declared more than once
6120 *
6121 * Returns the type of the element, or -1 in case of error
6122 */
6123int
6124xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006125 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006126 int ret = -1;
6127 xmlElementContentPtr content = NULL;
6128
Daniel Veillard4c778d82005-01-23 17:37:44 +00006129 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006130 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006131 xmlParserInputPtr input = ctxt->input;
6132
6133 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006134 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006135 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6136 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006137 }
6138 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006139 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006140 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006141 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6142 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006143 return(-1);
6144 }
6145 while ((RAW == 0) && (ctxt->inputNr > 1))
6146 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006147 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006148 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6149 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006150 }
6151 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006152 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006153 SKIP(5);
6154 /*
6155 * Element must always be empty.
6156 */
6157 ret = XML_ELEMENT_TYPE_EMPTY;
6158 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6159 (NXT(2) == 'Y')) {
6160 SKIP(3);
6161 /*
6162 * Element is a generic container.
6163 */
6164 ret = XML_ELEMENT_TYPE_ANY;
6165 } else if (RAW == '(') {
6166 ret = xmlParseElementContentDecl(ctxt, name, &content);
6167 } else {
6168 /*
6169 * [ WFC: PEs in Internal Subset ] error handling.
6170 */
6171 if ((RAW == '%') && (ctxt->external == 0) &&
6172 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006173 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006174 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006175 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006176 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006177 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6178 }
Owen Taylor3473f882001-02-23 17:55:21 +00006179 return(-1);
6180 }
6181
6182 SKIP_BLANKS;
6183 /*
6184 * Pop-up of finished entities.
6185 */
6186 while ((RAW == 0) && (ctxt->inputNr > 1))
6187 xmlPopInput(ctxt);
6188 SKIP_BLANKS;
6189
6190 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006191 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006192 if (content != NULL) {
6193 xmlFreeDocElementContent(ctxt->myDoc, content);
6194 }
Owen Taylor3473f882001-02-23 17:55:21 +00006195 } else {
6196 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006197 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6198 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006199 }
6200
6201 NEXT;
6202 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006203 (ctxt->sax->elementDecl != NULL)) {
6204 if (content != NULL)
6205 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006206 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6207 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006208 if ((content != NULL) && (content->parent == NULL)) {
6209 /*
6210 * this is a trick: if xmlAddElementDecl is called,
6211 * instead of copying the full tree it is plugged directly
6212 * if called from the parser. Avoid duplicating the
6213 * interfaces or change the API/ABI
6214 */
6215 xmlFreeDocElementContent(ctxt->myDoc, content);
6216 }
6217 } else if (content != NULL) {
6218 xmlFreeDocElementContent(ctxt->myDoc, content);
6219 }
Owen Taylor3473f882001-02-23 17:55:21 +00006220 }
Owen Taylor3473f882001-02-23 17:55:21 +00006221 }
6222 return(ret);
6223}
6224
6225/**
Owen Taylor3473f882001-02-23 17:55:21 +00006226 * xmlParseConditionalSections
6227 * @ctxt: an XML parser context
6228 *
6229 * [61] conditionalSect ::= includeSect | ignoreSect
6230 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6231 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6232 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6233 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6234 */
6235
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006236static void
Owen Taylor3473f882001-02-23 17:55:21 +00006237xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006238 int id = ctxt->input->id;
6239
Owen Taylor3473f882001-02-23 17:55:21 +00006240 SKIP(3);
6241 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006242 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006243 SKIP(7);
6244 SKIP_BLANKS;
6245 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006246 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006247 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006248 if (ctxt->input->id != id) {
6249 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6250 "All markup of the conditional section is not in the same entity\n",
6251 NULL, NULL);
6252 }
Owen Taylor3473f882001-02-23 17:55:21 +00006253 NEXT;
6254 }
6255 if (xmlParserDebugEntities) {
6256 if ((ctxt->input != NULL) && (ctxt->input->filename))
6257 xmlGenericError(xmlGenericErrorContext,
6258 "%s(%d): ", ctxt->input->filename,
6259 ctxt->input->line);
6260 xmlGenericError(xmlGenericErrorContext,
6261 "Entering INCLUDE Conditional Section\n");
6262 }
6263
6264 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6265 (NXT(2) != '>'))) {
6266 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006267 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006268
6269 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6270 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006271 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006272 NEXT;
6273 } else if (RAW == '%') {
6274 xmlParsePEReference(ctxt);
6275 } else
6276 xmlParseMarkupDecl(ctxt);
6277
6278 /*
6279 * Pop-up of finished entities.
6280 */
6281 while ((RAW == 0) && (ctxt->inputNr > 1))
6282 xmlPopInput(ctxt);
6283
Daniel Veillardfdc91562002-07-01 21:52:03 +00006284 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006285 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006286 break;
6287 }
6288 }
6289 if (xmlParserDebugEntities) {
6290 if ((ctxt->input != NULL) && (ctxt->input->filename))
6291 xmlGenericError(xmlGenericErrorContext,
6292 "%s(%d): ", ctxt->input->filename,
6293 ctxt->input->line);
6294 xmlGenericError(xmlGenericErrorContext,
6295 "Leaving INCLUDE Conditional Section\n");
6296 }
6297
Daniel Veillarda07050d2003-10-19 14:46:32 +00006298 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006299 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006300 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006301 int depth = 0;
6302
6303 SKIP(6);
6304 SKIP_BLANKS;
6305 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006306 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006307 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006308 if (ctxt->input->id != id) {
6309 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6310 "All markup of the conditional section is not in the same entity\n",
6311 NULL, NULL);
6312 }
Owen Taylor3473f882001-02-23 17:55:21 +00006313 NEXT;
6314 }
6315 if (xmlParserDebugEntities) {
6316 if ((ctxt->input != NULL) && (ctxt->input->filename))
6317 xmlGenericError(xmlGenericErrorContext,
6318 "%s(%d): ", ctxt->input->filename,
6319 ctxt->input->line);
6320 xmlGenericError(xmlGenericErrorContext,
6321 "Entering IGNORE Conditional Section\n");
6322 }
6323
6324 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006325 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006326 * But disable SAX event generating DTD building in the meantime
6327 */
6328 state = ctxt->disableSAX;
6329 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006330 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006331 ctxt->instate = XML_PARSER_IGNORE;
6332
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006333 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006334 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6335 depth++;
6336 SKIP(3);
6337 continue;
6338 }
6339 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6340 if (--depth >= 0) SKIP(3);
6341 continue;
6342 }
6343 NEXT;
6344 continue;
6345 }
6346
6347 ctxt->disableSAX = state;
6348 ctxt->instate = instate;
6349
6350 if (xmlParserDebugEntities) {
6351 if ((ctxt->input != NULL) && (ctxt->input->filename))
6352 xmlGenericError(xmlGenericErrorContext,
6353 "%s(%d): ", ctxt->input->filename,
6354 ctxt->input->line);
6355 xmlGenericError(xmlGenericErrorContext,
6356 "Leaving IGNORE Conditional Section\n");
6357 }
6358
6359 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006360 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006361 }
6362
6363 if (RAW == 0)
6364 SHRINK;
6365
6366 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006367 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006368 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006369 if (ctxt->input->id != id) {
6370 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6371 "All markup of the conditional section is not in the same entity\n",
6372 NULL, NULL);
6373 }
Owen Taylor3473f882001-02-23 17:55:21 +00006374 SKIP(3);
6375 }
6376}
6377
6378/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006379 * xmlParseMarkupDecl:
6380 * @ctxt: an XML parser context
6381 *
6382 * parse Markup declarations
6383 *
6384 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6385 * NotationDecl | PI | Comment
6386 *
6387 * [ VC: Proper Declaration/PE Nesting ]
6388 * Parameter-entity replacement text must be properly nested with
6389 * markup declarations. That is to say, if either the first character
6390 * or the last character of a markup declaration (markupdecl above) is
6391 * contained in the replacement text for a parameter-entity reference,
6392 * both must be contained in the same replacement text.
6393 *
6394 * [ WFC: PEs in Internal Subset ]
6395 * In the internal DTD subset, parameter-entity references can occur
6396 * only where markup declarations can occur, not within markup declarations.
6397 * (This does not apply to references that occur in external parameter
6398 * entities or to the external subset.)
6399 */
6400void
6401xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6402 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006403 if (CUR == '<') {
6404 if (NXT(1) == '!') {
6405 switch (NXT(2)) {
6406 case 'E':
6407 if (NXT(3) == 'L')
6408 xmlParseElementDecl(ctxt);
6409 else if (NXT(3) == 'N')
6410 xmlParseEntityDecl(ctxt);
6411 break;
6412 case 'A':
6413 xmlParseAttributeListDecl(ctxt);
6414 break;
6415 case 'N':
6416 xmlParseNotationDecl(ctxt);
6417 break;
6418 case '-':
6419 xmlParseComment(ctxt);
6420 break;
6421 default:
6422 /* there is an error but it will be detected later */
6423 break;
6424 }
6425 } else if (NXT(1) == '?') {
6426 xmlParsePI(ctxt);
6427 }
6428 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006429 /*
6430 * This is only for internal subset. On external entities,
6431 * the replacement is done before parsing stage
6432 */
6433 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6434 xmlParsePEReference(ctxt);
6435
6436 /*
6437 * Conditional sections are allowed from entities included
6438 * by PE References in the internal subset.
6439 */
6440 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6441 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6442 xmlParseConditionalSections(ctxt);
6443 }
6444 }
6445
6446 ctxt->instate = XML_PARSER_DTD;
6447}
6448
6449/**
6450 * xmlParseTextDecl:
6451 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006452 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006453 * parse an XML declaration header for external entities
6454 *
6455 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006456 */
6457
6458void
6459xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6460 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006461 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006462
6463 /*
6464 * We know that '<?xml' is here.
6465 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006466 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006467 SKIP(5);
6468 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006469 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006470 return;
6471 }
6472
William M. Brack76e95df2003-10-18 16:20:14 +00006473 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006474 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6475 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006476 }
6477 SKIP_BLANKS;
6478
6479 /*
6480 * We may have the VersionInfo here.
6481 */
6482 version = xmlParseVersionInfo(ctxt);
6483 if (version == NULL)
6484 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006485 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006486 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006487 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6488 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006489 }
6490 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006491 ctxt->input->version = version;
6492
6493 /*
6494 * We must have the encoding declaration
6495 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006496 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006497 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6498 /*
6499 * The XML REC instructs us to stop parsing right here
6500 */
6501 return;
6502 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006503 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6504 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6505 "Missing encoding in text declaration\n");
6506 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006507
6508 SKIP_BLANKS;
6509 if ((RAW == '?') && (NXT(1) == '>')) {
6510 SKIP(2);
6511 } else if (RAW == '>') {
6512 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006513 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006514 NEXT;
6515 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006516 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006517 MOVETO_ENDTAG(CUR_PTR);
6518 NEXT;
6519 }
6520}
6521
6522/**
Owen Taylor3473f882001-02-23 17:55:21 +00006523 * xmlParseExternalSubset:
6524 * @ctxt: an XML parser context
6525 * @ExternalID: the external identifier
6526 * @SystemID: the system identifier (or URL)
6527 *
6528 * parse Markup declarations from an external subset
6529 *
6530 * [30] extSubset ::= textDecl? extSubsetDecl
6531 *
6532 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6533 */
6534void
6535xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6536 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006537 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006538 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006539
6540 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6541 (ctxt->input->end - ctxt->input->cur >= 4)) {
6542 xmlChar start[4];
6543 xmlCharEncoding enc;
6544
6545 start[0] = RAW;
6546 start[1] = NXT(1);
6547 start[2] = NXT(2);
6548 start[3] = NXT(3);
6549 enc = xmlDetectCharEncoding(start, 4);
6550 if (enc != XML_CHAR_ENCODING_NONE)
6551 xmlSwitchEncoding(ctxt, enc);
6552 }
6553
Daniel Veillarda07050d2003-10-19 14:46:32 +00006554 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006555 xmlParseTextDecl(ctxt);
6556 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6557 /*
6558 * The XML REC instructs us to stop parsing right here
6559 */
6560 ctxt->instate = XML_PARSER_EOF;
6561 return;
6562 }
6563 }
6564 if (ctxt->myDoc == NULL) {
6565 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006566 if (ctxt->myDoc == NULL) {
6567 xmlErrMemory(ctxt, "New Doc failed");
6568 return;
6569 }
6570 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006571 }
6572 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6573 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6574
6575 ctxt->instate = XML_PARSER_DTD;
6576 ctxt->external = 1;
6577 while (((RAW == '<') && (NXT(1) == '?')) ||
6578 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006579 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006580 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006581 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006582
6583 GROW;
6584 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6585 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006586 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006587 NEXT;
6588 } else if (RAW == '%') {
6589 xmlParsePEReference(ctxt);
6590 } else
6591 xmlParseMarkupDecl(ctxt);
6592
6593 /*
6594 * Pop-up of finished entities.
6595 */
6596 while ((RAW == 0) && (ctxt->inputNr > 1))
6597 xmlPopInput(ctxt);
6598
Daniel Veillardfdc91562002-07-01 21:52:03 +00006599 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006600 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006601 break;
6602 }
6603 }
6604
6605 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006606 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006607 }
6608
6609}
6610
6611/**
6612 * xmlParseReference:
6613 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006614 *
Owen Taylor3473f882001-02-23 17:55:21 +00006615 * parse and handle entity references in content, depending on the SAX
6616 * interface, this may end-up in a call to character() if this is a
6617 * CharRef, a predefined entity, if there is no reference() callback.
6618 * or if the parser was asked to switch to that mode.
6619 *
6620 * [67] Reference ::= EntityRef | CharRef
6621 */
6622void
6623xmlParseReference(xmlParserCtxtPtr ctxt) {
6624 xmlEntityPtr ent;
6625 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006626 int was_checked;
6627 xmlNodePtr list = NULL;
6628 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006629
Daniel Veillard0161e632008-08-28 15:36:32 +00006630
6631 if (RAW != '&')
6632 return;
6633
6634 /*
6635 * Simple case of a CharRef
6636 */
Owen Taylor3473f882001-02-23 17:55:21 +00006637 if (NXT(1) == '#') {
6638 int i = 0;
6639 xmlChar out[10];
6640 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006641 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006642
Daniel Veillarddc171602008-03-26 17:41:38 +00006643 if (value == 0)
6644 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006645 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6646 /*
6647 * So we are using non-UTF-8 buffers
6648 * Check that the char fit on 8bits, if not
6649 * generate a CharRef.
6650 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006651 if (value <= 0xFF) {
6652 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006653 out[1] = 0;
6654 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6655 (!ctxt->disableSAX))
6656 ctxt->sax->characters(ctxt->userData, out, 1);
6657 } else {
6658 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006659 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006660 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006661 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006662 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6663 (!ctxt->disableSAX))
6664 ctxt->sax->reference(ctxt->userData, out);
6665 }
6666 } else {
6667 /*
6668 * Just encode the value in UTF-8
6669 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006670 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006671 out[i] = 0;
6672 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6673 (!ctxt->disableSAX))
6674 ctxt->sax->characters(ctxt->userData, out, i);
6675 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006676 return;
6677 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006678
Daniel Veillard0161e632008-08-28 15:36:32 +00006679 /*
6680 * We are seeing an entity reference
6681 */
6682 ent = xmlParseEntityRef(ctxt);
6683 if (ent == NULL) return;
6684 if (!ctxt->wellFormed)
6685 return;
6686 was_checked = ent->checked;
6687
6688 /* special case of predefined entities */
6689 if ((ent->name == NULL) ||
6690 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6691 val = ent->content;
6692 if (val == NULL) return;
6693 /*
6694 * inline the entity.
6695 */
6696 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6697 (!ctxt->disableSAX))
6698 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6699 return;
6700 }
6701
6702 /*
6703 * The first reference to the entity trigger a parsing phase
6704 * where the ent->children is filled with the result from
6705 * the parsing.
6706 */
6707 if (ent->checked == 0) {
6708 unsigned long oldnbent = ctxt->nbentities;
6709
6710 /*
6711 * This is a bit hackish but this seems the best
6712 * way to make sure both SAX and DOM entity support
6713 * behaves okay.
6714 */
6715 void *user_data;
6716 if (ctxt->userData == ctxt)
6717 user_data = NULL;
6718 else
6719 user_data = ctxt->userData;
6720
6721 /*
6722 * Check that this entity is well formed
6723 * 4.3.2: An internal general parsed entity is well-formed
6724 * if its replacement text matches the production labeled
6725 * content.
6726 */
6727 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6728 ctxt->depth++;
6729 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6730 user_data, &list);
6731 ctxt->depth--;
6732
6733 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6734 ctxt->depth++;
6735 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6736 user_data, ctxt->depth, ent->URI,
6737 ent->ExternalID, &list);
6738 ctxt->depth--;
6739 } else {
6740 ret = XML_ERR_ENTITY_PE_INTERNAL;
6741 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6742 "invalid entity type found\n", NULL);
6743 }
6744
6745 /*
6746 * Store the number of entities needing parsing for this entity
6747 * content and do checkings
6748 */
6749 ent->checked = ctxt->nbentities - oldnbent;
6750 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006751 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006752 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006753 return;
6754 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006755 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6756 xmlFreeNodeList(list);
6757 return;
6758 }
Owen Taylor3473f882001-02-23 17:55:21 +00006759
Daniel Veillard0161e632008-08-28 15:36:32 +00006760 if ((ret == XML_ERR_OK) && (list != NULL)) {
6761 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6762 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6763 (ent->children == NULL)) {
6764 ent->children = list;
6765 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006766 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006767 * Prune it directly in the generated document
6768 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006769 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006770 if (((list->type == XML_TEXT_NODE) &&
6771 (list->next == NULL)) ||
6772 (ctxt->parseMode == XML_PARSE_READER)) {
6773 list->parent = (xmlNodePtr) ent;
6774 list = NULL;
6775 ent->owner = 1;
6776 } else {
6777 ent->owner = 0;
6778 while (list != NULL) {
6779 list->parent = (xmlNodePtr) ctxt->node;
6780 list->doc = ctxt->myDoc;
6781 if (list->next == NULL)
6782 ent->last = list;
6783 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006784 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006785 list = ent->children;
6786#ifdef LIBXML_LEGACY_ENABLED
6787 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6788 xmlAddEntityReference(ent, list, NULL);
6789#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006790 }
6791 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006792 ent->owner = 1;
6793 while (list != NULL) {
6794 list->parent = (xmlNodePtr) ent;
6795 if (list->next == NULL)
6796 ent->last = list;
6797 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006798 }
6799 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006800 } else {
6801 xmlFreeNodeList(list);
6802 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006803 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006804 } else if ((ret != XML_ERR_OK) &&
6805 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6806 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6807 "Entity '%s' failed to parse\n", ent->name);
6808 } else if (list != NULL) {
6809 xmlFreeNodeList(list);
6810 list = NULL;
6811 }
6812 if (ent->checked == 0)
6813 ent->checked = 1;
6814 } else if (ent->checked != 1) {
6815 ctxt->nbentities += ent->checked;
6816 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006817
Daniel Veillard0161e632008-08-28 15:36:32 +00006818 /*
6819 * Now that the entity content has been gathered
6820 * provide it to the application, this can take different forms based
6821 * on the parsing modes.
6822 */
6823 if (ent->children == NULL) {
6824 /*
6825 * Probably running in SAX mode and the callbacks don't
6826 * build the entity content. So unless we already went
6827 * though parsing for first checking go though the entity
6828 * content to generate callbacks associated to the entity
6829 */
6830 if (was_checked != 0) {
6831 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00006832 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006833 * This is a bit hackish but this seems the best
6834 * way to make sure both SAX and DOM entity support
6835 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00006836 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006837 if (ctxt->userData == ctxt)
6838 user_data = NULL;
6839 else
6840 user_data = ctxt->userData;
6841
6842 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6843 ctxt->depth++;
6844 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6845 ent->content, user_data, NULL);
6846 ctxt->depth--;
6847 } else if (ent->etype ==
6848 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6849 ctxt->depth++;
6850 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6851 ctxt->sax, user_data, ctxt->depth,
6852 ent->URI, ent->ExternalID, NULL);
6853 ctxt->depth--;
6854 } else {
6855 ret = XML_ERR_ENTITY_PE_INTERNAL;
6856 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6857 "invalid entity type found\n", NULL);
6858 }
6859 if (ret == XML_ERR_ENTITY_LOOP) {
6860 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6861 return;
6862 }
6863 }
6864 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6865 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6866 /*
6867 * Entity reference callback comes second, it's somewhat
6868 * superfluous but a compatibility to historical behaviour
6869 */
6870 ctxt->sax->reference(ctxt->userData, ent->name);
6871 }
6872 return;
6873 }
6874
6875 /*
6876 * If we didn't get any children for the entity being built
6877 */
6878 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6879 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6880 /*
6881 * Create a node.
6882 */
6883 ctxt->sax->reference(ctxt->userData, ent->name);
6884 return;
6885 }
6886
6887 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6888 /*
6889 * There is a problem on the handling of _private for entities
6890 * (bug 155816): Should we copy the content of the field from
6891 * the entity (possibly overwriting some value set by the user
6892 * when a copy is created), should we leave it alone, or should
6893 * we try to take care of different situations? The problem
6894 * is exacerbated by the usage of this field by the xmlReader.
6895 * To fix this bug, we look at _private on the created node
6896 * and, if it's NULL, we copy in whatever was in the entity.
6897 * If it's not NULL we leave it alone. This is somewhat of a
6898 * hack - maybe we should have further tests to determine
6899 * what to do.
6900 */
6901 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6902 /*
6903 * Seems we are generating the DOM content, do
6904 * a simple tree copy for all references except the first
6905 * In the first occurrence list contains the replacement.
6906 * progressive == 2 means we are operating on the Reader
6907 * and since nodes are discarded we must copy all the time.
6908 */
6909 if (((list == NULL) && (ent->owner == 0)) ||
6910 (ctxt->parseMode == XML_PARSE_READER)) {
6911 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6912
6913 /*
6914 * when operating on a reader, the entities definitions
6915 * are always owning the entities subtree.
6916 if (ctxt->parseMode == XML_PARSE_READER)
6917 ent->owner = 1;
6918 */
6919
6920 cur = ent->children;
6921 while (cur != NULL) {
6922 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6923 if (nw != NULL) {
6924 if (nw->_private == NULL)
6925 nw->_private = cur->_private;
6926 if (firstChild == NULL){
6927 firstChild = nw;
6928 }
6929 nw = xmlAddChild(ctxt->node, nw);
6930 }
6931 if (cur == ent->last) {
6932 /*
6933 * needed to detect some strange empty
6934 * node cases in the reader tests
6935 */
6936 if ((ctxt->parseMode == XML_PARSE_READER) &&
6937 (nw != NULL) &&
6938 (nw->type == XML_ELEMENT_NODE) &&
6939 (nw->children == NULL))
6940 nw->extra = 1;
6941
6942 break;
6943 }
6944 cur = cur->next;
6945 }
6946#ifdef LIBXML_LEGACY_ENABLED
6947 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6948 xmlAddEntityReference(ent, firstChild, nw);
6949#endif /* LIBXML_LEGACY_ENABLED */
6950 } else if (list == NULL) {
6951 xmlNodePtr nw = NULL, cur, next, last,
6952 firstChild = NULL;
6953 /*
6954 * Copy the entity child list and make it the new
6955 * entity child list. The goal is to make sure any
6956 * ID or REF referenced will be the one from the
6957 * document content and not the entity copy.
6958 */
6959 cur = ent->children;
6960 ent->children = NULL;
6961 last = ent->last;
6962 ent->last = NULL;
6963 while (cur != NULL) {
6964 next = cur->next;
6965 cur->next = NULL;
6966 cur->parent = NULL;
6967 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6968 if (nw != NULL) {
6969 if (nw->_private == NULL)
6970 nw->_private = cur->_private;
6971 if (firstChild == NULL){
6972 firstChild = cur;
6973 }
6974 xmlAddChild((xmlNodePtr) ent, nw);
6975 xmlAddChild(ctxt->node, cur);
6976 }
6977 if (cur == last)
6978 break;
6979 cur = next;
6980 }
Daniel Veillardcba68392008-08-29 12:43:40 +00006981 if (ent->owner == 0)
6982 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00006983#ifdef LIBXML_LEGACY_ENABLED
6984 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6985 xmlAddEntityReference(ent, firstChild, nw);
6986#endif /* LIBXML_LEGACY_ENABLED */
6987 } else {
6988 const xmlChar *nbktext;
6989
6990 /*
6991 * the name change is to avoid coalescing of the
6992 * node with a possible previous text one which
6993 * would make ent->children a dangling pointer
6994 */
6995 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6996 -1);
6997 if (ent->children->type == XML_TEXT_NODE)
6998 ent->children->name = nbktext;
6999 if ((ent->last != ent->children) &&
7000 (ent->last->type == XML_TEXT_NODE))
7001 ent->last->name = nbktext;
7002 xmlAddChildList(ctxt->node, ent->children);
7003 }
7004
7005 /*
7006 * This is to avoid a nasty side effect, see
7007 * characters() in SAX.c
7008 */
7009 ctxt->nodemem = 0;
7010 ctxt->nodelen = 0;
7011 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007012 }
7013 }
7014}
7015
7016/**
7017 * xmlParseEntityRef:
7018 * @ctxt: an XML parser context
7019 *
7020 * parse ENTITY references declarations
7021 *
7022 * [68] EntityRef ::= '&' Name ';'
7023 *
7024 * [ WFC: Entity Declared ]
7025 * In a document without any DTD, a document with only an internal DTD
7026 * subset which contains no parameter entity references, or a document
7027 * with "standalone='yes'", the Name given in the entity reference
7028 * must match that in an entity declaration, except that well-formed
7029 * documents need not declare any of the following entities: amp, lt,
7030 * gt, apos, quot. The declaration of a parameter entity must precede
7031 * any reference to it. Similarly, the declaration of a general entity
7032 * must precede any reference to it which appears in a default value in an
7033 * attribute-list declaration. Note that if entities are declared in the
7034 * external subset or in external parameter entities, a non-validating
7035 * processor is not obligated to read and process their declarations;
7036 * for such documents, the rule that an entity must be declared is a
7037 * well-formedness constraint only if standalone='yes'.
7038 *
7039 * [ WFC: Parsed Entity ]
7040 * An entity reference must not contain the name of an unparsed entity
7041 *
7042 * Returns the xmlEntityPtr if found, or NULL otherwise.
7043 */
7044xmlEntityPtr
7045xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007046 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007047 xmlEntityPtr ent = NULL;
7048
7049 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007050
Daniel Veillard0161e632008-08-28 15:36:32 +00007051 if (RAW != '&')
7052 return(NULL);
7053 NEXT;
7054 name = xmlParseName(ctxt);
7055 if (name == NULL) {
7056 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7057 "xmlParseEntityRef: no name\n");
7058 return(NULL);
7059 }
7060 if (RAW != ';') {
7061 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7062 return(NULL);
7063 }
7064 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007065
Daniel Veillard0161e632008-08-28 15:36:32 +00007066 /*
7067 * Predefined entites override any extra definition
7068 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007069 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7070 ent = xmlGetPredefinedEntity(name);
7071 if (ent != NULL)
7072 return(ent);
7073 }
Owen Taylor3473f882001-02-23 17:55:21 +00007074
Daniel Veillard0161e632008-08-28 15:36:32 +00007075 /*
7076 * Increate the number of entity references parsed
7077 */
7078 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007079
Daniel Veillard0161e632008-08-28 15:36:32 +00007080 /*
7081 * Ask first SAX for entity resolution, otherwise try the
7082 * entities which may have stored in the parser context.
7083 */
7084 if (ctxt->sax != NULL) {
7085 if (ctxt->sax->getEntity != NULL)
7086 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007087 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7088 (ctxt->options & XML_PARSE_OLDSAX))
7089 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007090 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7091 (ctxt->userData==ctxt)) {
7092 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007093 }
7094 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007095 /*
7096 * [ WFC: Entity Declared ]
7097 * In a document without any DTD, a document with only an
7098 * internal DTD subset which contains no parameter entity
7099 * references, or a document with "standalone='yes'", the
7100 * Name given in the entity reference must match that in an
7101 * entity declaration, except that well-formed documents
7102 * need not declare any of the following entities: amp, lt,
7103 * gt, apos, quot.
7104 * The declaration of a parameter entity must precede any
7105 * reference to it.
7106 * Similarly, the declaration of a general entity must
7107 * precede any reference to it which appears in a default
7108 * value in an attribute-list declaration. Note that if
7109 * entities are declared in the external subset or in
7110 * external parameter entities, a non-validating processor
7111 * is not obligated to read and process their declarations;
7112 * for such documents, the rule that an entity must be
7113 * declared is a well-formedness constraint only if
7114 * standalone='yes'.
7115 */
7116 if (ent == NULL) {
7117 if ((ctxt->standalone == 1) ||
7118 ((ctxt->hasExternalSubset == 0) &&
7119 (ctxt->hasPErefs == 0))) {
7120 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7121 "Entity '%s' not defined\n", name);
7122 } else {
7123 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7124 "Entity '%s' not defined\n", name);
7125 if ((ctxt->inSubset == 0) &&
7126 (ctxt->sax != NULL) &&
7127 (ctxt->sax->reference != NULL)) {
7128 ctxt->sax->reference(ctxt->userData, name);
7129 }
7130 }
7131 ctxt->valid = 0;
7132 }
7133
7134 /*
7135 * [ WFC: Parsed Entity ]
7136 * An entity reference must not contain the name of an
7137 * unparsed entity
7138 */
7139 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7140 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7141 "Entity reference to unparsed entity %s\n", name);
7142 }
7143
7144 /*
7145 * [ WFC: No External Entity References ]
7146 * Attribute values cannot contain direct or indirect
7147 * entity references to external entities.
7148 */
7149 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7150 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7151 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7152 "Attribute references external entity '%s'\n", name);
7153 }
7154 /*
7155 * [ WFC: No < in Attribute Values ]
7156 * The replacement text of any entity referred to directly or
7157 * indirectly in an attribute value (other than "&lt;") must
7158 * not contain a <.
7159 */
7160 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7161 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007162 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007163 (xmlStrchr(ent->content, '<'))) {
7164 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7165 "'<' in entity '%s' is not allowed in attributes values\n", name);
7166 }
7167
7168 /*
7169 * Internal check, no parameter entities here ...
7170 */
7171 else {
7172 switch (ent->etype) {
7173 case XML_INTERNAL_PARAMETER_ENTITY:
7174 case XML_EXTERNAL_PARAMETER_ENTITY:
7175 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7176 "Attempt to reference the parameter entity '%s'\n",
7177 name);
7178 break;
7179 default:
7180 break;
7181 }
7182 }
7183
7184 /*
7185 * [ WFC: No Recursion ]
7186 * A parsed entity must not contain a recursive reference
7187 * to itself, either directly or indirectly.
7188 * Done somewhere else
7189 */
Owen Taylor3473f882001-02-23 17:55:21 +00007190 return(ent);
7191}
7192
7193/**
7194 * xmlParseStringEntityRef:
7195 * @ctxt: an XML parser context
7196 * @str: a pointer to an index in the string
7197 *
7198 * parse ENTITY references declarations, but this version parses it from
7199 * a string value.
7200 *
7201 * [68] EntityRef ::= '&' Name ';'
7202 *
7203 * [ WFC: Entity Declared ]
7204 * In a document without any DTD, a document with only an internal DTD
7205 * subset which contains no parameter entity references, or a document
7206 * with "standalone='yes'", the Name given in the entity reference
7207 * must match that in an entity declaration, except that well-formed
7208 * documents need not declare any of the following entities: amp, lt,
7209 * gt, apos, quot. The declaration of a parameter entity must precede
7210 * any reference to it. Similarly, the declaration of a general entity
7211 * must precede any reference to it which appears in a default value in an
7212 * attribute-list declaration. Note that if entities are declared in the
7213 * external subset or in external parameter entities, a non-validating
7214 * processor is not obligated to read and process their declarations;
7215 * for such documents, the rule that an entity must be declared is a
7216 * well-formedness constraint only if standalone='yes'.
7217 *
7218 * [ WFC: Parsed Entity ]
7219 * An entity reference must not contain the name of an unparsed entity
7220 *
7221 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7222 * is updated to the current location in the string.
7223 */
7224xmlEntityPtr
7225xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7226 xmlChar *name;
7227 const xmlChar *ptr;
7228 xmlChar cur;
7229 xmlEntityPtr ent = NULL;
7230
7231 if ((str == NULL) || (*str == NULL))
7232 return(NULL);
7233 ptr = *str;
7234 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007235 if (cur != '&')
7236 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007237
Daniel Veillard0161e632008-08-28 15:36:32 +00007238 ptr++;
7239 cur = *ptr;
7240 name = xmlParseStringName(ctxt, &ptr);
7241 if (name == NULL) {
7242 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7243 "xmlParseStringEntityRef: no name\n");
7244 *str = ptr;
7245 return(NULL);
7246 }
7247 if (*ptr != ';') {
7248 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007249 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007250 *str = ptr;
7251 return(NULL);
7252 }
7253 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007254
Owen Taylor3473f882001-02-23 17:55:21 +00007255
Daniel Veillard0161e632008-08-28 15:36:32 +00007256 /*
7257 * Predefined entites override any extra definition
7258 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007259 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7260 ent = xmlGetPredefinedEntity(name);
7261 if (ent != NULL) {
7262 xmlFree(name);
7263 *str = ptr;
7264 return(ent);
7265 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007266 }
Owen Taylor3473f882001-02-23 17:55:21 +00007267
Daniel Veillard0161e632008-08-28 15:36:32 +00007268 /*
7269 * Increate the number of entity references parsed
7270 */
7271 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007272
Daniel Veillard0161e632008-08-28 15:36:32 +00007273 /*
7274 * Ask first SAX for entity resolution, otherwise try the
7275 * entities which may have stored in the parser context.
7276 */
7277 if (ctxt->sax != NULL) {
7278 if (ctxt->sax->getEntity != NULL)
7279 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007280 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7281 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007282 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7283 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007284 }
7285 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007286
7287 /*
7288 * [ WFC: Entity Declared ]
7289 * In a document without any DTD, a document with only an
7290 * internal DTD subset which contains no parameter entity
7291 * references, or a document with "standalone='yes'", the
7292 * Name given in the entity reference must match that in an
7293 * entity declaration, except that well-formed documents
7294 * need not declare any of the following entities: amp, lt,
7295 * gt, apos, quot.
7296 * The declaration of a parameter entity must precede any
7297 * reference to it.
7298 * Similarly, the declaration of a general entity must
7299 * precede any reference to it which appears in a default
7300 * value in an attribute-list declaration. Note that if
7301 * entities are declared in the external subset or in
7302 * external parameter entities, a non-validating processor
7303 * is not obligated to read and process their declarations;
7304 * for such documents, the rule that an entity must be
7305 * declared is a well-formedness constraint only if
7306 * standalone='yes'.
7307 */
7308 if (ent == NULL) {
7309 if ((ctxt->standalone == 1) ||
7310 ((ctxt->hasExternalSubset == 0) &&
7311 (ctxt->hasPErefs == 0))) {
7312 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7313 "Entity '%s' not defined\n", name);
7314 } else {
7315 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7316 "Entity '%s' not defined\n",
7317 name);
7318 }
7319 /* TODO ? check regressions ctxt->valid = 0; */
7320 }
7321
7322 /*
7323 * [ WFC: Parsed Entity ]
7324 * An entity reference must not contain the name of an
7325 * unparsed entity
7326 */
7327 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7328 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7329 "Entity reference to unparsed entity %s\n", name);
7330 }
7331
7332 /*
7333 * [ WFC: No External Entity References ]
7334 * Attribute values cannot contain direct or indirect
7335 * entity references to external entities.
7336 */
7337 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7338 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7339 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7340 "Attribute references external entity '%s'\n", name);
7341 }
7342 /*
7343 * [ WFC: No < in Attribute Values ]
7344 * The replacement text of any entity referred to directly or
7345 * indirectly in an attribute value (other than "&lt;") must
7346 * not contain a <.
7347 */
7348 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7349 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007350 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007351 (xmlStrchr(ent->content, '<'))) {
7352 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7353 "'<' in entity '%s' is not allowed in attributes values\n",
7354 name);
7355 }
7356
7357 /*
7358 * Internal check, no parameter entities here ...
7359 */
7360 else {
7361 switch (ent->etype) {
7362 case XML_INTERNAL_PARAMETER_ENTITY:
7363 case XML_EXTERNAL_PARAMETER_ENTITY:
7364 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7365 "Attempt to reference the parameter entity '%s'\n",
7366 name);
7367 break;
7368 default:
7369 break;
7370 }
7371 }
7372
7373 /*
7374 * [ WFC: No Recursion ]
7375 * A parsed entity must not contain a recursive reference
7376 * to itself, either directly or indirectly.
7377 * Done somewhere else
7378 */
7379
7380 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007381 *str = ptr;
7382 return(ent);
7383}
7384
7385/**
7386 * xmlParsePEReference:
7387 * @ctxt: an XML parser context
7388 *
7389 * parse PEReference declarations
7390 * The entity content is handled directly by pushing it's content as
7391 * a new input stream.
7392 *
7393 * [69] PEReference ::= '%' Name ';'
7394 *
7395 * [ WFC: No Recursion ]
7396 * A parsed entity must not contain a recursive
7397 * reference to itself, either directly or indirectly.
7398 *
7399 * [ WFC: Entity Declared ]
7400 * In a document without any DTD, a document with only an internal DTD
7401 * subset which contains no parameter entity references, or a document
7402 * with "standalone='yes'", ... ... The declaration of a parameter
7403 * entity must precede any reference to it...
7404 *
7405 * [ VC: Entity Declared ]
7406 * In a document with an external subset or external parameter entities
7407 * with "standalone='no'", ... ... The declaration of a parameter entity
7408 * must precede any reference to it...
7409 *
7410 * [ WFC: In DTD ]
7411 * Parameter-entity references may only appear in the DTD.
7412 * NOTE: misleading but this is handled.
7413 */
7414void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007415xmlParsePEReference(xmlParserCtxtPtr ctxt)
7416{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007417 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007418 xmlEntityPtr entity = NULL;
7419 xmlParserInputPtr input;
7420
Daniel Veillard0161e632008-08-28 15:36:32 +00007421 if (RAW != '%')
7422 return;
7423 NEXT;
7424 name = xmlParseName(ctxt);
7425 if (name == NULL) {
7426 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7427 "xmlParsePEReference: no name\n");
7428 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007429 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007430 if (RAW != ';') {
7431 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7432 return;
7433 }
7434
7435 NEXT;
7436
7437 /*
7438 * Increate the number of entity references parsed
7439 */
7440 ctxt->nbentities++;
7441
7442 /*
7443 * Request the entity from SAX
7444 */
7445 if ((ctxt->sax != NULL) &&
7446 (ctxt->sax->getParameterEntity != NULL))
7447 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7448 name);
7449 if (entity == NULL) {
7450 /*
7451 * [ WFC: Entity Declared ]
7452 * In a document without any DTD, a document with only an
7453 * internal DTD subset which contains no parameter entity
7454 * references, or a document with "standalone='yes'", ...
7455 * ... The declaration of a parameter entity must precede
7456 * any reference to it...
7457 */
7458 if ((ctxt->standalone == 1) ||
7459 ((ctxt->hasExternalSubset == 0) &&
7460 (ctxt->hasPErefs == 0))) {
7461 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7462 "PEReference: %%%s; not found\n",
7463 name);
7464 } else {
7465 /*
7466 * [ VC: Entity Declared ]
7467 * In a document with an external subset or external
7468 * parameter entities with "standalone='no'", ...
7469 * ... The declaration of a parameter entity must
7470 * precede any reference to it...
7471 */
7472 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7473 "PEReference: %%%s; not found\n",
7474 name, NULL);
7475 ctxt->valid = 0;
7476 }
7477 } else {
7478 /*
7479 * Internal checking in case the entity quest barfed
7480 */
7481 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7482 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7483 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7484 "Internal: %%%s; is not a parameter entity\n",
7485 name, NULL);
7486 } else if (ctxt->input->free != deallocblankswrapper) {
7487 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7488 if (xmlPushInput(ctxt, input) < 0)
7489 return;
7490 } else {
7491 /*
7492 * TODO !!!
7493 * handle the extra spaces added before and after
7494 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7495 */
7496 input = xmlNewEntityInputStream(ctxt, entity);
7497 if (xmlPushInput(ctxt, input) < 0)
7498 return;
7499 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7500 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7501 (IS_BLANK_CH(NXT(5)))) {
7502 xmlParseTextDecl(ctxt);
7503 if (ctxt->errNo ==
7504 XML_ERR_UNSUPPORTED_ENCODING) {
7505 /*
7506 * The XML REC instructs us to stop parsing
7507 * right here
7508 */
7509 ctxt->instate = XML_PARSER_EOF;
7510 return;
7511 }
7512 }
7513 }
7514 }
7515 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007516}
7517
7518/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007519 * xmlLoadEntityContent:
7520 * @ctxt: an XML parser context
7521 * @entity: an unloaded system entity
7522 *
7523 * Load the original content of the given system entity from the
7524 * ExternalID/SystemID given. This is to be used for Included in Literal
7525 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7526 *
7527 * Returns 0 in case of success and -1 in case of failure
7528 */
7529static int
7530xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7531 xmlParserInputPtr input;
7532 xmlBufferPtr buf;
7533 int l, c;
7534 int count = 0;
7535
7536 if ((ctxt == NULL) || (entity == NULL) ||
7537 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7538 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7539 (entity->content != NULL)) {
7540 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7541 "xmlLoadEntityContent parameter error");
7542 return(-1);
7543 }
7544
7545 if (xmlParserDebugEntities)
7546 xmlGenericError(xmlGenericErrorContext,
7547 "Reading %s entity content input\n", entity->name);
7548
7549 buf = xmlBufferCreate();
7550 if (buf == NULL) {
7551 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7552 "xmlLoadEntityContent parameter error");
7553 return(-1);
7554 }
7555
7556 input = xmlNewEntityInputStream(ctxt, entity);
7557 if (input == NULL) {
7558 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7559 "xmlLoadEntityContent input error");
7560 xmlBufferFree(buf);
7561 return(-1);
7562 }
7563
7564 /*
7565 * Push the entity as the current input, read char by char
7566 * saving to the buffer until the end of the entity or an error
7567 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007568 if (xmlPushInput(ctxt, input) < 0) {
7569 xmlBufferFree(buf);
7570 return(-1);
7571 }
7572
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007573 GROW;
7574 c = CUR_CHAR(l);
7575 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7576 (IS_CHAR(c))) {
7577 xmlBufferAdd(buf, ctxt->input->cur, l);
7578 if (count++ > 100) {
7579 count = 0;
7580 GROW;
7581 }
7582 NEXTL(l);
7583 c = CUR_CHAR(l);
7584 }
7585
7586 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7587 xmlPopInput(ctxt);
7588 } else if (!IS_CHAR(c)) {
7589 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7590 "xmlLoadEntityContent: invalid char value %d\n",
7591 c);
7592 xmlBufferFree(buf);
7593 return(-1);
7594 }
7595 entity->content = buf->content;
7596 buf->content = NULL;
7597 xmlBufferFree(buf);
7598
7599 return(0);
7600}
7601
7602/**
Owen Taylor3473f882001-02-23 17:55:21 +00007603 * xmlParseStringPEReference:
7604 * @ctxt: an XML parser context
7605 * @str: a pointer to an index in the string
7606 *
7607 * parse PEReference declarations
7608 *
7609 * [69] PEReference ::= '%' Name ';'
7610 *
7611 * [ WFC: No Recursion ]
7612 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007613 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007614 *
7615 * [ WFC: Entity Declared ]
7616 * In a document without any DTD, a document with only an internal DTD
7617 * subset which contains no parameter entity references, or a document
7618 * with "standalone='yes'", ... ... The declaration of a parameter
7619 * entity must precede any reference to it...
7620 *
7621 * [ VC: Entity Declared ]
7622 * In a document with an external subset or external parameter entities
7623 * with "standalone='no'", ... ... The declaration of a parameter entity
7624 * must precede any reference to it...
7625 *
7626 * [ WFC: In DTD ]
7627 * Parameter-entity references may only appear in the DTD.
7628 * NOTE: misleading but this is handled.
7629 *
7630 * Returns the string of the entity content.
7631 * str is updated to the current value of the index
7632 */
7633xmlEntityPtr
7634xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7635 const xmlChar *ptr;
7636 xmlChar cur;
7637 xmlChar *name;
7638 xmlEntityPtr entity = NULL;
7639
7640 if ((str == NULL) || (*str == NULL)) return(NULL);
7641 ptr = *str;
7642 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007643 if (cur != '%')
7644 return(NULL);
7645 ptr++;
7646 cur = *ptr;
7647 name = xmlParseStringName(ctxt, &ptr);
7648 if (name == NULL) {
7649 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7650 "xmlParseStringPEReference: no name\n");
7651 *str = ptr;
7652 return(NULL);
7653 }
7654 cur = *ptr;
7655 if (cur != ';') {
7656 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7657 xmlFree(name);
7658 *str = ptr;
7659 return(NULL);
7660 }
7661 ptr++;
7662
7663 /*
7664 * Increate the number of entity references parsed
7665 */
7666 ctxt->nbentities++;
7667
7668 /*
7669 * Request the entity from SAX
7670 */
7671 if ((ctxt->sax != NULL) &&
7672 (ctxt->sax->getParameterEntity != NULL))
7673 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7674 name);
7675 if (entity == NULL) {
7676 /*
7677 * [ WFC: Entity Declared ]
7678 * In a document without any DTD, a document with only an
7679 * internal DTD subset which contains no parameter entity
7680 * references, or a document with "standalone='yes'", ...
7681 * ... The declaration of a parameter entity must precede
7682 * any reference to it...
7683 */
7684 if ((ctxt->standalone == 1) ||
7685 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7686 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7687 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007688 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007689 /*
7690 * [ VC: Entity Declared ]
7691 * In a document with an external subset or external
7692 * parameter entities with "standalone='no'", ...
7693 * ... The declaration of a parameter entity must
7694 * precede any reference to it...
7695 */
7696 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7697 "PEReference: %%%s; not found\n",
7698 name, NULL);
7699 ctxt->valid = 0;
7700 }
7701 } else {
7702 /*
7703 * Internal checking in case the entity quest barfed
7704 */
7705 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7706 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7707 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7708 "%%%s; is not a parameter entity\n",
7709 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007710 }
7711 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007712 ctxt->hasPErefs = 1;
7713 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007714 *str = ptr;
7715 return(entity);
7716}
7717
7718/**
7719 * xmlParseDocTypeDecl:
7720 * @ctxt: an XML parser context
7721 *
7722 * parse a DOCTYPE declaration
7723 *
7724 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7725 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7726 *
7727 * [ VC: Root Element Type ]
7728 * The Name in the document type declaration must match the element
7729 * type of the root element.
7730 */
7731
7732void
7733xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007734 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007735 xmlChar *ExternalID = NULL;
7736 xmlChar *URI = NULL;
7737
7738 /*
7739 * We know that '<!DOCTYPE' has been detected.
7740 */
7741 SKIP(9);
7742
7743 SKIP_BLANKS;
7744
7745 /*
7746 * Parse the DOCTYPE name.
7747 */
7748 name = xmlParseName(ctxt);
7749 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007750 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7751 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007752 }
7753 ctxt->intSubName = name;
7754
7755 SKIP_BLANKS;
7756
7757 /*
7758 * Check for SystemID and ExternalID
7759 */
7760 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7761
7762 if ((URI != NULL) || (ExternalID != NULL)) {
7763 ctxt->hasExternalSubset = 1;
7764 }
7765 ctxt->extSubURI = URI;
7766 ctxt->extSubSystem = ExternalID;
7767
7768 SKIP_BLANKS;
7769
7770 /*
7771 * Create and update the internal subset.
7772 */
7773 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7774 (!ctxt->disableSAX))
7775 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7776
7777 /*
7778 * Is there any internal subset declarations ?
7779 * they are handled separately in xmlParseInternalSubset()
7780 */
7781 if (RAW == '[')
7782 return;
7783
7784 /*
7785 * We should be at the end of the DOCTYPE declaration.
7786 */
7787 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007788 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007789 }
7790 NEXT;
7791}
7792
7793/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007794 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007795 * @ctxt: an XML parser context
7796 *
7797 * parse the internal subset declaration
7798 *
7799 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7800 */
7801
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007802static void
Owen Taylor3473f882001-02-23 17:55:21 +00007803xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7804 /*
7805 * Is there any DTD definition ?
7806 */
7807 if (RAW == '[') {
7808 ctxt->instate = XML_PARSER_DTD;
7809 NEXT;
7810 /*
7811 * Parse the succession of Markup declarations and
7812 * PEReferences.
7813 * Subsequence (markupdecl | PEReference | S)*
7814 */
7815 while (RAW != ']') {
7816 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007817 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007818
7819 SKIP_BLANKS;
7820 xmlParseMarkupDecl(ctxt);
7821 xmlParsePEReference(ctxt);
7822
7823 /*
7824 * Pop-up of finished entities.
7825 */
7826 while ((RAW == 0) && (ctxt->inputNr > 1))
7827 xmlPopInput(ctxt);
7828
7829 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007830 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007831 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007832 break;
7833 }
7834 }
7835 if (RAW == ']') {
7836 NEXT;
7837 SKIP_BLANKS;
7838 }
7839 }
7840
7841 /*
7842 * We should be at the end of the DOCTYPE declaration.
7843 */
7844 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007845 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007846 }
7847 NEXT;
7848}
7849
Daniel Veillard81273902003-09-30 00:43:48 +00007850#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007851/**
7852 * xmlParseAttribute:
7853 * @ctxt: an XML parser context
7854 * @value: a xmlChar ** used to store the value of the attribute
7855 *
7856 * parse an attribute
7857 *
7858 * [41] Attribute ::= Name Eq AttValue
7859 *
7860 * [ WFC: No External Entity References ]
7861 * Attribute values cannot contain direct or indirect entity references
7862 * to external entities.
7863 *
7864 * [ WFC: No < in Attribute Values ]
7865 * The replacement text of any entity referred to directly or indirectly in
7866 * an attribute value (other than "&lt;") must not contain a <.
7867 *
7868 * [ VC: Attribute Value Type ]
7869 * The attribute must have been declared; the value must be of the type
7870 * declared for it.
7871 *
7872 * [25] Eq ::= S? '=' S?
7873 *
7874 * With namespace:
7875 *
7876 * [NS 11] Attribute ::= QName Eq AttValue
7877 *
7878 * Also the case QName == xmlns:??? is handled independently as a namespace
7879 * definition.
7880 *
7881 * Returns the attribute name, and the value in *value.
7882 */
7883
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007884const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007885xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007886 const xmlChar *name;
7887 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007888
7889 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007890 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007891 name = xmlParseName(ctxt);
7892 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007893 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007894 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007895 return(NULL);
7896 }
7897
7898 /*
7899 * read the value
7900 */
7901 SKIP_BLANKS;
7902 if (RAW == '=') {
7903 NEXT;
7904 SKIP_BLANKS;
7905 val = xmlParseAttValue(ctxt);
7906 ctxt->instate = XML_PARSER_CONTENT;
7907 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007908 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007909 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007910 return(NULL);
7911 }
7912
7913 /*
7914 * Check that xml:lang conforms to the specification
7915 * No more registered as an error, just generate a warning now
7916 * since this was deprecated in XML second edition
7917 */
7918 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7919 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007920 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7921 "Malformed value for xml:lang : %s\n",
7922 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007923 }
7924 }
7925
7926 /*
7927 * Check that xml:space conforms to the specification
7928 */
7929 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7930 if (xmlStrEqual(val, BAD_CAST "default"))
7931 *(ctxt->space) = 0;
7932 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7933 *(ctxt->space) = 1;
7934 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007935 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007936"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007937 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007938 }
7939 }
7940
7941 *value = val;
7942 return(name);
7943}
7944
7945/**
7946 * xmlParseStartTag:
7947 * @ctxt: an XML parser context
7948 *
7949 * parse a start of tag either for rule element or
7950 * EmptyElement. In both case we don't parse the tag closing chars.
7951 *
7952 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7953 *
7954 * [ WFC: Unique Att Spec ]
7955 * No attribute name may appear more than once in the same start-tag or
7956 * empty-element tag.
7957 *
7958 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7959 *
7960 * [ WFC: Unique Att Spec ]
7961 * No attribute name may appear more than once in the same start-tag or
7962 * empty-element tag.
7963 *
7964 * With namespace:
7965 *
7966 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7967 *
7968 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7969 *
7970 * Returns the element name parsed
7971 */
7972
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007973const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007974xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007975 const xmlChar *name;
7976 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007977 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007978 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007979 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007980 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007981 int i;
7982
7983 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007984 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007985
7986 name = xmlParseName(ctxt);
7987 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007988 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007989 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007990 return(NULL);
7991 }
7992
7993 /*
7994 * Now parse the attributes, it ends up with the ending
7995 *
7996 * (S Attribute)* S?
7997 */
7998 SKIP_BLANKS;
7999 GROW;
8000
Daniel Veillard21a0f912001-02-25 19:54:14 +00008001 while ((RAW != '>') &&
8002 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008003 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008004 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008005 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008006
8007 attname = xmlParseAttribute(ctxt, &attvalue);
8008 if ((attname != NULL) && (attvalue != NULL)) {
8009 /*
8010 * [ WFC: Unique Att Spec ]
8011 * No attribute name may appear more than once in the same
8012 * start-tag or empty-element tag.
8013 */
8014 for (i = 0; i < nbatts;i += 2) {
8015 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008016 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008017 xmlFree(attvalue);
8018 goto failed;
8019 }
8020 }
Owen Taylor3473f882001-02-23 17:55:21 +00008021 /*
8022 * Add the pair to atts
8023 */
8024 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008025 maxatts = 22; /* allow for 10 attrs by default */
8026 atts = (const xmlChar **)
8027 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008028 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008029 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008030 if (attvalue != NULL)
8031 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008032 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008033 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008034 ctxt->atts = atts;
8035 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008036 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008037 const xmlChar **n;
8038
Owen Taylor3473f882001-02-23 17:55:21 +00008039 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008040 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008041 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008042 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008043 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008044 if (attvalue != NULL)
8045 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008046 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008047 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008048 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008049 ctxt->atts = atts;
8050 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008051 }
8052 atts[nbatts++] = attname;
8053 atts[nbatts++] = attvalue;
8054 atts[nbatts] = NULL;
8055 atts[nbatts + 1] = NULL;
8056 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008057 if (attvalue != NULL)
8058 xmlFree(attvalue);
8059 }
8060
8061failed:
8062
Daniel Veillard3772de32002-12-17 10:31:45 +00008063 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008064 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8065 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008066 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008067 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8068 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008069 }
8070 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008071 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8072 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008073 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8074 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008075 break;
8076 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008077 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008078 GROW;
8079 }
8080
8081 /*
8082 * SAX: Start of Element !
8083 */
8084 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008085 (!ctxt->disableSAX)) {
8086 if (nbatts > 0)
8087 ctxt->sax->startElement(ctxt->userData, name, atts);
8088 else
8089 ctxt->sax->startElement(ctxt->userData, name, NULL);
8090 }
Owen Taylor3473f882001-02-23 17:55:21 +00008091
8092 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008093 /* Free only the content strings */
8094 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008095 if (atts[i] != NULL)
8096 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008097 }
8098 return(name);
8099}
8100
8101/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008102 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008103 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008104 * @line: line of the start tag
8105 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008106 *
8107 * parse an end of tag
8108 *
8109 * [42] ETag ::= '</' Name S? '>'
8110 *
8111 * With namespace
8112 *
8113 * [NS 9] ETag ::= '</' QName S? '>'
8114 */
8115
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008116static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008117xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008118 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008119
8120 GROW;
8121 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008122 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008123 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008124 return;
8125 }
8126 SKIP(2);
8127
Daniel Veillard46de64e2002-05-29 08:21:33 +00008128 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008129
8130 /*
8131 * We should definitely be at the ending "S? '>'" part
8132 */
8133 GROW;
8134 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008135 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008136 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008137 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008138 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008139
8140 /*
8141 * [ WFC: Element Type Match ]
8142 * The Name in an element's end-tag must match the element type in the
8143 * start-tag.
8144 *
8145 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008146 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008147 if (name == NULL) name = BAD_CAST "unparseable";
8148 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008149 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008150 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008151 }
8152
8153 /*
8154 * SAX: End of Tag
8155 */
8156 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8157 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008158 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008159
Daniel Veillarde57ec792003-09-10 10:50:59 +00008160 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008161 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008162 return;
8163}
8164
8165/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008166 * xmlParseEndTag:
8167 * @ctxt: an XML parser context
8168 *
8169 * parse an end of tag
8170 *
8171 * [42] ETag ::= '</' Name S? '>'
8172 *
8173 * With namespace
8174 *
8175 * [NS 9] ETag ::= '</' QName S? '>'
8176 */
8177
8178void
8179xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008180 xmlParseEndTag1(ctxt, 0);
8181}
Daniel Veillard81273902003-09-30 00:43:48 +00008182#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008183
8184/************************************************************************
8185 * *
8186 * SAX 2 specific operations *
8187 * *
8188 ************************************************************************/
8189
Daniel Veillard0fb18932003-09-07 09:14:37 +00008190/*
8191 * xmlGetNamespace:
8192 * @ctxt: an XML parser context
8193 * @prefix: the prefix to lookup
8194 *
8195 * Lookup the namespace name for the @prefix (which ca be NULL)
8196 * The prefix must come from the @ctxt->dict dictionnary
8197 *
8198 * Returns the namespace name or NULL if not bound
8199 */
8200static const xmlChar *
8201xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8202 int i;
8203
Daniel Veillarde57ec792003-09-10 10:50:59 +00008204 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008205 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008206 if (ctxt->nsTab[i] == prefix) {
8207 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8208 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008209 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008210 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008211 return(NULL);
8212}
8213
8214/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008215 * xmlParseQName:
8216 * @ctxt: an XML parser context
8217 * @prefix: pointer to store the prefix part
8218 *
8219 * parse an XML Namespace QName
8220 *
8221 * [6] QName ::= (Prefix ':')? LocalPart
8222 * [7] Prefix ::= NCName
8223 * [8] LocalPart ::= NCName
8224 *
8225 * Returns the Name parsed or NULL
8226 */
8227
8228static const xmlChar *
8229xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8230 const xmlChar *l, *p;
8231
8232 GROW;
8233
8234 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008235 if (l == NULL) {
8236 if (CUR == ':') {
8237 l = xmlParseName(ctxt);
8238 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008239 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8240 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008241 *prefix = NULL;
8242 return(l);
8243 }
8244 }
8245 return(NULL);
8246 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008247 if (CUR == ':') {
8248 NEXT;
8249 p = l;
8250 l = xmlParseNCName(ctxt);
8251 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008252 xmlChar *tmp;
8253
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008254 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8255 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008256 l = xmlParseNmtoken(ctxt);
8257 if (l == NULL)
8258 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8259 else {
8260 tmp = xmlBuildQName(l, p, NULL, 0);
8261 xmlFree((char *)l);
8262 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008263 p = xmlDictLookup(ctxt->dict, tmp, -1);
8264 if (tmp != NULL) xmlFree(tmp);
8265 *prefix = NULL;
8266 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008267 }
8268 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008269 xmlChar *tmp;
8270
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008271 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8272 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008273 NEXT;
8274 tmp = (xmlChar *) xmlParseName(ctxt);
8275 if (tmp != NULL) {
8276 tmp = xmlBuildQName(tmp, l, NULL, 0);
8277 l = xmlDictLookup(ctxt->dict, tmp, -1);
8278 if (tmp != NULL) xmlFree(tmp);
8279 *prefix = p;
8280 return(l);
8281 }
8282 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8283 l = xmlDictLookup(ctxt->dict, tmp, -1);
8284 if (tmp != NULL) xmlFree(tmp);
8285 *prefix = p;
8286 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008287 }
8288 *prefix = p;
8289 } else
8290 *prefix = NULL;
8291 return(l);
8292}
8293
8294/**
8295 * xmlParseQNameAndCompare:
8296 * @ctxt: an XML parser context
8297 * @name: the localname
8298 * @prefix: the prefix, if any.
8299 *
8300 * parse an XML name and compares for match
8301 * (specialized for endtag parsing)
8302 *
8303 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8304 * and the name for mismatch
8305 */
8306
8307static const xmlChar *
8308xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8309 xmlChar const *prefix) {
8310 const xmlChar *cmp = name;
8311 const xmlChar *in;
8312 const xmlChar *ret;
8313 const xmlChar *prefix2;
8314
8315 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8316
8317 GROW;
8318 in = ctxt->input->cur;
8319
8320 cmp = prefix;
8321 while (*in != 0 && *in == *cmp) {
8322 ++in;
8323 ++cmp;
8324 }
8325 if ((*cmp == 0) && (*in == ':')) {
8326 in++;
8327 cmp = name;
8328 while (*in != 0 && *in == *cmp) {
8329 ++in;
8330 ++cmp;
8331 }
William M. Brack76e95df2003-10-18 16:20:14 +00008332 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008333 /* success */
8334 ctxt->input->cur = in;
8335 return((const xmlChar*) 1);
8336 }
8337 }
8338 /*
8339 * all strings coms from the dictionary, equality can be done directly
8340 */
8341 ret = xmlParseQName (ctxt, &prefix2);
8342 if ((ret == name) && (prefix == prefix2))
8343 return((const xmlChar*) 1);
8344 return ret;
8345}
8346
8347/**
8348 * xmlParseAttValueInternal:
8349 * @ctxt: an XML parser context
8350 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008351 * @alloc: whether the attribute was reallocated as a new string
8352 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008353 *
8354 * parse a value for an attribute.
8355 * NOTE: if no normalization is needed, the routine will return pointers
8356 * directly from the data buffer.
8357 *
8358 * 3.3.3 Attribute-Value Normalization:
8359 * Before the value of an attribute is passed to the application or
8360 * checked for validity, the XML processor must normalize it as follows:
8361 * - a character reference is processed by appending the referenced
8362 * character to the attribute value
8363 * - an entity reference is processed by recursively processing the
8364 * replacement text of the entity
8365 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8366 * appending #x20 to the normalized value, except that only a single
8367 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8368 * parsed entity or the literal entity value of an internal parsed entity
8369 * - other characters are processed by appending them to the normalized value
8370 * If the declared value is not CDATA, then the XML processor must further
8371 * process the normalized attribute value by discarding any leading and
8372 * trailing space (#x20) characters, and by replacing sequences of space
8373 * (#x20) characters by a single space (#x20) character.
8374 * All attributes for which no declaration has been read should be treated
8375 * by a non-validating parser as if declared CDATA.
8376 *
8377 * Returns the AttValue parsed or NULL. The value has to be freed by the
8378 * caller if it was copied, this can be detected by val[*len] == 0.
8379 */
8380
8381static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008382xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8383 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008384{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008385 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008386 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008387 xmlChar *ret = NULL;
8388
8389 GROW;
8390 in = (xmlChar *) CUR_PTR;
8391 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008392 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008393 return (NULL);
8394 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008395 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008396
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008397 /*
8398 * try to handle in this routine the most common case where no
8399 * allocation of a new string is required and where content is
8400 * pure ASCII.
8401 */
8402 limit = *in++;
8403 end = ctxt->input->end;
8404 start = in;
8405 if (in >= end) {
8406 const xmlChar *oldbase = ctxt->input->base;
8407 GROW;
8408 if (oldbase != ctxt->input->base) {
8409 long delta = ctxt->input->base - oldbase;
8410 start = start + delta;
8411 in = in + delta;
8412 }
8413 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008414 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008415 if (normalize) {
8416 /*
8417 * Skip any leading spaces
8418 */
8419 while ((in < end) && (*in != limit) &&
8420 ((*in == 0x20) || (*in == 0x9) ||
8421 (*in == 0xA) || (*in == 0xD))) {
8422 in++;
8423 start = in;
8424 if (in >= end) {
8425 const xmlChar *oldbase = ctxt->input->base;
8426 GROW;
8427 if (oldbase != ctxt->input->base) {
8428 long delta = ctxt->input->base - oldbase;
8429 start = start + delta;
8430 in = in + delta;
8431 }
8432 end = ctxt->input->end;
8433 }
8434 }
8435 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8436 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8437 if ((*in++ == 0x20) && (*in == 0x20)) break;
8438 if (in >= end) {
8439 const xmlChar *oldbase = ctxt->input->base;
8440 GROW;
8441 if (oldbase != ctxt->input->base) {
8442 long delta = ctxt->input->base - oldbase;
8443 start = start + delta;
8444 in = in + delta;
8445 }
8446 end = ctxt->input->end;
8447 }
8448 }
8449 last = in;
8450 /*
8451 * skip the trailing blanks
8452 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008453 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008454 while ((in < end) && (*in != limit) &&
8455 ((*in == 0x20) || (*in == 0x9) ||
8456 (*in == 0xA) || (*in == 0xD))) {
8457 in++;
8458 if (in >= end) {
8459 const xmlChar *oldbase = ctxt->input->base;
8460 GROW;
8461 if (oldbase != ctxt->input->base) {
8462 long delta = ctxt->input->base - oldbase;
8463 start = start + delta;
8464 in = in + delta;
8465 last = last + delta;
8466 }
8467 end = ctxt->input->end;
8468 }
8469 }
8470 if (*in != limit) goto need_complex;
8471 } else {
8472 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8473 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8474 in++;
8475 if (in >= end) {
8476 const xmlChar *oldbase = ctxt->input->base;
8477 GROW;
8478 if (oldbase != ctxt->input->base) {
8479 long delta = ctxt->input->base - oldbase;
8480 start = start + delta;
8481 in = in + delta;
8482 }
8483 end = ctxt->input->end;
8484 }
8485 }
8486 last = in;
8487 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008488 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008489 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008490 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008491 *len = last - start;
8492 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008493 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008494 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008495 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008496 }
8497 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008498 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008499 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008500need_complex:
8501 if (alloc) *alloc = 1;
8502 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008503}
8504
8505/**
8506 * xmlParseAttribute2:
8507 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008508 * @pref: the element prefix
8509 * @elem: the element name
8510 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008511 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008512 * @len: an int * to save the length of the attribute
8513 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008514 *
8515 * parse an attribute in the new SAX2 framework.
8516 *
8517 * Returns the attribute name, and the value in *value, .
8518 */
8519
8520static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008521xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008522 const xmlChar * pref, const xmlChar * elem,
8523 const xmlChar ** prefix, xmlChar ** value,
8524 int *len, int *alloc)
8525{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008526 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008527 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008528 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008529
8530 *value = NULL;
8531 GROW;
8532 name = xmlParseQName(ctxt, prefix);
8533 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008534 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8535 "error parsing attribute name\n");
8536 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008537 }
8538
8539 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008540 * get the type if needed
8541 */
8542 if (ctxt->attsSpecial != NULL) {
8543 int type;
8544
8545 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008546 pref, elem, *prefix, name);
8547 if (type != 0)
8548 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008549 }
8550
8551 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008552 * read the value
8553 */
8554 SKIP_BLANKS;
8555 if (RAW == '=') {
8556 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008557 SKIP_BLANKS;
8558 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8559 if (normalize) {
8560 /*
8561 * Sometimes a second normalisation pass for spaces is needed
8562 * but that only happens if charrefs or entities refernces
8563 * have been used in the attribute value, i.e. the attribute
8564 * value have been extracted in an allocated string already.
8565 */
8566 if (*alloc) {
8567 const xmlChar *val2;
8568
8569 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008570 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008571 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008572 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008573 }
8574 }
8575 }
8576 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008577 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008578 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8579 "Specification mandate value for attribute %s\n",
8580 name);
8581 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008582 }
8583
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008584 if (*prefix == ctxt->str_xml) {
8585 /*
8586 * Check that xml:lang conforms to the specification
8587 * No more registered as an error, just generate a warning now
8588 * since this was deprecated in XML second edition
8589 */
8590 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8591 internal_val = xmlStrndup(val, *len);
8592 if (!xmlCheckLanguageID(internal_val)) {
8593 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8594 "Malformed value for xml:lang : %s\n",
8595 internal_val, NULL);
8596 }
8597 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008598
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008599 /*
8600 * Check that xml:space conforms to the specification
8601 */
8602 if (xmlStrEqual(name, BAD_CAST "space")) {
8603 internal_val = xmlStrndup(val, *len);
8604 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8605 *(ctxt->space) = 0;
8606 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8607 *(ctxt->space) = 1;
8608 else {
8609 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8610 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8611 internal_val, NULL);
8612 }
8613 }
8614 if (internal_val) {
8615 xmlFree(internal_val);
8616 }
8617 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008618
8619 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008620 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008621}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008622/**
8623 * xmlParseStartTag2:
8624 * @ctxt: an XML parser context
8625 *
8626 * parse a start of tag either for rule element or
8627 * EmptyElement. In both case we don't parse the tag closing chars.
8628 * This routine is called when running SAX2 parsing
8629 *
8630 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8631 *
8632 * [ WFC: Unique Att Spec ]
8633 * No attribute name may appear more than once in the same start-tag or
8634 * empty-element tag.
8635 *
8636 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8637 *
8638 * [ WFC: Unique Att Spec ]
8639 * No attribute name may appear more than once in the same start-tag or
8640 * empty-element tag.
8641 *
8642 * With namespace:
8643 *
8644 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8645 *
8646 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8647 *
8648 * Returns the element name parsed
8649 */
8650
8651static const xmlChar *
8652xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008653 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008654 const xmlChar *localname;
8655 const xmlChar *prefix;
8656 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008657 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008658 const xmlChar *nsname;
8659 xmlChar *attvalue;
8660 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008661 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008662 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008663 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008664 const xmlChar *base;
8665 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008666 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008667
8668 if (RAW != '<') return(NULL);
8669 NEXT1;
8670
8671 /*
8672 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8673 * point since the attribute values may be stored as pointers to
8674 * the buffer and calling SHRINK would destroy them !
8675 * The Shrinking is only possible once the full set of attribute
8676 * callbacks have been done.
8677 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008678reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008679 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008680 base = ctxt->input->base;
8681 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008682 oldline = ctxt->input->line;
8683 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008684 nbatts = 0;
8685 nratts = 0;
8686 nbdef = 0;
8687 nbNs = 0;
8688 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008689 /* Forget any namespaces added during an earlier parse of this element. */
8690 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008691
8692 localname = xmlParseQName(ctxt, &prefix);
8693 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008694 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8695 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008696 return(NULL);
8697 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008698 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008699
8700 /*
8701 * Now parse the attributes, it ends up with the ending
8702 *
8703 * (S Attribute)* S?
8704 */
8705 SKIP_BLANKS;
8706 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008707 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008708
8709 while ((RAW != '>') &&
8710 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008711 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008712 const xmlChar *q = CUR_PTR;
8713 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008714 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008715
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008716 attname = xmlParseAttribute2(ctxt, prefix, localname,
8717 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008718 if (ctxt->input->base != base) {
8719 if ((attvalue != NULL) && (alloc != 0))
8720 xmlFree(attvalue);
8721 attvalue = NULL;
8722 goto base_changed;
8723 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008724 if ((attname != NULL) && (attvalue != NULL)) {
8725 if (len < 0) len = xmlStrlen(attvalue);
8726 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008727 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8728 xmlURIPtr uri;
8729
8730 if (*URL != 0) {
8731 uri = xmlParseURI((const char *) URL);
8732 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008733 xmlNsErr(ctxt, XML_WAR_NS_URI,
8734 "xmlns: '%s' is not a valid URI\n",
8735 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008736 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008737 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008738 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8739 "xmlns: URI %s is not absolute\n",
8740 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008741 }
8742 xmlFreeURI(uri);
8743 }
Daniel Veillard37334572008-07-31 08:20:02 +00008744 if (URL == ctxt->str_xml_ns) {
8745 if (attname != ctxt->str_xml) {
8746 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8747 "xml namespace URI cannot be the default namespace\n",
8748 NULL, NULL, NULL);
8749 }
8750 goto skip_default_ns;
8751 }
8752 if ((len == 29) &&
8753 (xmlStrEqual(URL,
8754 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8755 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8756 "reuse of the xmlns namespace name is forbidden\n",
8757 NULL, NULL, NULL);
8758 goto skip_default_ns;
8759 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008760 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008761 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008762 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008763 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008764 for (j = 1;j <= nbNs;j++)
8765 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8766 break;
8767 if (j <= nbNs)
8768 xmlErrAttributeDup(ctxt, NULL, attname);
8769 else
8770 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008771skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008772 if (alloc != 0) xmlFree(attvalue);
8773 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008774 continue;
8775 }
8776 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008777 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8778 xmlURIPtr uri;
8779
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008780 if (attname == ctxt->str_xml) {
8781 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008782 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8783 "xml namespace prefix mapped to wrong URI\n",
8784 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008785 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008786 /*
8787 * Do not keep a namespace definition node
8788 */
Daniel Veillard37334572008-07-31 08:20:02 +00008789 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008790 }
Daniel Veillard37334572008-07-31 08:20:02 +00008791 if (URL == ctxt->str_xml_ns) {
8792 if (attname != ctxt->str_xml) {
8793 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8794 "xml namespace URI mapped to wrong prefix\n",
8795 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008796 }
Daniel Veillard37334572008-07-31 08:20:02 +00008797 goto skip_ns;
8798 }
8799 if (attname == ctxt->str_xmlns) {
8800 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8801 "redefinition of the xmlns prefix is forbidden\n",
8802 NULL, NULL, NULL);
8803 goto skip_ns;
8804 }
8805 if ((len == 29) &&
8806 (xmlStrEqual(URL,
8807 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8808 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8809 "reuse of the xmlns namespace name is forbidden\n",
8810 NULL, NULL, NULL);
8811 goto skip_ns;
8812 }
8813 if ((URL == NULL) || (URL[0] == 0)) {
8814 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8815 "xmlns:%s: Empty XML namespace is not allowed\n",
8816 attname, NULL, NULL);
8817 goto skip_ns;
8818 } else {
8819 uri = xmlParseURI((const char *) URL);
8820 if (uri == NULL) {
8821 xmlNsErr(ctxt, XML_WAR_NS_URI,
8822 "xmlns:%s: '%s' is not a valid URI\n",
8823 attname, URL, NULL);
8824 } else {
8825 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8826 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8827 "xmlns:%s: URI %s is not absolute\n",
8828 attname, URL, NULL);
8829 }
8830 xmlFreeURI(uri);
8831 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008832 }
8833
Daniel Veillard0fb18932003-09-07 09:14:37 +00008834 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008835 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008836 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008837 for (j = 1;j <= nbNs;j++)
8838 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8839 break;
8840 if (j <= nbNs)
8841 xmlErrAttributeDup(ctxt, aprefix, attname);
8842 else
8843 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008844skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008845 if (alloc != 0) xmlFree(attvalue);
8846 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008847 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008848 continue;
8849 }
8850
8851 /*
8852 * Add the pair to atts
8853 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008854 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8855 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008856 if (attvalue[len] == 0)
8857 xmlFree(attvalue);
8858 goto failed;
8859 }
8860 maxatts = ctxt->maxatts;
8861 atts = ctxt->atts;
8862 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008863 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008864 atts[nbatts++] = attname;
8865 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008866 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008867 atts[nbatts++] = attvalue;
8868 attvalue += len;
8869 atts[nbatts++] = attvalue;
8870 /*
8871 * tag if some deallocation is needed
8872 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008873 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008874 } else {
8875 if ((attvalue != NULL) && (attvalue[len] == 0))
8876 xmlFree(attvalue);
8877 }
8878
Daniel Veillard37334572008-07-31 08:20:02 +00008879failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008880
8881 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008882 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008883 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8884 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008885 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008886 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8887 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008888 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008889 }
8890 SKIP_BLANKS;
8891 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8892 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008893 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008894 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008895 break;
8896 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008897 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008898 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008899 }
8900
Daniel Veillard0fb18932003-09-07 09:14:37 +00008901 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008902 * The attributes defaulting
8903 */
8904 if (ctxt->attsDefault != NULL) {
8905 xmlDefAttrsPtr defaults;
8906
8907 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8908 if (defaults != NULL) {
8909 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008910 attname = defaults->values[5 * i];
8911 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008912
8913 /*
8914 * special work for namespaces defaulted defs
8915 */
8916 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8917 /*
8918 * check that it's not a defined namespace
8919 */
8920 for (j = 1;j <= nbNs;j++)
8921 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8922 break;
8923 if (j <= nbNs) continue;
8924
8925 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008926 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008927 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008928 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008929 nbNs++;
8930 }
8931 } else if (aprefix == ctxt->str_xmlns) {
8932 /*
8933 * check that it's not a defined namespace
8934 */
8935 for (j = 1;j <= nbNs;j++)
8936 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8937 break;
8938 if (j <= nbNs) continue;
8939
8940 nsname = xmlGetNamespace(ctxt, attname);
8941 if (nsname != defaults->values[2]) {
8942 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008943 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008944 nbNs++;
8945 }
8946 } else {
8947 /*
8948 * check that it's not a defined attribute
8949 */
8950 for (j = 0;j < nbatts;j+=5) {
8951 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8952 break;
8953 }
8954 if (j < nbatts) continue;
8955
8956 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8957 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008958 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008959 }
8960 maxatts = ctxt->maxatts;
8961 atts = ctxt->atts;
8962 }
8963 atts[nbatts++] = attname;
8964 atts[nbatts++] = aprefix;
8965 if (aprefix == NULL)
8966 atts[nbatts++] = NULL;
8967 else
8968 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008969 atts[nbatts++] = defaults->values[5 * i + 2];
8970 atts[nbatts++] = defaults->values[5 * i + 3];
8971 if ((ctxt->standalone == 1) &&
8972 (defaults->values[5 * i + 4] != NULL)) {
8973 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
8974 "standalone: attribute %s on %s defaulted from external subset\n",
8975 attname, localname);
8976 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008977 nbdef++;
8978 }
8979 }
8980 }
8981 }
8982
Daniel Veillarde70c8772003-11-25 07:21:18 +00008983 /*
8984 * The attributes checkings
8985 */
8986 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008987 /*
8988 * The default namespace does not apply to attribute names.
8989 */
8990 if (atts[i + 1] != NULL) {
8991 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8992 if (nsname == NULL) {
8993 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8994 "Namespace prefix %s for %s on %s is not defined\n",
8995 atts[i + 1], atts[i], localname);
8996 }
8997 atts[i + 2] = nsname;
8998 } else
8999 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009000 /*
9001 * [ WFC: Unique Att Spec ]
9002 * No attribute name may appear more than once in the same
9003 * start-tag or empty-element tag.
9004 * As extended by the Namespace in XML REC.
9005 */
9006 for (j = 0; j < i;j += 5) {
9007 if (atts[i] == atts[j]) {
9008 if (atts[i+1] == atts[j+1]) {
9009 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9010 break;
9011 }
9012 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9013 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9014 "Namespaced Attribute %s in '%s' redefined\n",
9015 atts[i], nsname, NULL);
9016 break;
9017 }
9018 }
9019 }
9020 }
9021
Daniel Veillarde57ec792003-09-10 10:50:59 +00009022 nsname = xmlGetNamespace(ctxt, prefix);
9023 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009024 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9025 "Namespace prefix %s on %s is not defined\n",
9026 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009027 }
9028 *pref = prefix;
9029 *URI = nsname;
9030
9031 /*
9032 * SAX: Start of Element !
9033 */
9034 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9035 (!ctxt->disableSAX)) {
9036 if (nbNs > 0)
9037 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9038 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9039 nbatts / 5, nbdef, atts);
9040 else
9041 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9042 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9043 }
9044
9045 /*
9046 * Free up attribute allocated strings if needed
9047 */
9048 if (attval != 0) {
9049 for (i = 3,j = 0; j < nratts;i += 5,j++)
9050 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9051 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009052 }
9053
9054 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009055
9056base_changed:
9057 /*
9058 * the attribute strings are valid iif the base didn't changed
9059 */
9060 if (attval != 0) {
9061 for (i = 3,j = 0; j < nratts;i += 5,j++)
9062 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9063 xmlFree((xmlChar *) atts[i]);
9064 }
9065 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009066 ctxt->input->line = oldline;
9067 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009068 if (ctxt->wellFormed == 1) {
9069 goto reparse;
9070 }
9071 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009072}
9073
9074/**
9075 * xmlParseEndTag2:
9076 * @ctxt: an XML parser context
9077 * @line: line of the start tag
9078 * @nsNr: number of namespaces on the start tag
9079 *
9080 * parse an end of tag
9081 *
9082 * [42] ETag ::= '</' Name S? '>'
9083 *
9084 * With namespace
9085 *
9086 * [NS 9] ETag ::= '</' QName S? '>'
9087 */
9088
9089static void
9090xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009091 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009092 const xmlChar *name;
9093
9094 GROW;
9095 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009096 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009097 return;
9098 }
9099 SKIP(2);
9100
William M. Brack13dfa872004-09-18 04:52:08 +00009101 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009102 if (ctxt->input->cur[tlen] == '>') {
9103 ctxt->input->cur += tlen + 1;
9104 goto done;
9105 }
9106 ctxt->input->cur += tlen;
9107 name = (xmlChar*)1;
9108 } else {
9109 if (prefix == NULL)
9110 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9111 else
9112 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9113 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009114
9115 /*
9116 * We should definitely be at the ending "S? '>'" part
9117 */
9118 GROW;
9119 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009120 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009121 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009122 } else
9123 NEXT1;
9124
9125 /*
9126 * [ WFC: Element Type Match ]
9127 * The Name in an element's end-tag must match the element type in the
9128 * start-tag.
9129 *
9130 */
9131 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009132 if (name == NULL) name = BAD_CAST "unparseable";
9133 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009134 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009135 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009136 }
9137
9138 /*
9139 * SAX: End of Tag
9140 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009141done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009142 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9143 (!ctxt->disableSAX))
9144 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9145
Daniel Veillard0fb18932003-09-07 09:14:37 +00009146 spacePop(ctxt);
9147 if (nsNr != 0)
9148 nsPop(ctxt, nsNr);
9149 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009150}
9151
9152/**
Owen Taylor3473f882001-02-23 17:55:21 +00009153 * xmlParseCDSect:
9154 * @ctxt: an XML parser context
9155 *
9156 * Parse escaped pure raw content.
9157 *
9158 * [18] CDSect ::= CDStart CData CDEnd
9159 *
9160 * [19] CDStart ::= '<![CDATA['
9161 *
9162 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9163 *
9164 * [21] CDEnd ::= ']]>'
9165 */
9166void
9167xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9168 xmlChar *buf = NULL;
9169 int len = 0;
9170 int size = XML_PARSER_BUFFER_SIZE;
9171 int r, rl;
9172 int s, sl;
9173 int cur, l;
9174 int count = 0;
9175
Daniel Veillard8f597c32003-10-06 08:19:27 +00009176 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009177 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009178 SKIP(9);
9179 } else
9180 return;
9181
9182 ctxt->instate = XML_PARSER_CDATA_SECTION;
9183 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009184 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009185 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009186 ctxt->instate = XML_PARSER_CONTENT;
9187 return;
9188 }
9189 NEXTL(rl);
9190 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009191 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009192 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009193 ctxt->instate = XML_PARSER_CONTENT;
9194 return;
9195 }
9196 NEXTL(sl);
9197 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009198 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009199 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009200 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009201 return;
9202 }
William M. Brack871611b2003-10-18 04:53:14 +00009203 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009204 ((r != ']') || (s != ']') || (cur != '>'))) {
9205 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009206 xmlChar *tmp;
9207
Owen Taylor3473f882001-02-23 17:55:21 +00009208 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009209 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9210 if (tmp == NULL) {
9211 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009212 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009213 return;
9214 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009215 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009216 }
9217 COPY_BUF(rl,buf,len,r);
9218 r = s;
9219 rl = sl;
9220 s = cur;
9221 sl = l;
9222 count++;
9223 if (count > 50) {
9224 GROW;
9225 count = 0;
9226 }
9227 NEXTL(l);
9228 cur = CUR_CHAR(l);
9229 }
9230 buf[len] = 0;
9231 ctxt->instate = XML_PARSER_CONTENT;
9232 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009233 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009234 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009235 xmlFree(buf);
9236 return;
9237 }
9238 NEXTL(l);
9239
9240 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009241 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009242 */
9243 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9244 if (ctxt->sax->cdataBlock != NULL)
9245 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009246 else if (ctxt->sax->characters != NULL)
9247 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009248 }
9249 xmlFree(buf);
9250}
9251
9252/**
9253 * xmlParseContent:
9254 * @ctxt: an XML parser context
9255 *
9256 * Parse a content:
9257 *
9258 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9259 */
9260
9261void
9262xmlParseContent(xmlParserCtxtPtr ctxt) {
9263 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009264 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009265 ((RAW != '<') || (NXT(1) != '/')) &&
9266 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009267 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009268 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009269 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009270
9271 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009272 * First case : a Processing Instruction.
9273 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009274 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009275 xmlParsePI(ctxt);
9276 }
9277
9278 /*
9279 * Second case : a CDSection
9280 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009281 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009282 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009283 xmlParseCDSect(ctxt);
9284 }
9285
9286 /*
9287 * Third case : a comment
9288 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009289 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009290 (NXT(2) == '-') && (NXT(3) == '-')) {
9291 xmlParseComment(ctxt);
9292 ctxt->instate = XML_PARSER_CONTENT;
9293 }
9294
9295 /*
9296 * Fourth case : a sub-element.
9297 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009298 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009299 xmlParseElement(ctxt);
9300 }
9301
9302 /*
9303 * Fifth case : a reference. If if has not been resolved,
9304 * parsing returns it's Name, create the node
9305 */
9306
Daniel Veillard21a0f912001-02-25 19:54:14 +00009307 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009308 xmlParseReference(ctxt);
9309 }
9310
9311 /*
9312 * Last case, text. Note that References are handled directly.
9313 */
9314 else {
9315 xmlParseCharData(ctxt, 0);
9316 }
9317
9318 GROW;
9319 /*
9320 * Pop-up of finished entities.
9321 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009322 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009323 xmlPopInput(ctxt);
9324 SHRINK;
9325
Daniel Veillardfdc91562002-07-01 21:52:03 +00009326 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009327 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9328 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009329 ctxt->instate = XML_PARSER_EOF;
9330 break;
9331 }
9332 }
9333}
9334
9335/**
9336 * xmlParseElement:
9337 * @ctxt: an XML parser context
9338 *
9339 * parse an XML element, this is highly recursive
9340 *
9341 * [39] element ::= EmptyElemTag | STag content ETag
9342 *
9343 * [ WFC: Element Type Match ]
9344 * The Name in an element's end-tag must match the element type in the
9345 * start-tag.
9346 *
Owen Taylor3473f882001-02-23 17:55:21 +00009347 */
9348
9349void
9350xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009351 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009352 const xmlChar *prefix;
9353 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00009354 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009355 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009356 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009357 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009358
Daniel Veillard8915c152008-08-26 13:05:34 +00009359 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9360 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9361 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9362 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9363 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009364 ctxt->instate = XML_PARSER_EOF;
9365 return;
9366 }
9367
Owen Taylor3473f882001-02-23 17:55:21 +00009368 /* Capture start position */
9369 if (ctxt->record_info) {
9370 node_info.begin_pos = ctxt->input->consumed +
9371 (CUR_PTR - ctxt->input->base);
9372 node_info.begin_line = ctxt->input->line;
9373 }
9374
9375 if (ctxt->spaceNr == 0)
9376 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009377 else if (*ctxt->space == -2)
9378 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009379 else
9380 spacePush(ctxt, *ctxt->space);
9381
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009382 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009383#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009384 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009385#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009386 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009387#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009388 else
9389 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009390#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009391 if (name == NULL) {
9392 spacePop(ctxt);
9393 return;
9394 }
9395 namePush(ctxt, name);
9396 ret = ctxt->node;
9397
Daniel Veillard4432df22003-09-28 18:58:27 +00009398#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009399 /*
9400 * [ VC: Root Element Type ]
9401 * The Name in the document type declaration must match the element
9402 * type of the root element.
9403 */
9404 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9405 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9406 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009407#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009408
9409 /*
9410 * Check for an Empty Element.
9411 */
9412 if ((RAW == '/') && (NXT(1) == '>')) {
9413 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009414 if (ctxt->sax2) {
9415 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9416 (!ctxt->disableSAX))
9417 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009418#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009419 } else {
9420 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9421 (!ctxt->disableSAX))
9422 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009423#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009424 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009425 namePop(ctxt);
9426 spacePop(ctxt);
9427 if (nsNr != ctxt->nsNr)
9428 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009429 if ( ret != NULL && ctxt->record_info ) {
9430 node_info.end_pos = ctxt->input->consumed +
9431 (CUR_PTR - ctxt->input->base);
9432 node_info.end_line = ctxt->input->line;
9433 node_info.node = ret;
9434 xmlParserAddNodeInfo(ctxt, &node_info);
9435 }
9436 return;
9437 }
9438 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009439 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009440 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009441 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9442 "Couldn't find end of Start Tag %s line %d\n",
9443 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009444
9445 /*
9446 * end of parsing of this node.
9447 */
9448 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009449 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009450 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009451 if (nsNr != ctxt->nsNr)
9452 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009453
9454 /*
9455 * Capture end position and add node
9456 */
9457 if ( ret != NULL && ctxt->record_info ) {
9458 node_info.end_pos = ctxt->input->consumed +
9459 (CUR_PTR - ctxt->input->base);
9460 node_info.end_line = ctxt->input->line;
9461 node_info.node = ret;
9462 xmlParserAddNodeInfo(ctxt, &node_info);
9463 }
9464 return;
9465 }
9466
9467 /*
9468 * Parse the content of the element:
9469 */
9470 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009471 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009472 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009473 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009474 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009475
9476 /*
9477 * end of parsing of this node.
9478 */
9479 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009480 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009481 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009482 if (nsNr != ctxt->nsNr)
9483 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009484 return;
9485 }
9486
9487 /*
9488 * parse the end of tag: '</' should be here.
9489 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009490 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009491 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009492 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009493 }
9494#ifdef LIBXML_SAX1_ENABLED
9495 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009496 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009497#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009498
9499 /*
9500 * Capture end position and add node
9501 */
9502 if ( ret != NULL && ctxt->record_info ) {
9503 node_info.end_pos = ctxt->input->consumed +
9504 (CUR_PTR - ctxt->input->base);
9505 node_info.end_line = ctxt->input->line;
9506 node_info.node = ret;
9507 xmlParserAddNodeInfo(ctxt, &node_info);
9508 }
9509}
9510
9511/**
9512 * xmlParseVersionNum:
9513 * @ctxt: an XML parser context
9514 *
9515 * parse the XML version value.
9516 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009517 * [26] VersionNum ::= '1.' [0-9]+
9518 *
9519 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009520 *
9521 * Returns the string giving the XML version number, or NULL
9522 */
9523xmlChar *
9524xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9525 xmlChar *buf = NULL;
9526 int len = 0;
9527 int size = 10;
9528 xmlChar cur;
9529
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009530 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009531 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009532 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009533 return(NULL);
9534 }
9535 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009536 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009537 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009538 return(NULL);
9539 }
9540 buf[len++] = cur;
9541 NEXT;
9542 cur=CUR;
9543 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009544 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009545 return(NULL);
9546 }
9547 buf[len++] = cur;
9548 NEXT;
9549 cur=CUR;
9550 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009551 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009552 xmlChar *tmp;
9553
Owen Taylor3473f882001-02-23 17:55:21 +00009554 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009555 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9556 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009557 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009558 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009559 return(NULL);
9560 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009561 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009562 }
9563 buf[len++] = cur;
9564 NEXT;
9565 cur=CUR;
9566 }
9567 buf[len] = 0;
9568 return(buf);
9569}
9570
9571/**
9572 * xmlParseVersionInfo:
9573 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009574 *
Owen Taylor3473f882001-02-23 17:55:21 +00009575 * parse the XML version.
9576 *
9577 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009578 *
Owen Taylor3473f882001-02-23 17:55:21 +00009579 * [25] Eq ::= S? '=' S?
9580 *
9581 * Returns the version string, e.g. "1.0"
9582 */
9583
9584xmlChar *
9585xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9586 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009587
Daniel Veillarda07050d2003-10-19 14:46:32 +00009588 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009589 SKIP(7);
9590 SKIP_BLANKS;
9591 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009592 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009593 return(NULL);
9594 }
9595 NEXT;
9596 SKIP_BLANKS;
9597 if (RAW == '"') {
9598 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009599 version = xmlParseVersionNum(ctxt);
9600 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009601 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009602 } else
9603 NEXT;
9604 } else if (RAW == '\''){
9605 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009606 version = xmlParseVersionNum(ctxt);
9607 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009608 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009609 } else
9610 NEXT;
9611 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009612 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009613 }
9614 }
9615 return(version);
9616}
9617
9618/**
9619 * xmlParseEncName:
9620 * @ctxt: an XML parser context
9621 *
9622 * parse the XML encoding name
9623 *
9624 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9625 *
9626 * Returns the encoding name value or NULL
9627 */
9628xmlChar *
9629xmlParseEncName(xmlParserCtxtPtr ctxt) {
9630 xmlChar *buf = NULL;
9631 int len = 0;
9632 int size = 10;
9633 xmlChar cur;
9634
9635 cur = CUR;
9636 if (((cur >= 'a') && (cur <= 'z')) ||
9637 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009638 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009639 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009640 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009641 return(NULL);
9642 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009643
Owen Taylor3473f882001-02-23 17:55:21 +00009644 buf[len++] = cur;
9645 NEXT;
9646 cur = CUR;
9647 while (((cur >= 'a') && (cur <= 'z')) ||
9648 ((cur >= 'A') && (cur <= 'Z')) ||
9649 ((cur >= '0') && (cur <= '9')) ||
9650 (cur == '.') || (cur == '_') ||
9651 (cur == '-')) {
9652 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009653 xmlChar *tmp;
9654
Owen Taylor3473f882001-02-23 17:55:21 +00009655 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009656 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9657 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009658 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009659 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009660 return(NULL);
9661 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009662 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009663 }
9664 buf[len++] = cur;
9665 NEXT;
9666 cur = CUR;
9667 if (cur == 0) {
9668 SHRINK;
9669 GROW;
9670 cur = CUR;
9671 }
9672 }
9673 buf[len] = 0;
9674 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009675 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009676 }
9677 return(buf);
9678}
9679
9680/**
9681 * xmlParseEncodingDecl:
9682 * @ctxt: an XML parser context
9683 *
9684 * parse the XML encoding declaration
9685 *
9686 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9687 *
9688 * this setups the conversion filters.
9689 *
9690 * Returns the encoding value or NULL
9691 */
9692
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009693const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009694xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9695 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009696
9697 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009698 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009699 SKIP(8);
9700 SKIP_BLANKS;
9701 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009702 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009703 return(NULL);
9704 }
9705 NEXT;
9706 SKIP_BLANKS;
9707 if (RAW == '"') {
9708 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009709 encoding = xmlParseEncName(ctxt);
9710 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009711 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009712 } else
9713 NEXT;
9714 } else if (RAW == '\''){
9715 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009716 encoding = xmlParseEncName(ctxt);
9717 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009718 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009719 } else
9720 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009721 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009722 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009723 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009724 /*
9725 * UTF-16 encoding stwich has already taken place at this stage,
9726 * more over the little-endian/big-endian selection is already done
9727 */
9728 if ((encoding != NULL) &&
9729 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9730 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009731 /*
9732 * If no encoding was passed to the parser, that we are
9733 * using UTF-16 and no decoder is present i.e. the
9734 * document is apparently UTF-8 compatible, then raise an
9735 * encoding mismatch fatal error
9736 */
9737 if ((ctxt->encoding == NULL) &&
9738 (ctxt->input->buf != NULL) &&
9739 (ctxt->input->buf->encoder == NULL)) {
9740 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9741 "Document labelled UTF-16 but has UTF-8 content\n");
9742 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009743 if (ctxt->encoding != NULL)
9744 xmlFree((xmlChar *) ctxt->encoding);
9745 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009746 }
9747 /*
9748 * UTF-8 encoding is handled natively
9749 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009750 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009751 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9752 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009753 if (ctxt->encoding != NULL)
9754 xmlFree((xmlChar *) ctxt->encoding);
9755 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009756 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009757 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009758 xmlCharEncodingHandlerPtr handler;
9759
9760 if (ctxt->input->encoding != NULL)
9761 xmlFree((xmlChar *) ctxt->input->encoding);
9762 ctxt->input->encoding = encoding;
9763
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009764 handler = xmlFindCharEncodingHandler((const char *) encoding);
9765 if (handler != NULL) {
9766 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009767 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009768 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009769 "Unsupported encoding %s\n", encoding);
9770 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009771 }
9772 }
9773 }
9774 return(encoding);
9775}
9776
9777/**
9778 * xmlParseSDDecl:
9779 * @ctxt: an XML parser context
9780 *
9781 * parse the XML standalone declaration
9782 *
9783 * [32] SDDecl ::= S 'standalone' Eq
9784 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9785 *
9786 * [ VC: Standalone Document Declaration ]
9787 * TODO The standalone document declaration must have the value "no"
9788 * if any external markup declarations contain declarations of:
9789 * - attributes with default values, if elements to which these
9790 * attributes apply appear in the document without specifications
9791 * of values for these attributes, or
9792 * - entities (other than amp, lt, gt, apos, quot), if references
9793 * to those entities appear in the document, or
9794 * - attributes with values subject to normalization, where the
9795 * attribute appears in the document with a value which will change
9796 * as a result of normalization, or
9797 * - element types with element content, if white space occurs directly
9798 * within any instance of those types.
9799 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009800 * Returns:
9801 * 1 if standalone="yes"
9802 * 0 if standalone="no"
9803 * -2 if standalone attribute is missing or invalid
9804 * (A standalone value of -2 means that the XML declaration was found,
9805 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009806 */
9807
9808int
9809xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009810 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009811
9812 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009813 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009814 SKIP(10);
9815 SKIP_BLANKS;
9816 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009817 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009818 return(standalone);
9819 }
9820 NEXT;
9821 SKIP_BLANKS;
9822 if (RAW == '\''){
9823 NEXT;
9824 if ((RAW == 'n') && (NXT(1) == 'o')) {
9825 standalone = 0;
9826 SKIP(2);
9827 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9828 (NXT(2) == 's')) {
9829 standalone = 1;
9830 SKIP(3);
9831 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009832 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009833 }
9834 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009835 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009836 } else
9837 NEXT;
9838 } else if (RAW == '"'){
9839 NEXT;
9840 if ((RAW == 'n') && (NXT(1) == 'o')) {
9841 standalone = 0;
9842 SKIP(2);
9843 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9844 (NXT(2) == 's')) {
9845 standalone = 1;
9846 SKIP(3);
9847 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009848 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009849 }
9850 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009851 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009852 } else
9853 NEXT;
9854 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009855 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009856 }
9857 }
9858 return(standalone);
9859}
9860
9861/**
9862 * xmlParseXMLDecl:
9863 * @ctxt: an XML parser context
9864 *
9865 * parse an XML declaration header
9866 *
9867 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9868 */
9869
9870void
9871xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9872 xmlChar *version;
9873
9874 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009875 * This value for standalone indicates that the document has an
9876 * XML declaration but it does not have a standalone attribute.
9877 * It will be overwritten later if a standalone attribute is found.
9878 */
9879 ctxt->input->standalone = -2;
9880
9881 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009882 * We know that '<?xml' is here.
9883 */
9884 SKIP(5);
9885
William M. Brack76e95df2003-10-18 16:20:14 +00009886 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009887 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9888 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009889 }
9890 SKIP_BLANKS;
9891
9892 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009893 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009894 */
9895 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009896 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009897 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009898 } else {
9899 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9900 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009901 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009902 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009903 if (ctxt->options & XML_PARSE_OLD10) {
9904 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9905 "Unsupported version '%s'\n",
9906 version);
9907 } else {
9908 if ((version[0] == '1') && ((version[1] == '.'))) {
9909 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9910 "Unsupported version '%s'\n",
9911 version, NULL);
9912 } else {
9913 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9914 "Unsupported version '%s'\n",
9915 version);
9916 }
9917 }
Daniel Veillard19840942001-11-29 16:11:38 +00009918 }
9919 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009920 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009921 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009922 }
Owen Taylor3473f882001-02-23 17:55:21 +00009923
9924 /*
9925 * We may have the encoding declaration
9926 */
William M. Brack76e95df2003-10-18 16:20:14 +00009927 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009928 if ((RAW == '?') && (NXT(1) == '>')) {
9929 SKIP(2);
9930 return;
9931 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009932 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009933 }
9934 xmlParseEncodingDecl(ctxt);
9935 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9936 /*
9937 * The XML REC instructs us to stop parsing right here
9938 */
9939 return;
9940 }
9941
9942 /*
9943 * We may have the standalone status.
9944 */
William M. Brack76e95df2003-10-18 16:20:14 +00009945 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009946 if ((RAW == '?') && (NXT(1) == '>')) {
9947 SKIP(2);
9948 return;
9949 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009950 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009951 }
9952 SKIP_BLANKS;
9953 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9954
9955 SKIP_BLANKS;
9956 if ((RAW == '?') && (NXT(1) == '>')) {
9957 SKIP(2);
9958 } else if (RAW == '>') {
9959 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009960 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009961 NEXT;
9962 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009963 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009964 MOVETO_ENDTAG(CUR_PTR);
9965 NEXT;
9966 }
9967}
9968
9969/**
9970 * xmlParseMisc:
9971 * @ctxt: an XML parser context
9972 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009973 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009974 *
9975 * [27] Misc ::= Comment | PI | S
9976 */
9977
9978void
9979xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009980 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009981 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009982 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009983 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009984 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009985 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009986 NEXT;
9987 } else
9988 xmlParseComment(ctxt);
9989 }
9990}
9991
9992/**
9993 * xmlParseDocument:
9994 * @ctxt: an XML parser context
9995 *
9996 * parse an XML document (and build a tree if using the standard SAX
9997 * interface).
9998 *
9999 * [1] document ::= prolog element Misc*
10000 *
10001 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10002 *
10003 * Returns 0, -1 in case of error. the parser context is augmented
10004 * as a result of the parsing.
10005 */
10006
10007int
10008xmlParseDocument(xmlParserCtxtPtr ctxt) {
10009 xmlChar start[4];
10010 xmlCharEncoding enc;
10011
10012 xmlInitParser();
10013
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010014 if ((ctxt == NULL) || (ctxt->input == NULL))
10015 return(-1);
10016
Owen Taylor3473f882001-02-23 17:55:21 +000010017 GROW;
10018
10019 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010020 * SAX: detecting the level.
10021 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010022 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010023
10024 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010025 * SAX: beginning of the document processing.
10026 */
10027 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10028 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10029
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010030 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10031 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010032 /*
10033 * Get the 4 first bytes and decode the charset
10034 * if enc != XML_CHAR_ENCODING_NONE
10035 * plug some encoding conversion routines.
10036 */
10037 start[0] = RAW;
10038 start[1] = NXT(1);
10039 start[2] = NXT(2);
10040 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010041 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010042 if (enc != XML_CHAR_ENCODING_NONE) {
10043 xmlSwitchEncoding(ctxt, enc);
10044 }
Owen Taylor3473f882001-02-23 17:55:21 +000010045 }
10046
10047
10048 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010049 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010050 }
10051
10052 /*
10053 * Check for the XMLDecl in the Prolog.
10054 */
10055 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010056 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010057
10058 /*
10059 * Note that we will switch encoding on the fly.
10060 */
10061 xmlParseXMLDecl(ctxt);
10062 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10063 /*
10064 * The XML REC instructs us to stop parsing right here
10065 */
10066 return(-1);
10067 }
10068 ctxt->standalone = ctxt->input->standalone;
10069 SKIP_BLANKS;
10070 } else {
10071 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10072 }
10073 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10074 ctxt->sax->startDocument(ctxt->userData);
10075
10076 /*
10077 * The Misc part of the Prolog
10078 */
10079 GROW;
10080 xmlParseMisc(ctxt);
10081
10082 /*
10083 * Then possibly doc type declaration(s) and more Misc
10084 * (doctypedecl Misc*)?
10085 */
10086 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010087 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010088
10089 ctxt->inSubset = 1;
10090 xmlParseDocTypeDecl(ctxt);
10091 if (RAW == '[') {
10092 ctxt->instate = XML_PARSER_DTD;
10093 xmlParseInternalSubset(ctxt);
10094 }
10095
10096 /*
10097 * Create and update the external subset.
10098 */
10099 ctxt->inSubset = 2;
10100 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10101 (!ctxt->disableSAX))
10102 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10103 ctxt->extSubSystem, ctxt->extSubURI);
10104 ctxt->inSubset = 0;
10105
Daniel Veillardac4118d2008-01-11 05:27:32 +000010106 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010107
10108 ctxt->instate = XML_PARSER_PROLOG;
10109 xmlParseMisc(ctxt);
10110 }
10111
10112 /*
10113 * Time to start parsing the tree itself
10114 */
10115 GROW;
10116 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010117 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10118 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010119 } else {
10120 ctxt->instate = XML_PARSER_CONTENT;
10121 xmlParseElement(ctxt);
10122 ctxt->instate = XML_PARSER_EPILOG;
10123
10124
10125 /*
10126 * The Misc part at the end
10127 */
10128 xmlParseMisc(ctxt);
10129
Daniel Veillard561b7f82002-03-20 21:55:57 +000010130 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010131 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010132 }
10133 ctxt->instate = XML_PARSER_EOF;
10134 }
10135
10136 /*
10137 * SAX: end of the document processing.
10138 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010139 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010140 ctxt->sax->endDocument(ctxt->userData);
10141
Daniel Veillard5997aca2002-03-18 18:36:20 +000010142 /*
10143 * Remove locally kept entity definitions if the tree was not built
10144 */
10145 if ((ctxt->myDoc != NULL) &&
10146 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10147 xmlFreeDoc(ctxt->myDoc);
10148 ctxt->myDoc = NULL;
10149 }
10150
Daniel Veillardae0765b2008-07-31 19:54:59 +000010151 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10152 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10153 if (ctxt->valid)
10154 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10155 if (ctxt->nsWellFormed)
10156 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10157 if (ctxt->options & XML_PARSE_OLD10)
10158 ctxt->myDoc->properties |= XML_DOC_OLD10;
10159 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010160 if (! ctxt->wellFormed) {
10161 ctxt->valid = 0;
10162 return(-1);
10163 }
Owen Taylor3473f882001-02-23 17:55:21 +000010164 return(0);
10165}
10166
10167/**
10168 * xmlParseExtParsedEnt:
10169 * @ctxt: an XML parser context
10170 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010171 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010172 * An external general parsed entity is well-formed if it matches the
10173 * production labeled extParsedEnt.
10174 *
10175 * [78] extParsedEnt ::= TextDecl? content
10176 *
10177 * Returns 0, -1 in case of error. the parser context is augmented
10178 * as a result of the parsing.
10179 */
10180
10181int
10182xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10183 xmlChar start[4];
10184 xmlCharEncoding enc;
10185
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010186 if ((ctxt == NULL) || (ctxt->input == NULL))
10187 return(-1);
10188
Owen Taylor3473f882001-02-23 17:55:21 +000010189 xmlDefaultSAXHandlerInit();
10190
Daniel Veillard309f81d2003-09-23 09:02:53 +000010191 xmlDetectSAX2(ctxt);
10192
Owen Taylor3473f882001-02-23 17:55:21 +000010193 GROW;
10194
10195 /*
10196 * SAX: beginning of the document processing.
10197 */
10198 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10199 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10200
10201 /*
10202 * Get the 4 first bytes and decode the charset
10203 * if enc != XML_CHAR_ENCODING_NONE
10204 * plug some encoding conversion routines.
10205 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010206 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10207 start[0] = RAW;
10208 start[1] = NXT(1);
10209 start[2] = NXT(2);
10210 start[3] = NXT(3);
10211 enc = xmlDetectCharEncoding(start, 4);
10212 if (enc != XML_CHAR_ENCODING_NONE) {
10213 xmlSwitchEncoding(ctxt, enc);
10214 }
Owen Taylor3473f882001-02-23 17:55:21 +000010215 }
10216
10217
10218 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010219 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010220 }
10221
10222 /*
10223 * Check for the XMLDecl in the Prolog.
10224 */
10225 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010226 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010227
10228 /*
10229 * Note that we will switch encoding on the fly.
10230 */
10231 xmlParseXMLDecl(ctxt);
10232 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10233 /*
10234 * The XML REC instructs us to stop parsing right here
10235 */
10236 return(-1);
10237 }
10238 SKIP_BLANKS;
10239 } else {
10240 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10241 }
10242 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10243 ctxt->sax->startDocument(ctxt->userData);
10244
10245 /*
10246 * Doing validity checking on chunk doesn't make sense
10247 */
10248 ctxt->instate = XML_PARSER_CONTENT;
10249 ctxt->validate = 0;
10250 ctxt->loadsubset = 0;
10251 ctxt->depth = 0;
10252
10253 xmlParseContent(ctxt);
10254
10255 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010256 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010257 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010258 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010259 }
10260
10261 /*
10262 * SAX: end of the document processing.
10263 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010264 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010265 ctxt->sax->endDocument(ctxt->userData);
10266
10267 if (! ctxt->wellFormed) return(-1);
10268 return(0);
10269}
10270
Daniel Veillard73b013f2003-09-30 12:36:01 +000010271#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010272/************************************************************************
10273 * *
10274 * Progressive parsing interfaces *
10275 * *
10276 ************************************************************************/
10277
10278/**
10279 * xmlParseLookupSequence:
10280 * @ctxt: an XML parser context
10281 * @first: the first char to lookup
10282 * @next: the next char to lookup or zero
10283 * @third: the next char to lookup or zero
10284 *
10285 * Try to find if a sequence (first, next, third) or just (first next) or
10286 * (first) is available in the input stream.
10287 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10288 * to avoid rescanning sequences of bytes, it DOES change the state of the
10289 * parser, do not use liberally.
10290 *
10291 * Returns the index to the current parsing point if the full sequence
10292 * is available, -1 otherwise.
10293 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010294static int
Owen Taylor3473f882001-02-23 17:55:21 +000010295xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10296 xmlChar next, xmlChar third) {
10297 int base, len;
10298 xmlParserInputPtr in;
10299 const xmlChar *buf;
10300
10301 in = ctxt->input;
10302 if (in == NULL) return(-1);
10303 base = in->cur - in->base;
10304 if (base < 0) return(-1);
10305 if (ctxt->checkIndex > base)
10306 base = ctxt->checkIndex;
10307 if (in->buf == NULL) {
10308 buf = in->base;
10309 len = in->length;
10310 } else {
10311 buf = in->buf->buffer->content;
10312 len = in->buf->buffer->use;
10313 }
10314 /* take into account the sequence length */
10315 if (third) len -= 2;
10316 else if (next) len --;
10317 for (;base < len;base++) {
10318 if (buf[base] == first) {
10319 if (third != 0) {
10320 if ((buf[base + 1] != next) ||
10321 (buf[base + 2] != third)) continue;
10322 } else if (next != 0) {
10323 if (buf[base + 1] != next) continue;
10324 }
10325 ctxt->checkIndex = 0;
10326#ifdef DEBUG_PUSH
10327 if (next == 0)
10328 xmlGenericError(xmlGenericErrorContext,
10329 "PP: lookup '%c' found at %d\n",
10330 first, base);
10331 else if (third == 0)
10332 xmlGenericError(xmlGenericErrorContext,
10333 "PP: lookup '%c%c' found at %d\n",
10334 first, next, base);
10335 else
10336 xmlGenericError(xmlGenericErrorContext,
10337 "PP: lookup '%c%c%c' found at %d\n",
10338 first, next, third, base);
10339#endif
10340 return(base - (in->cur - in->base));
10341 }
10342 }
10343 ctxt->checkIndex = base;
10344#ifdef DEBUG_PUSH
10345 if (next == 0)
10346 xmlGenericError(xmlGenericErrorContext,
10347 "PP: lookup '%c' failed\n", first);
10348 else if (third == 0)
10349 xmlGenericError(xmlGenericErrorContext,
10350 "PP: lookup '%c%c' failed\n", first, next);
10351 else
10352 xmlGenericError(xmlGenericErrorContext,
10353 "PP: lookup '%c%c%c' failed\n", first, next, third);
10354#endif
10355 return(-1);
10356}
10357
10358/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010359 * xmlParseGetLasts:
10360 * @ctxt: an XML parser context
10361 * @lastlt: pointer to store the last '<' from the input
10362 * @lastgt: pointer to store the last '>' from the input
10363 *
10364 * Lookup the last < and > in the current chunk
10365 */
10366static void
10367xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10368 const xmlChar **lastgt) {
10369 const xmlChar *tmp;
10370
10371 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10372 xmlGenericError(xmlGenericErrorContext,
10373 "Internal error: xmlParseGetLasts\n");
10374 return;
10375 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010376 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010377 tmp = ctxt->input->end;
10378 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010379 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010380 if (tmp < ctxt->input->base) {
10381 *lastlt = NULL;
10382 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010383 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010384 *lastlt = tmp;
10385 tmp++;
10386 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10387 if (*tmp == '\'') {
10388 tmp++;
10389 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10390 if (tmp < ctxt->input->end) tmp++;
10391 } else if (*tmp == '"') {
10392 tmp++;
10393 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10394 if (tmp < ctxt->input->end) tmp++;
10395 } else
10396 tmp++;
10397 }
10398 if (tmp < ctxt->input->end)
10399 *lastgt = tmp;
10400 else {
10401 tmp = *lastlt;
10402 tmp--;
10403 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10404 if (tmp >= ctxt->input->base)
10405 *lastgt = tmp;
10406 else
10407 *lastgt = NULL;
10408 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010409 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010410 } else {
10411 *lastlt = NULL;
10412 *lastgt = NULL;
10413 }
10414}
10415/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010416 * xmlCheckCdataPush:
10417 * @cur: pointer to the bock of characters
10418 * @len: length of the block in bytes
10419 *
10420 * Check that the block of characters is okay as SCdata content [20]
10421 *
10422 * Returns the number of bytes to pass if okay, a negative index where an
10423 * UTF-8 error occured otherwise
10424 */
10425static int
10426xmlCheckCdataPush(const xmlChar *utf, int len) {
10427 int ix;
10428 unsigned char c;
10429 int codepoint;
10430
10431 if ((utf == NULL) || (len <= 0))
10432 return(0);
10433
10434 for (ix = 0; ix < len;) { /* string is 0-terminated */
10435 c = utf[ix];
10436 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10437 if (c >= 0x20)
10438 ix++;
10439 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10440 ix++;
10441 else
10442 return(-ix);
10443 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10444 if (ix + 2 > len) return(ix);
10445 if ((utf[ix+1] & 0xc0 ) != 0x80)
10446 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010447 codepoint = (utf[ix] & 0x1f) << 6;
10448 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010449 if (!xmlIsCharQ(codepoint))
10450 return(-ix);
10451 ix += 2;
10452 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10453 if (ix + 3 > len) return(ix);
10454 if (((utf[ix+1] & 0xc0) != 0x80) ||
10455 ((utf[ix+2] & 0xc0) != 0x80))
10456 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010457 codepoint = (utf[ix] & 0xf) << 12;
10458 codepoint |= (utf[ix+1] & 0x3f) << 6;
10459 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010460 if (!xmlIsCharQ(codepoint))
10461 return(-ix);
10462 ix += 3;
10463 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10464 if (ix + 4 > len) return(ix);
10465 if (((utf[ix+1] & 0xc0) != 0x80) ||
10466 ((utf[ix+2] & 0xc0) != 0x80) ||
10467 ((utf[ix+3] & 0xc0) != 0x80))
10468 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010469 codepoint = (utf[ix] & 0x7) << 18;
10470 codepoint |= (utf[ix+1] & 0x3f) << 12;
10471 codepoint |= (utf[ix+2] & 0x3f) << 6;
10472 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010473 if (!xmlIsCharQ(codepoint))
10474 return(-ix);
10475 ix += 4;
10476 } else /* unknown encoding */
10477 return(-ix);
10478 }
10479 return(ix);
10480}
10481
10482/**
Owen Taylor3473f882001-02-23 17:55:21 +000010483 * xmlParseTryOrFinish:
10484 * @ctxt: an XML parser context
10485 * @terminate: last chunk indicator
10486 *
10487 * Try to progress on parsing
10488 *
10489 * Returns zero if no parsing was possible
10490 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010491static int
Owen Taylor3473f882001-02-23 17:55:21 +000010492xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10493 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010494 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010495 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010496 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010497
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010498 if (ctxt->input == NULL)
10499 return(0);
10500
Owen Taylor3473f882001-02-23 17:55:21 +000010501#ifdef DEBUG_PUSH
10502 switch (ctxt->instate) {
10503 case XML_PARSER_EOF:
10504 xmlGenericError(xmlGenericErrorContext,
10505 "PP: try EOF\n"); break;
10506 case XML_PARSER_START:
10507 xmlGenericError(xmlGenericErrorContext,
10508 "PP: try START\n"); break;
10509 case XML_PARSER_MISC:
10510 xmlGenericError(xmlGenericErrorContext,
10511 "PP: try MISC\n");break;
10512 case XML_PARSER_COMMENT:
10513 xmlGenericError(xmlGenericErrorContext,
10514 "PP: try COMMENT\n");break;
10515 case XML_PARSER_PROLOG:
10516 xmlGenericError(xmlGenericErrorContext,
10517 "PP: try PROLOG\n");break;
10518 case XML_PARSER_START_TAG:
10519 xmlGenericError(xmlGenericErrorContext,
10520 "PP: try START_TAG\n");break;
10521 case XML_PARSER_CONTENT:
10522 xmlGenericError(xmlGenericErrorContext,
10523 "PP: try CONTENT\n");break;
10524 case XML_PARSER_CDATA_SECTION:
10525 xmlGenericError(xmlGenericErrorContext,
10526 "PP: try CDATA_SECTION\n");break;
10527 case XML_PARSER_END_TAG:
10528 xmlGenericError(xmlGenericErrorContext,
10529 "PP: try END_TAG\n");break;
10530 case XML_PARSER_ENTITY_DECL:
10531 xmlGenericError(xmlGenericErrorContext,
10532 "PP: try ENTITY_DECL\n");break;
10533 case XML_PARSER_ENTITY_VALUE:
10534 xmlGenericError(xmlGenericErrorContext,
10535 "PP: try ENTITY_VALUE\n");break;
10536 case XML_PARSER_ATTRIBUTE_VALUE:
10537 xmlGenericError(xmlGenericErrorContext,
10538 "PP: try ATTRIBUTE_VALUE\n");break;
10539 case XML_PARSER_DTD:
10540 xmlGenericError(xmlGenericErrorContext,
10541 "PP: try DTD\n");break;
10542 case XML_PARSER_EPILOG:
10543 xmlGenericError(xmlGenericErrorContext,
10544 "PP: try EPILOG\n");break;
10545 case XML_PARSER_PI:
10546 xmlGenericError(xmlGenericErrorContext,
10547 "PP: try PI\n");break;
10548 case XML_PARSER_IGNORE:
10549 xmlGenericError(xmlGenericErrorContext,
10550 "PP: try IGNORE\n");break;
10551 }
10552#endif
10553
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010554 if ((ctxt->input != NULL) &&
10555 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010556 xmlSHRINK(ctxt);
10557 ctxt->checkIndex = 0;
10558 }
10559 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010560
Daniel Veillarda880b122003-04-21 21:36:41 +000010561 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010562 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010563 return(0);
10564
10565
Owen Taylor3473f882001-02-23 17:55:21 +000010566 /*
10567 * Pop-up of finished entities.
10568 */
10569 while ((RAW == 0) && (ctxt->inputNr > 1))
10570 xmlPopInput(ctxt);
10571
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010572 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010573 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010574 avail = ctxt->input->length -
10575 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010576 else {
10577 /*
10578 * If we are operating on converted input, try to flush
10579 * remaining chars to avoid them stalling in the non-converted
10580 * buffer.
10581 */
10582 if ((ctxt->input->buf->raw != NULL) &&
10583 (ctxt->input->buf->raw->use > 0)) {
10584 int base = ctxt->input->base -
10585 ctxt->input->buf->buffer->content;
10586 int current = ctxt->input->cur - ctxt->input->base;
10587
10588 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10589 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10590 ctxt->input->cur = ctxt->input->base + current;
10591 ctxt->input->end =
10592 &ctxt->input->buf->buffer->content[
10593 ctxt->input->buf->buffer->use];
10594 }
10595 avail = ctxt->input->buf->buffer->use -
10596 (ctxt->input->cur - ctxt->input->base);
10597 }
Owen Taylor3473f882001-02-23 17:55:21 +000010598 if (avail < 1)
10599 goto done;
10600 switch (ctxt->instate) {
10601 case XML_PARSER_EOF:
10602 /*
10603 * Document parsing is done !
10604 */
10605 goto done;
10606 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010607 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10608 xmlChar start[4];
10609 xmlCharEncoding enc;
10610
10611 /*
10612 * Very first chars read from the document flow.
10613 */
10614 if (avail < 4)
10615 goto done;
10616
10617 /*
10618 * Get the 4 first bytes and decode the charset
10619 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010620 * plug some encoding conversion routines,
10621 * else xmlSwitchEncoding will set to (default)
10622 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010623 */
10624 start[0] = RAW;
10625 start[1] = NXT(1);
10626 start[2] = NXT(2);
10627 start[3] = NXT(3);
10628 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010629 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010630 break;
10631 }
Owen Taylor3473f882001-02-23 17:55:21 +000010632
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010633 if (avail < 2)
10634 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010635 cur = ctxt->input->cur[0];
10636 next = ctxt->input->cur[1];
10637 if (cur == 0) {
10638 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10639 ctxt->sax->setDocumentLocator(ctxt->userData,
10640 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010641 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010642 ctxt->instate = XML_PARSER_EOF;
10643#ifdef DEBUG_PUSH
10644 xmlGenericError(xmlGenericErrorContext,
10645 "PP: entering EOF\n");
10646#endif
10647 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10648 ctxt->sax->endDocument(ctxt->userData);
10649 goto done;
10650 }
10651 if ((cur == '<') && (next == '?')) {
10652 /* PI or XML decl */
10653 if (avail < 5) return(ret);
10654 if ((!terminate) &&
10655 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10656 return(ret);
10657 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10658 ctxt->sax->setDocumentLocator(ctxt->userData,
10659 &xmlDefaultSAXLocator);
10660 if ((ctxt->input->cur[2] == 'x') &&
10661 (ctxt->input->cur[3] == 'm') &&
10662 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010663 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010664 ret += 5;
10665#ifdef DEBUG_PUSH
10666 xmlGenericError(xmlGenericErrorContext,
10667 "PP: Parsing XML Decl\n");
10668#endif
10669 xmlParseXMLDecl(ctxt);
10670 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10671 /*
10672 * The XML REC instructs us to stop parsing right
10673 * here
10674 */
10675 ctxt->instate = XML_PARSER_EOF;
10676 return(0);
10677 }
10678 ctxt->standalone = ctxt->input->standalone;
10679 if ((ctxt->encoding == NULL) &&
10680 (ctxt->input->encoding != NULL))
10681 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10682 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10683 (!ctxt->disableSAX))
10684 ctxt->sax->startDocument(ctxt->userData);
10685 ctxt->instate = XML_PARSER_MISC;
10686#ifdef DEBUG_PUSH
10687 xmlGenericError(xmlGenericErrorContext,
10688 "PP: entering MISC\n");
10689#endif
10690 } else {
10691 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10692 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10693 (!ctxt->disableSAX))
10694 ctxt->sax->startDocument(ctxt->userData);
10695 ctxt->instate = XML_PARSER_MISC;
10696#ifdef DEBUG_PUSH
10697 xmlGenericError(xmlGenericErrorContext,
10698 "PP: entering MISC\n");
10699#endif
10700 }
10701 } else {
10702 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10703 ctxt->sax->setDocumentLocator(ctxt->userData,
10704 &xmlDefaultSAXLocator);
10705 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010706 if (ctxt->version == NULL) {
10707 xmlErrMemory(ctxt, NULL);
10708 break;
10709 }
Owen Taylor3473f882001-02-23 17:55:21 +000010710 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10711 (!ctxt->disableSAX))
10712 ctxt->sax->startDocument(ctxt->userData);
10713 ctxt->instate = XML_PARSER_MISC;
10714#ifdef DEBUG_PUSH
10715 xmlGenericError(xmlGenericErrorContext,
10716 "PP: entering MISC\n");
10717#endif
10718 }
10719 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010720 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010721 const xmlChar *name;
10722 const xmlChar *prefix;
10723 const xmlChar *URI;
10724 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010725
10726 if ((avail < 2) && (ctxt->inputNr == 1))
10727 goto done;
10728 cur = ctxt->input->cur[0];
10729 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010730 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010731 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010732 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10733 ctxt->sax->endDocument(ctxt->userData);
10734 goto done;
10735 }
10736 if (!terminate) {
10737 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010738 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010739 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010740 goto done;
10741 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10742 goto done;
10743 }
10744 }
10745 if (ctxt->spaceNr == 0)
10746 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010747 else if (*ctxt->space == -2)
10748 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010749 else
10750 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010751#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010752 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010753#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010754 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010755#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010756 else
10757 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010758#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010759 if (name == NULL) {
10760 spacePop(ctxt);
10761 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010762 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10763 ctxt->sax->endDocument(ctxt->userData);
10764 goto done;
10765 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010766#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010767 /*
10768 * [ VC: Root Element Type ]
10769 * The Name in the document type declaration must match
10770 * the element type of the root element.
10771 */
10772 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10773 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10774 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010775#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010776
10777 /*
10778 * Check for an Empty Element.
10779 */
10780 if ((RAW == '/') && (NXT(1) == '>')) {
10781 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010782
10783 if (ctxt->sax2) {
10784 if ((ctxt->sax != NULL) &&
10785 (ctxt->sax->endElementNs != NULL) &&
10786 (!ctxt->disableSAX))
10787 ctxt->sax->endElementNs(ctxt->userData, name,
10788 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010789 if (ctxt->nsNr - nsNr > 0)
10790 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010791#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010792 } else {
10793 if ((ctxt->sax != NULL) &&
10794 (ctxt->sax->endElement != NULL) &&
10795 (!ctxt->disableSAX))
10796 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010797#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010798 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010799 spacePop(ctxt);
10800 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010801 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010802 } else {
10803 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010804 }
10805 break;
10806 }
10807 if (RAW == '>') {
10808 NEXT;
10809 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010810 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010811 "Couldn't find end of Start Tag %s\n",
10812 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010813 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010814 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010815 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010816 if (ctxt->sax2)
10817 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010818#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010819 else
10820 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010821#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010822
Daniel Veillarda880b122003-04-21 21:36:41 +000010823 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010824 break;
10825 }
10826 case XML_PARSER_CONTENT: {
10827 const xmlChar *test;
10828 unsigned int cons;
10829 if ((avail < 2) && (ctxt->inputNr == 1))
10830 goto done;
10831 cur = ctxt->input->cur[0];
10832 next = ctxt->input->cur[1];
10833
10834 test = CUR_PTR;
10835 cons = ctxt->input->consumed;
10836 if ((cur == '<') && (next == '/')) {
10837 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010838 break;
10839 } else if ((cur == '<') && (next == '?')) {
10840 if ((!terminate) &&
10841 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10842 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010843 xmlParsePI(ctxt);
10844 } else if ((cur == '<') && (next != '!')) {
10845 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010846 break;
10847 } else if ((cur == '<') && (next == '!') &&
10848 (ctxt->input->cur[2] == '-') &&
10849 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010850 int term;
10851
10852 if (avail < 4)
10853 goto done;
10854 ctxt->input->cur += 4;
10855 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10856 ctxt->input->cur -= 4;
10857 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010858 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010859 xmlParseComment(ctxt);
10860 ctxt->instate = XML_PARSER_CONTENT;
10861 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10862 (ctxt->input->cur[2] == '[') &&
10863 (ctxt->input->cur[3] == 'C') &&
10864 (ctxt->input->cur[4] == 'D') &&
10865 (ctxt->input->cur[5] == 'A') &&
10866 (ctxt->input->cur[6] == 'T') &&
10867 (ctxt->input->cur[7] == 'A') &&
10868 (ctxt->input->cur[8] == '[')) {
10869 SKIP(9);
10870 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010871 break;
10872 } else if ((cur == '<') && (next == '!') &&
10873 (avail < 9)) {
10874 goto done;
10875 } else if (cur == '&') {
10876 if ((!terminate) &&
10877 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10878 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010879 xmlParseReference(ctxt);
10880 } else {
10881 /* TODO Avoid the extra copy, handle directly !!! */
10882 /*
10883 * Goal of the following test is:
10884 * - minimize calls to the SAX 'character' callback
10885 * when they are mergeable
10886 * - handle a problem for isBlank when we only parse
10887 * a sequence of blank chars and the next one is
10888 * not available to check against '<' presence.
10889 * - tries to homogenize the differences in SAX
10890 * callbacks between the push and pull versions
10891 * of the parser.
10892 */
10893 if ((ctxt->inputNr == 1) &&
10894 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10895 if (!terminate) {
10896 if (ctxt->progressive) {
10897 if ((lastlt == NULL) ||
10898 (ctxt->input->cur > lastlt))
10899 goto done;
10900 } else if (xmlParseLookupSequence(ctxt,
10901 '<', 0, 0) < 0) {
10902 goto done;
10903 }
10904 }
10905 }
10906 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010907 xmlParseCharData(ctxt, 0);
10908 }
10909 /*
10910 * Pop-up of finished entities.
10911 */
10912 while ((RAW == 0) && (ctxt->inputNr > 1))
10913 xmlPopInput(ctxt);
10914 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010915 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10916 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010917 ctxt->instate = XML_PARSER_EOF;
10918 break;
10919 }
10920 break;
10921 }
10922 case XML_PARSER_END_TAG:
10923 if (avail < 2)
10924 goto done;
10925 if (!terminate) {
10926 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010927 /* > can be found unescaped in attribute values */
10928 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010929 goto done;
10930 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10931 goto done;
10932 }
10933 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010934 if (ctxt->sax2) {
10935 xmlParseEndTag2(ctxt,
10936 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10937 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010938 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010939 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010940 }
10941#ifdef LIBXML_SAX1_ENABLED
10942 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010943 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010944#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010945 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010946 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010947 } else {
10948 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010949 }
10950 break;
10951 case XML_PARSER_CDATA_SECTION: {
10952 /*
10953 * The Push mode need to have the SAX callback for
10954 * cdataBlock merge back contiguous callbacks.
10955 */
10956 int base;
10957
10958 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10959 if (base < 0) {
10960 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010961 int tmp;
10962
10963 tmp = xmlCheckCdataPush(ctxt->input->cur,
10964 XML_PARSER_BIG_BUFFER_SIZE);
10965 if (tmp < 0) {
10966 tmp = -tmp;
10967 ctxt->input->cur += tmp;
10968 goto encoding_error;
10969 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010970 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10971 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010972 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010973 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010974 else if (ctxt->sax->characters != NULL)
10975 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010976 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010977 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010978 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010979 ctxt->checkIndex = 0;
10980 }
10981 goto done;
10982 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010983 int tmp;
10984
10985 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10986 if ((tmp < 0) || (tmp != base)) {
10987 tmp = -tmp;
10988 ctxt->input->cur += tmp;
10989 goto encoding_error;
10990 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010991 if ((ctxt->sax != NULL) && (base == 0) &&
10992 (ctxt->sax->cdataBlock != NULL) &&
10993 (!ctxt->disableSAX)) {
10994 /*
10995 * Special case to provide identical behaviour
10996 * between pull and push parsers on enpty CDATA
10997 * sections
10998 */
10999 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11000 (!strncmp((const char *)&ctxt->input->cur[-9],
11001 "<![CDATA[", 9)))
11002 ctxt->sax->cdataBlock(ctxt->userData,
11003 BAD_CAST "", 0);
11004 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011005 (!ctxt->disableSAX)) {
11006 if (ctxt->sax->cdataBlock != NULL)
11007 ctxt->sax->cdataBlock(ctxt->userData,
11008 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011009 else if (ctxt->sax->characters != NULL)
11010 ctxt->sax->characters(ctxt->userData,
11011 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011012 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011013 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011014 ctxt->checkIndex = 0;
11015 ctxt->instate = XML_PARSER_CONTENT;
11016#ifdef DEBUG_PUSH
11017 xmlGenericError(xmlGenericErrorContext,
11018 "PP: entering CONTENT\n");
11019#endif
11020 }
11021 break;
11022 }
Owen Taylor3473f882001-02-23 17:55:21 +000011023 case XML_PARSER_MISC:
11024 SKIP_BLANKS;
11025 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011026 avail = ctxt->input->length -
11027 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011028 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011029 avail = ctxt->input->buf->buffer->use -
11030 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011031 if (avail < 2)
11032 goto done;
11033 cur = ctxt->input->cur[0];
11034 next = ctxt->input->cur[1];
11035 if ((cur == '<') && (next == '?')) {
11036 if ((!terminate) &&
11037 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11038 goto done;
11039#ifdef DEBUG_PUSH
11040 xmlGenericError(xmlGenericErrorContext,
11041 "PP: Parsing PI\n");
11042#endif
11043 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011044 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011045 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011046 (ctxt->input->cur[2] == '-') &&
11047 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011048 if ((!terminate) &&
11049 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11050 goto done;
11051#ifdef DEBUG_PUSH
11052 xmlGenericError(xmlGenericErrorContext,
11053 "PP: Parsing Comment\n");
11054#endif
11055 xmlParseComment(ctxt);
11056 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011057 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011058 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011059 (ctxt->input->cur[2] == 'D') &&
11060 (ctxt->input->cur[3] == 'O') &&
11061 (ctxt->input->cur[4] == 'C') &&
11062 (ctxt->input->cur[5] == 'T') &&
11063 (ctxt->input->cur[6] == 'Y') &&
11064 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011065 (ctxt->input->cur[8] == 'E')) {
11066 if ((!terminate) &&
11067 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11068 goto done;
11069#ifdef DEBUG_PUSH
11070 xmlGenericError(xmlGenericErrorContext,
11071 "PP: Parsing internal subset\n");
11072#endif
11073 ctxt->inSubset = 1;
11074 xmlParseDocTypeDecl(ctxt);
11075 if (RAW == '[') {
11076 ctxt->instate = XML_PARSER_DTD;
11077#ifdef DEBUG_PUSH
11078 xmlGenericError(xmlGenericErrorContext,
11079 "PP: entering DTD\n");
11080#endif
11081 } else {
11082 /*
11083 * Create and update the external subset.
11084 */
11085 ctxt->inSubset = 2;
11086 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11087 (ctxt->sax->externalSubset != NULL))
11088 ctxt->sax->externalSubset(ctxt->userData,
11089 ctxt->intSubName, ctxt->extSubSystem,
11090 ctxt->extSubURI);
11091 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011092 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011093 ctxt->instate = XML_PARSER_PROLOG;
11094#ifdef DEBUG_PUSH
11095 xmlGenericError(xmlGenericErrorContext,
11096 "PP: entering PROLOG\n");
11097#endif
11098 }
11099 } else if ((cur == '<') && (next == '!') &&
11100 (avail < 9)) {
11101 goto done;
11102 } else {
11103 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011104 ctxt->progressive = 1;
11105 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011106#ifdef DEBUG_PUSH
11107 xmlGenericError(xmlGenericErrorContext,
11108 "PP: entering START_TAG\n");
11109#endif
11110 }
11111 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011112 case XML_PARSER_PROLOG:
11113 SKIP_BLANKS;
11114 if (ctxt->input->buf == NULL)
11115 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11116 else
11117 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11118 if (avail < 2)
11119 goto done;
11120 cur = ctxt->input->cur[0];
11121 next = ctxt->input->cur[1];
11122 if ((cur == '<') && (next == '?')) {
11123 if ((!terminate) &&
11124 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11125 goto done;
11126#ifdef DEBUG_PUSH
11127 xmlGenericError(xmlGenericErrorContext,
11128 "PP: Parsing PI\n");
11129#endif
11130 xmlParsePI(ctxt);
11131 } else if ((cur == '<') && (next == '!') &&
11132 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11133 if ((!terminate) &&
11134 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11135 goto done;
11136#ifdef DEBUG_PUSH
11137 xmlGenericError(xmlGenericErrorContext,
11138 "PP: Parsing Comment\n");
11139#endif
11140 xmlParseComment(ctxt);
11141 ctxt->instate = XML_PARSER_PROLOG;
11142 } else if ((cur == '<') && (next == '!') &&
11143 (avail < 4)) {
11144 goto done;
11145 } else {
11146 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011147 if (ctxt->progressive == 0)
11148 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011149 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011150#ifdef DEBUG_PUSH
11151 xmlGenericError(xmlGenericErrorContext,
11152 "PP: entering START_TAG\n");
11153#endif
11154 }
11155 break;
11156 case XML_PARSER_EPILOG:
11157 SKIP_BLANKS;
11158 if (ctxt->input->buf == NULL)
11159 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11160 else
11161 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11162 if (avail < 2)
11163 goto done;
11164 cur = ctxt->input->cur[0];
11165 next = ctxt->input->cur[1];
11166 if ((cur == '<') && (next == '?')) {
11167 if ((!terminate) &&
11168 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11169 goto done;
11170#ifdef DEBUG_PUSH
11171 xmlGenericError(xmlGenericErrorContext,
11172 "PP: Parsing PI\n");
11173#endif
11174 xmlParsePI(ctxt);
11175 ctxt->instate = XML_PARSER_EPILOG;
11176 } else if ((cur == '<') && (next == '!') &&
11177 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11178 if ((!terminate) &&
11179 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11180 goto done;
11181#ifdef DEBUG_PUSH
11182 xmlGenericError(xmlGenericErrorContext,
11183 "PP: Parsing Comment\n");
11184#endif
11185 xmlParseComment(ctxt);
11186 ctxt->instate = XML_PARSER_EPILOG;
11187 } else if ((cur == '<') && (next == '!') &&
11188 (avail < 4)) {
11189 goto done;
11190 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011191 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011192 ctxt->instate = XML_PARSER_EOF;
11193#ifdef DEBUG_PUSH
11194 xmlGenericError(xmlGenericErrorContext,
11195 "PP: entering EOF\n");
11196#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011197 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011198 ctxt->sax->endDocument(ctxt->userData);
11199 goto done;
11200 }
11201 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011202 case XML_PARSER_DTD: {
11203 /*
11204 * Sorry but progressive parsing of the internal subset
11205 * is not expected to be supported. We first check that
11206 * the full content of the internal subset is available and
11207 * the parsing is launched only at that point.
11208 * Internal subset ends up with "']' S? '>'" in an unescaped
11209 * section and not in a ']]>' sequence which are conditional
11210 * sections (whoever argued to keep that crap in XML deserve
11211 * a place in hell !).
11212 */
11213 int base, i;
11214 xmlChar *buf;
11215 xmlChar quote = 0;
11216
11217 base = ctxt->input->cur - ctxt->input->base;
11218 if (base < 0) return(0);
11219 if (ctxt->checkIndex > base)
11220 base = ctxt->checkIndex;
11221 buf = ctxt->input->buf->buffer->content;
11222 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11223 base++) {
11224 if (quote != 0) {
11225 if (buf[base] == quote)
11226 quote = 0;
11227 continue;
11228 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011229 if ((quote == 0) && (buf[base] == '<')) {
11230 int found = 0;
11231 /* special handling of comments */
11232 if (((unsigned int) base + 4 <
11233 ctxt->input->buf->buffer->use) &&
11234 (buf[base + 1] == '!') &&
11235 (buf[base + 2] == '-') &&
11236 (buf[base + 3] == '-')) {
11237 for (;(unsigned int) base + 3 <
11238 ctxt->input->buf->buffer->use; base++) {
11239 if ((buf[base] == '-') &&
11240 (buf[base + 1] == '-') &&
11241 (buf[base + 2] == '>')) {
11242 found = 1;
11243 base += 2;
11244 break;
11245 }
11246 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011247 if (!found) {
11248#if 0
11249 fprintf(stderr, "unfinished comment\n");
11250#endif
11251 break; /* for */
11252 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011253 continue;
11254 }
11255 }
Owen Taylor3473f882001-02-23 17:55:21 +000011256 if (buf[base] == '"') {
11257 quote = '"';
11258 continue;
11259 }
11260 if (buf[base] == '\'') {
11261 quote = '\'';
11262 continue;
11263 }
11264 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011265#if 0
11266 fprintf(stderr, "%c%c%c%c: ", buf[base],
11267 buf[base + 1], buf[base + 2], buf[base + 3]);
11268#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011269 if ((unsigned int) base +1 >=
11270 ctxt->input->buf->buffer->use)
11271 break;
11272 if (buf[base + 1] == ']') {
11273 /* conditional crap, skip both ']' ! */
11274 base++;
11275 continue;
11276 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011277 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011278 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11279 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011280 if (buf[base + i] == '>') {
11281#if 0
11282 fprintf(stderr, "found\n");
11283#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011284 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011285 }
11286 if (!IS_BLANK_CH(buf[base + i])) {
11287#if 0
11288 fprintf(stderr, "not found\n");
11289#endif
11290 goto not_end_of_int_subset;
11291 }
Owen Taylor3473f882001-02-23 17:55:21 +000011292 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011293#if 0
11294 fprintf(stderr, "end of stream\n");
11295#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011296 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011297
Owen Taylor3473f882001-02-23 17:55:21 +000011298 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011299not_end_of_int_subset:
11300 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011301 }
11302 /*
11303 * We didn't found the end of the Internal subset
11304 */
Owen Taylor3473f882001-02-23 17:55:21 +000011305#ifdef DEBUG_PUSH
11306 if (next == 0)
11307 xmlGenericError(xmlGenericErrorContext,
11308 "PP: lookup of int subset end filed\n");
11309#endif
11310 goto done;
11311
11312found_end_int_subset:
11313 xmlParseInternalSubset(ctxt);
11314 ctxt->inSubset = 2;
11315 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11316 (ctxt->sax->externalSubset != NULL))
11317 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11318 ctxt->extSubSystem, ctxt->extSubURI);
11319 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011320 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011321 ctxt->instate = XML_PARSER_PROLOG;
11322 ctxt->checkIndex = 0;
11323#ifdef DEBUG_PUSH
11324 xmlGenericError(xmlGenericErrorContext,
11325 "PP: entering PROLOG\n");
11326#endif
11327 break;
11328 }
11329 case XML_PARSER_COMMENT:
11330 xmlGenericError(xmlGenericErrorContext,
11331 "PP: internal error, state == COMMENT\n");
11332 ctxt->instate = XML_PARSER_CONTENT;
11333#ifdef DEBUG_PUSH
11334 xmlGenericError(xmlGenericErrorContext,
11335 "PP: entering CONTENT\n");
11336#endif
11337 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011338 case XML_PARSER_IGNORE:
11339 xmlGenericError(xmlGenericErrorContext,
11340 "PP: internal error, state == IGNORE");
11341 ctxt->instate = XML_PARSER_DTD;
11342#ifdef DEBUG_PUSH
11343 xmlGenericError(xmlGenericErrorContext,
11344 "PP: entering DTD\n");
11345#endif
11346 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011347 case XML_PARSER_PI:
11348 xmlGenericError(xmlGenericErrorContext,
11349 "PP: internal error, state == PI\n");
11350 ctxt->instate = XML_PARSER_CONTENT;
11351#ifdef DEBUG_PUSH
11352 xmlGenericError(xmlGenericErrorContext,
11353 "PP: entering CONTENT\n");
11354#endif
11355 break;
11356 case XML_PARSER_ENTITY_DECL:
11357 xmlGenericError(xmlGenericErrorContext,
11358 "PP: internal error, state == ENTITY_DECL\n");
11359 ctxt->instate = XML_PARSER_DTD;
11360#ifdef DEBUG_PUSH
11361 xmlGenericError(xmlGenericErrorContext,
11362 "PP: entering DTD\n");
11363#endif
11364 break;
11365 case XML_PARSER_ENTITY_VALUE:
11366 xmlGenericError(xmlGenericErrorContext,
11367 "PP: internal error, state == ENTITY_VALUE\n");
11368 ctxt->instate = XML_PARSER_CONTENT;
11369#ifdef DEBUG_PUSH
11370 xmlGenericError(xmlGenericErrorContext,
11371 "PP: entering DTD\n");
11372#endif
11373 break;
11374 case XML_PARSER_ATTRIBUTE_VALUE:
11375 xmlGenericError(xmlGenericErrorContext,
11376 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11377 ctxt->instate = XML_PARSER_START_TAG;
11378#ifdef DEBUG_PUSH
11379 xmlGenericError(xmlGenericErrorContext,
11380 "PP: entering START_TAG\n");
11381#endif
11382 break;
11383 case XML_PARSER_SYSTEM_LITERAL:
11384 xmlGenericError(xmlGenericErrorContext,
11385 "PP: internal error, state == SYSTEM_LITERAL\n");
11386 ctxt->instate = XML_PARSER_START_TAG;
11387#ifdef DEBUG_PUSH
11388 xmlGenericError(xmlGenericErrorContext,
11389 "PP: entering START_TAG\n");
11390#endif
11391 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011392 case XML_PARSER_PUBLIC_LITERAL:
11393 xmlGenericError(xmlGenericErrorContext,
11394 "PP: internal error, state == PUBLIC_LITERAL\n");
11395 ctxt->instate = XML_PARSER_START_TAG;
11396#ifdef DEBUG_PUSH
11397 xmlGenericError(xmlGenericErrorContext,
11398 "PP: entering START_TAG\n");
11399#endif
11400 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011401 }
11402 }
11403done:
11404#ifdef DEBUG_PUSH
11405 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11406#endif
11407 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011408encoding_error:
11409 {
11410 char buffer[150];
11411
11412 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11413 ctxt->input->cur[0], ctxt->input->cur[1],
11414 ctxt->input->cur[2], ctxt->input->cur[3]);
11415 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11416 "Input is not proper UTF-8, indicate encoding !\n%s",
11417 BAD_CAST buffer, NULL);
11418 }
11419 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011420}
11421
11422/**
Owen Taylor3473f882001-02-23 17:55:21 +000011423 * xmlParseChunk:
11424 * @ctxt: an XML parser context
11425 * @chunk: an char array
11426 * @size: the size in byte of the chunk
11427 * @terminate: last chunk indicator
11428 *
11429 * Parse a Chunk of memory
11430 *
11431 * Returns zero if no error, the xmlParserErrors otherwise.
11432 */
11433int
11434xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11435 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011436 int end_in_lf = 0;
11437
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011438 if (ctxt == NULL)
11439 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011440 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011441 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011442 if (ctxt->instate == XML_PARSER_START)
11443 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011444 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11445 (chunk[size - 1] == '\r')) {
11446 end_in_lf = 1;
11447 size--;
11448 }
Owen Taylor3473f882001-02-23 17:55:21 +000011449 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11450 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11451 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11452 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011453 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000011454
William M. Bracka3215c72004-07-31 16:24:01 +000011455 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11456 if (res < 0) {
11457 ctxt->errNo = XML_PARSER_EOF;
11458 ctxt->disableSAX = 1;
11459 return (XML_PARSER_EOF);
11460 }
Owen Taylor3473f882001-02-23 17:55:21 +000011461 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11462 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011463 ctxt->input->end =
11464 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011465#ifdef DEBUG_PUSH
11466 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11467#endif
11468
Owen Taylor3473f882001-02-23 17:55:21 +000011469 } else if (ctxt->instate != XML_PARSER_EOF) {
11470 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11471 xmlParserInputBufferPtr in = ctxt->input->buf;
11472 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11473 (in->raw != NULL)) {
11474 int nbchars;
11475
11476 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11477 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011478 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011479 xmlGenericError(xmlGenericErrorContext,
11480 "xmlParseChunk: encoder error\n");
11481 return(XML_ERR_INVALID_ENCODING);
11482 }
11483 }
11484 }
11485 }
11486 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000011487 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11488 (ctxt->input->buf != NULL)) {
11489 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11490 }
Daniel Veillard14412512005-01-21 23:53:26 +000011491 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011492 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011493 if (terminate) {
11494 /*
11495 * Check for termination
11496 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011497 int avail = 0;
11498
11499 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011500 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011501 avail = ctxt->input->length -
11502 (ctxt->input->cur - ctxt->input->base);
11503 else
11504 avail = ctxt->input->buf->buffer->use -
11505 (ctxt->input->cur - ctxt->input->base);
11506 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011507
Owen Taylor3473f882001-02-23 17:55:21 +000011508 if ((ctxt->instate != XML_PARSER_EOF) &&
11509 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011510 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011511 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011512 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011513 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011514 }
Owen Taylor3473f882001-02-23 17:55:21 +000011515 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011516 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011517 ctxt->sax->endDocument(ctxt->userData);
11518 }
11519 ctxt->instate = XML_PARSER_EOF;
11520 }
11521 return((xmlParserErrors) ctxt->errNo);
11522}
11523
11524/************************************************************************
11525 * *
11526 * I/O front end functions to the parser *
11527 * *
11528 ************************************************************************/
11529
11530/**
Owen Taylor3473f882001-02-23 17:55:21 +000011531 * xmlCreatePushParserCtxt:
11532 * @sax: a SAX handler
11533 * @user_data: The user data returned on SAX callbacks
11534 * @chunk: a pointer to an array of chars
11535 * @size: number of chars in the array
11536 * @filename: an optional file name or URI
11537 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011538 * Create a parser context for using the XML parser in push mode.
11539 * If @buffer and @size are non-NULL, the data is used to detect
11540 * the encoding. The remaining characters will be parsed so they
11541 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011542 * To allow content encoding detection, @size should be >= 4
11543 * The value of @filename is used for fetching external entities
11544 * and error/warning reports.
11545 *
11546 * Returns the new parser context or NULL
11547 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011548
Owen Taylor3473f882001-02-23 17:55:21 +000011549xmlParserCtxtPtr
11550xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11551 const char *chunk, int size, const char *filename) {
11552 xmlParserCtxtPtr ctxt;
11553 xmlParserInputPtr inputStream;
11554 xmlParserInputBufferPtr buf;
11555 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11556
11557 /*
11558 * plug some encoding conversion routines
11559 */
11560 if ((chunk != NULL) && (size >= 4))
11561 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11562
11563 buf = xmlAllocParserInputBuffer(enc);
11564 if (buf == NULL) return(NULL);
11565
11566 ctxt = xmlNewParserCtxt();
11567 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011568 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011569 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011570 return(NULL);
11571 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011572 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011573 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11574 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011575 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011576 xmlFreeParserInputBuffer(buf);
11577 xmlFreeParserCtxt(ctxt);
11578 return(NULL);
11579 }
Owen Taylor3473f882001-02-23 17:55:21 +000011580 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011581#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011582 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011583#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011584 xmlFree(ctxt->sax);
11585 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11586 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011587 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011588 xmlFreeParserInputBuffer(buf);
11589 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011590 return(NULL);
11591 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011592 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11593 if (sax->initialized == XML_SAX2_MAGIC)
11594 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11595 else
11596 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011597 if (user_data != NULL)
11598 ctxt->userData = user_data;
11599 }
11600 if (filename == NULL) {
11601 ctxt->directory = NULL;
11602 } else {
11603 ctxt->directory = xmlParserGetDirectory(filename);
11604 }
11605
11606 inputStream = xmlNewInputStream(ctxt);
11607 if (inputStream == NULL) {
11608 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011609 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011610 return(NULL);
11611 }
11612
11613 if (filename == NULL)
11614 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011615 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011616 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011617 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011618 if (inputStream->filename == NULL) {
11619 xmlFreeParserCtxt(ctxt);
11620 xmlFreeParserInputBuffer(buf);
11621 return(NULL);
11622 }
11623 }
Owen Taylor3473f882001-02-23 17:55:21 +000011624 inputStream->buf = buf;
11625 inputStream->base = inputStream->buf->buffer->content;
11626 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011627 inputStream->end =
11628 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011629
11630 inputPush(ctxt, inputStream);
11631
William M. Brack3a1cd212005-02-11 14:35:54 +000011632 /*
11633 * If the caller didn't provide an initial 'chunk' for determining
11634 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11635 * that it can be automatically determined later
11636 */
11637 if ((size == 0) || (chunk == NULL)) {
11638 ctxt->charset = XML_CHAR_ENCODING_NONE;
11639 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011640 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11641 int cur = ctxt->input->cur - ctxt->input->base;
11642
Owen Taylor3473f882001-02-23 17:55:21 +000011643 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011644
11645 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11646 ctxt->input->cur = ctxt->input->base + cur;
11647 ctxt->input->end =
11648 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011649#ifdef DEBUG_PUSH
11650 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11651#endif
11652 }
11653
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011654 if (enc != XML_CHAR_ENCODING_NONE) {
11655 xmlSwitchEncoding(ctxt, enc);
11656 }
11657
Owen Taylor3473f882001-02-23 17:55:21 +000011658 return(ctxt);
11659}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011660#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011661
11662/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011663 * xmlStopParser:
11664 * @ctxt: an XML parser context
11665 *
11666 * Blocks further parser processing
11667 */
11668void
11669xmlStopParser(xmlParserCtxtPtr ctxt) {
11670 if (ctxt == NULL)
11671 return;
11672 ctxt->instate = XML_PARSER_EOF;
11673 ctxt->disableSAX = 1;
11674 if (ctxt->input != NULL) {
11675 ctxt->input->cur = BAD_CAST"";
11676 ctxt->input->base = ctxt->input->cur;
11677 }
11678}
11679
11680/**
Owen Taylor3473f882001-02-23 17:55:21 +000011681 * xmlCreateIOParserCtxt:
11682 * @sax: a SAX handler
11683 * @user_data: The user data returned on SAX callbacks
11684 * @ioread: an I/O read function
11685 * @ioclose: an I/O close function
11686 * @ioctx: an I/O handler
11687 * @enc: the charset encoding if known
11688 *
11689 * Create a parser context for using the XML parser with an existing
11690 * I/O stream
11691 *
11692 * Returns the new parser context or NULL
11693 */
11694xmlParserCtxtPtr
11695xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11696 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11697 void *ioctx, xmlCharEncoding enc) {
11698 xmlParserCtxtPtr ctxt;
11699 xmlParserInputPtr inputStream;
11700 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011701
11702 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011703
11704 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11705 if (buf == NULL) return(NULL);
11706
11707 ctxt = xmlNewParserCtxt();
11708 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011709 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011710 return(NULL);
11711 }
11712 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011713#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011714 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011715#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011716 xmlFree(ctxt->sax);
11717 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11718 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011719 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011720 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011721 return(NULL);
11722 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011723 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11724 if (sax->initialized == XML_SAX2_MAGIC)
11725 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11726 else
11727 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011728 if (user_data != NULL)
11729 ctxt->userData = user_data;
11730 }
11731
11732 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11733 if (inputStream == NULL) {
11734 xmlFreeParserCtxt(ctxt);
11735 return(NULL);
11736 }
11737 inputPush(ctxt, inputStream);
11738
11739 return(ctxt);
11740}
11741
Daniel Veillard4432df22003-09-28 18:58:27 +000011742#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011743/************************************************************************
11744 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011745 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011746 * *
11747 ************************************************************************/
11748
11749/**
11750 * xmlIOParseDTD:
11751 * @sax: the SAX handler block or NULL
11752 * @input: an Input Buffer
11753 * @enc: the charset encoding if known
11754 *
11755 * Load and parse a DTD
11756 *
11757 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011758 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011759 */
11760
11761xmlDtdPtr
11762xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11763 xmlCharEncoding enc) {
11764 xmlDtdPtr ret = NULL;
11765 xmlParserCtxtPtr ctxt;
11766 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011767 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011768
11769 if (input == NULL)
11770 return(NULL);
11771
11772 ctxt = xmlNewParserCtxt();
11773 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011774 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011775 return(NULL);
11776 }
11777
11778 /*
11779 * Set-up the SAX context
11780 */
11781 if (sax != NULL) {
11782 if (ctxt->sax != NULL)
11783 xmlFree(ctxt->sax);
11784 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011785 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011786 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011787 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011788
11789 /*
11790 * generate a parser input from the I/O handler
11791 */
11792
Daniel Veillard43caefb2003-12-07 19:32:22 +000011793 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011794 if (pinput == NULL) {
11795 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011796 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011797 xmlFreeParserCtxt(ctxt);
11798 return(NULL);
11799 }
11800
11801 /*
11802 * plug some encoding conversion routines here.
11803 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011804 if (xmlPushInput(ctxt, pinput) < 0) {
11805 if (sax != NULL) ctxt->sax = NULL;
11806 xmlFreeParserCtxt(ctxt);
11807 return(NULL);
11808 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000011809 if (enc != XML_CHAR_ENCODING_NONE) {
11810 xmlSwitchEncoding(ctxt, enc);
11811 }
Owen Taylor3473f882001-02-23 17:55:21 +000011812
11813 pinput->filename = NULL;
11814 pinput->line = 1;
11815 pinput->col = 1;
11816 pinput->base = ctxt->input->cur;
11817 pinput->cur = ctxt->input->cur;
11818 pinput->free = NULL;
11819
11820 /*
11821 * let's parse that entity knowing it's an external subset.
11822 */
11823 ctxt->inSubset = 2;
11824 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011825 if (ctxt->myDoc == NULL) {
11826 xmlErrMemory(ctxt, "New Doc failed");
11827 return(NULL);
11828 }
11829 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011830 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11831 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011832
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011833 if ((enc == XML_CHAR_ENCODING_NONE) &&
11834 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011835 /*
11836 * Get the 4 first bytes and decode the charset
11837 * if enc != XML_CHAR_ENCODING_NONE
11838 * plug some encoding conversion routines.
11839 */
11840 start[0] = RAW;
11841 start[1] = NXT(1);
11842 start[2] = NXT(2);
11843 start[3] = NXT(3);
11844 enc = xmlDetectCharEncoding(start, 4);
11845 if (enc != XML_CHAR_ENCODING_NONE) {
11846 xmlSwitchEncoding(ctxt, enc);
11847 }
11848 }
11849
Owen Taylor3473f882001-02-23 17:55:21 +000011850 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11851
11852 if (ctxt->myDoc != NULL) {
11853 if (ctxt->wellFormed) {
11854 ret = ctxt->myDoc->extSubset;
11855 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011856 if (ret != NULL) {
11857 xmlNodePtr tmp;
11858
11859 ret->doc = NULL;
11860 tmp = ret->children;
11861 while (tmp != NULL) {
11862 tmp->doc = NULL;
11863 tmp = tmp->next;
11864 }
11865 }
Owen Taylor3473f882001-02-23 17:55:21 +000011866 } else {
11867 ret = NULL;
11868 }
11869 xmlFreeDoc(ctxt->myDoc);
11870 ctxt->myDoc = NULL;
11871 }
11872 if (sax != NULL) ctxt->sax = NULL;
11873 xmlFreeParserCtxt(ctxt);
11874
11875 return(ret);
11876}
11877
11878/**
11879 * xmlSAXParseDTD:
11880 * @sax: the SAX handler block
11881 * @ExternalID: a NAME* containing the External ID of the DTD
11882 * @SystemID: a NAME* containing the URL to the DTD
11883 *
11884 * Load and parse an external subset.
11885 *
11886 * Returns the resulting xmlDtdPtr or NULL in case of error.
11887 */
11888
11889xmlDtdPtr
11890xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11891 const xmlChar *SystemID) {
11892 xmlDtdPtr ret = NULL;
11893 xmlParserCtxtPtr ctxt;
11894 xmlParserInputPtr input = NULL;
11895 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011896 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011897
11898 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11899
11900 ctxt = xmlNewParserCtxt();
11901 if (ctxt == NULL) {
11902 return(NULL);
11903 }
11904
11905 /*
11906 * Set-up the SAX context
11907 */
11908 if (sax != NULL) {
11909 if (ctxt->sax != NULL)
11910 xmlFree(ctxt->sax);
11911 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011912 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011913 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011914
11915 /*
11916 * Canonicalise the system ID
11917 */
11918 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011919 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011920 xmlFreeParserCtxt(ctxt);
11921 return(NULL);
11922 }
Owen Taylor3473f882001-02-23 17:55:21 +000011923
11924 /*
11925 * Ask the Entity resolver to load the damn thing
11926 */
11927
11928 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011929 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11930 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011931 if (input == NULL) {
11932 if (sax != NULL) ctxt->sax = NULL;
11933 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011934 if (systemIdCanonic != NULL)
11935 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011936 return(NULL);
11937 }
11938
11939 /*
11940 * plug some encoding conversion routines here.
11941 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011942 if (xmlPushInput(ctxt, input) < 0) {
11943 if (sax != NULL) ctxt->sax = NULL;
11944 xmlFreeParserCtxt(ctxt);
11945 if (systemIdCanonic != NULL)
11946 xmlFree(systemIdCanonic);
11947 return(NULL);
11948 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011949 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11950 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11951 xmlSwitchEncoding(ctxt, enc);
11952 }
Owen Taylor3473f882001-02-23 17:55:21 +000011953
11954 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011955 input->filename = (char *) systemIdCanonic;
11956 else
11957 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011958 input->line = 1;
11959 input->col = 1;
11960 input->base = ctxt->input->cur;
11961 input->cur = ctxt->input->cur;
11962 input->free = NULL;
11963
11964 /*
11965 * let's parse that entity knowing it's an external subset.
11966 */
11967 ctxt->inSubset = 2;
11968 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011969 if (ctxt->myDoc == NULL) {
11970 xmlErrMemory(ctxt, "New Doc failed");
11971 if (sax != NULL) ctxt->sax = NULL;
11972 xmlFreeParserCtxt(ctxt);
11973 return(NULL);
11974 }
11975 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011976 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11977 ExternalID, SystemID);
11978 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11979
11980 if (ctxt->myDoc != NULL) {
11981 if (ctxt->wellFormed) {
11982 ret = ctxt->myDoc->extSubset;
11983 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011984 if (ret != NULL) {
11985 xmlNodePtr tmp;
11986
11987 ret->doc = NULL;
11988 tmp = ret->children;
11989 while (tmp != NULL) {
11990 tmp->doc = NULL;
11991 tmp = tmp->next;
11992 }
11993 }
Owen Taylor3473f882001-02-23 17:55:21 +000011994 } else {
11995 ret = NULL;
11996 }
11997 xmlFreeDoc(ctxt->myDoc);
11998 ctxt->myDoc = NULL;
11999 }
12000 if (sax != NULL) ctxt->sax = NULL;
12001 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012002
Owen Taylor3473f882001-02-23 17:55:21 +000012003 return(ret);
12004}
12005
Daniel Veillard4432df22003-09-28 18:58:27 +000012006
Owen Taylor3473f882001-02-23 17:55:21 +000012007/**
12008 * xmlParseDTD:
12009 * @ExternalID: a NAME* containing the External ID of the DTD
12010 * @SystemID: a NAME* containing the URL to the DTD
12011 *
12012 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012013 *
Owen Taylor3473f882001-02-23 17:55:21 +000012014 * Returns the resulting xmlDtdPtr or NULL in case of error.
12015 */
12016
12017xmlDtdPtr
12018xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12019 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12020}
Daniel Veillard4432df22003-09-28 18:58:27 +000012021#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012022
/************************************************************************
 *                                                                      *
 *              Front ends when parsing an Entity                       *
 *                                                                      *
 ************************************************************************/
12028
12029/**
Owen Taylor3473f882001-02-23 17:55:21 +000012030 * xmlParseCtxtExternalEntity:
12031 * @ctx: the existing parsing context
12032 * @URL: the URL for the entity to load
12033 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012034 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012035 *
12036 * Parse an external general entity within an existing parsing context
12037 * An external general parsed entity is well-formed if it matches the
12038 * production labeled extParsedEnt.
12039 *
12040 * [78] extParsedEnt ::= TextDecl? content
12041 *
12042 * Returns 0 if the entity is well formed, -1 in case of args problem and
12043 * the parser error code otherwise
12044 */
12045
12046int
12047xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012048 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012049 xmlParserCtxtPtr ctxt;
12050 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012051 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012052 xmlSAXHandlerPtr oldsax = NULL;
12053 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012054 xmlChar start[4];
12055 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012056 xmlParserInputPtr inputStream;
12057 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012058
Daniel Veillardce682bc2004-11-05 17:22:25 +000012059 if (ctx == NULL) return(-1);
12060
Daniel Veillard0161e632008-08-28 15:36:32 +000012061 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12062 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012063 return(XML_ERR_ENTITY_LOOP);
12064 }
12065
Daniel Veillardcda96922001-08-21 10:56:31 +000012066 if (lst != NULL)
12067 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012068 if ((URL == NULL) && (ID == NULL))
12069 return(-1);
12070 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12071 return(-1);
12072
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012073 ctxt = xmlNewParserCtxt();
12074 if (ctxt == NULL) {
12075 return(-1);
12076 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012077
Owen Taylor3473f882001-02-23 17:55:21 +000012078 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000012079 ctxt->_private = ctx->_private;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012080
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012081 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12082 if (inputStream == NULL) {
12083 xmlFreeParserCtxt(ctxt);
12084 return(-1);
12085 }
12086
12087 inputPush(ctxt, inputStream);
12088
12089 if ((ctxt->directory == NULL) && (directory == NULL))
12090 directory = xmlParserGetDirectory((char *)URL);
12091 if ((ctxt->directory == NULL) && (directory != NULL))
12092 ctxt->directory = directory;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012093
Owen Taylor3473f882001-02-23 17:55:21 +000012094 oldsax = ctxt->sax;
12095 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012096 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012097 newDoc = xmlNewDoc(BAD_CAST "1.0");
12098 if (newDoc == NULL) {
12099 xmlFreeParserCtxt(ctxt);
12100 return(-1);
12101 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012102 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012103 if (ctx->myDoc->dict) {
12104 newDoc->dict = ctx->myDoc->dict;
12105 xmlDictReference(newDoc->dict);
12106 }
Owen Taylor3473f882001-02-23 17:55:21 +000012107 if (ctx->myDoc != NULL) {
12108 newDoc->intSubset = ctx->myDoc->intSubset;
12109 newDoc->extSubset = ctx->myDoc->extSubset;
12110 }
12111 if (ctx->myDoc->URL != NULL) {
12112 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12113 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012114 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12115 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012116 ctxt->sax = oldsax;
12117 xmlFreeParserCtxt(ctxt);
12118 newDoc->intSubset = NULL;
12119 newDoc->extSubset = NULL;
12120 xmlFreeDoc(newDoc);
12121 return(-1);
12122 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012123 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012124 nodePush(ctxt, newDoc->children);
12125 if (ctx->myDoc == NULL) {
12126 ctxt->myDoc = newDoc;
12127 } else {
12128 ctxt->myDoc = ctx->myDoc;
12129 newDoc->children->doc = ctx->myDoc;
12130 }
12131
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012132 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012133 * Get the 4 first bytes and decode the charset
12134 * if enc != XML_CHAR_ENCODING_NONE
12135 * plug some encoding conversion routines.
12136 */
12137 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012138 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12139 start[0] = RAW;
12140 start[1] = NXT(1);
12141 start[2] = NXT(2);
12142 start[3] = NXT(3);
12143 enc = xmlDetectCharEncoding(start, 4);
12144 if (enc != XML_CHAR_ENCODING_NONE) {
12145 xmlSwitchEncoding(ctxt, enc);
12146 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012147 }
12148
Owen Taylor3473f882001-02-23 17:55:21 +000012149 /*
12150 * Parse a possible text declaration first
12151 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012152 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012153 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012154 /*
12155 * An XML-1.0 document can't reference an entity not XML-1.0
12156 */
12157 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12158 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12159 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12160 "Version mismatch between document and entity\n");
12161 }
Owen Taylor3473f882001-02-23 17:55:21 +000012162 }
12163
12164 /*
12165 * Doing validity checking on chunk doesn't make sense
12166 */
12167 ctxt->instate = XML_PARSER_CONTENT;
12168 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012169 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012170 ctxt->loadsubset = ctx->loadsubset;
12171 ctxt->depth = ctx->depth + 1;
12172 ctxt->replaceEntities = ctx->replaceEntities;
12173 if (ctxt->validate) {
12174 ctxt->vctxt.error = ctx->vctxt.error;
12175 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012176 } else {
12177 ctxt->vctxt.error = NULL;
12178 ctxt->vctxt.warning = NULL;
12179 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012180 ctxt->vctxt.nodeTab = NULL;
12181 ctxt->vctxt.nodeNr = 0;
12182 ctxt->vctxt.nodeMax = 0;
12183 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012184 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12185 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012186 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12187 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12188 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012189 ctxt->dictNames = ctx->dictNames;
12190 ctxt->attsDefault = ctx->attsDefault;
12191 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012192 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012193
12194 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012195
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012196 ctx->validate = ctxt->validate;
12197 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012198 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012199 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012200 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012201 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012202 }
12203 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012204 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012205 }
12206
12207 if (!ctxt->wellFormed) {
12208 if (ctxt->errNo == 0)
12209 ret = 1;
12210 else
12211 ret = ctxt->errNo;
12212 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012213 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012214 xmlNodePtr cur;
12215
12216 /*
12217 * Return the newly created nodeset after unlinking it from
12218 * they pseudo parent.
12219 */
12220 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012221 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012222 while (cur != NULL) {
12223 cur->parent = NULL;
12224 cur = cur->next;
12225 }
12226 newDoc->children->children = NULL;
12227 }
12228 ret = 0;
12229 }
12230 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012231 ctxt->dict = NULL;
12232 ctxt->attsDefault = NULL;
12233 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012234 xmlFreeParserCtxt(ctxt);
12235 newDoc->intSubset = NULL;
12236 newDoc->extSubset = NULL;
12237 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012238
Owen Taylor3473f882001-02-23 17:55:21 +000012239 return(ret);
12240}
12241
12242/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012243 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012244 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012245 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012246 * @sax: the SAX handler bloc (possibly NULL)
12247 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12248 * @depth: Used for loop detection, use 0
12249 * @URL: the URL for the entity to load
12250 * @ID: the System ID for the entity to load
12251 * @list: the return value for the set of parsed nodes
12252 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012253 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012254 *
12255 * Returns 0 if the entity is well formed, -1 in case of args problem and
12256 * the parser error code otherwise
12257 */
12258
Daniel Veillard7d515752003-09-26 19:12:37 +000012259static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012260xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12261 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012262 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012263 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012264 xmlParserCtxtPtr ctxt;
12265 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012266 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012267 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012268 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012269 xmlChar start[4];
12270 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012271
Daniel Veillard0161e632008-08-28 15:36:32 +000012272 if (((depth > 40) &&
12273 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12274 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012275 return(XML_ERR_ENTITY_LOOP);
12276 }
12277
Owen Taylor3473f882001-02-23 17:55:21 +000012278 if (list != NULL)
12279 *list = NULL;
12280 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012281 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012282 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012283 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012284
12285
Rob Richards9c0aa472009-03-26 18:10:19 +000012286 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012287 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012288 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012289 if (oldctxt != NULL) {
12290 ctxt->_private = oldctxt->_private;
12291 ctxt->loadsubset = oldctxt->loadsubset;
12292 ctxt->validate = oldctxt->validate;
12293 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012294 ctxt->record_info = oldctxt->record_info;
12295 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12296 ctxt->node_seq.length = oldctxt->node_seq.length;
12297 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012298 } else {
12299 /*
12300 * Doing validity checking on chunk without context
12301 * doesn't make sense
12302 */
12303 ctxt->_private = NULL;
12304 ctxt->validate = 0;
12305 ctxt->external = 2;
12306 ctxt->loadsubset = 0;
12307 }
Owen Taylor3473f882001-02-23 17:55:21 +000012308 if (sax != NULL) {
12309 oldsax = ctxt->sax;
12310 ctxt->sax = sax;
12311 if (user_data != NULL)
12312 ctxt->userData = user_data;
12313 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012314 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012315 newDoc = xmlNewDoc(BAD_CAST "1.0");
12316 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012317 ctxt->node_seq.maximum = 0;
12318 ctxt->node_seq.length = 0;
12319 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012320 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012321 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012322 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012323 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012324 newDoc->intSubset = doc->intSubset;
12325 newDoc->extSubset = doc->extSubset;
12326 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012327 xmlDictReference(newDoc->dict);
12328
Owen Taylor3473f882001-02-23 17:55:21 +000012329 if (doc->URL != NULL) {
12330 newDoc->URL = xmlStrdup(doc->URL);
12331 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012332 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12333 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012334 if (sax != NULL)
12335 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012336 ctxt->node_seq.maximum = 0;
12337 ctxt->node_seq.length = 0;
12338 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012339 xmlFreeParserCtxt(ctxt);
12340 newDoc->intSubset = NULL;
12341 newDoc->extSubset = NULL;
12342 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012343 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012344 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012345 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012346 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012347 ctxt->myDoc = doc;
12348 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012349
Daniel Veillard0161e632008-08-28 15:36:32 +000012350 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012351 * Get the 4 first bytes and decode the charset
12352 * if enc != XML_CHAR_ENCODING_NONE
12353 * plug some encoding conversion routines.
12354 */
12355 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012356 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12357 start[0] = RAW;
12358 start[1] = NXT(1);
12359 start[2] = NXT(2);
12360 start[3] = NXT(3);
12361 enc = xmlDetectCharEncoding(start, 4);
12362 if (enc != XML_CHAR_ENCODING_NONE) {
12363 xmlSwitchEncoding(ctxt, enc);
12364 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012365 }
12366
Owen Taylor3473f882001-02-23 17:55:21 +000012367 /*
12368 * Parse a possible text declaration first
12369 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012370 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012371 xmlParseTextDecl(ctxt);
12372 }
12373
Owen Taylor3473f882001-02-23 17:55:21 +000012374 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012375 ctxt->depth = depth;
12376
12377 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012378
Daniel Veillard561b7f82002-03-20 21:55:57 +000012379 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012380 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012381 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012382 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012383 }
12384 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012385 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012386 }
12387
12388 if (!ctxt->wellFormed) {
12389 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012390 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012391 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012392 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012393 } else {
12394 if (list != NULL) {
12395 xmlNodePtr cur;
12396
12397 /*
12398 * Return the newly created nodeset after unlinking it from
12399 * they pseudo parent.
12400 */
12401 cur = newDoc->children->children;
12402 *list = cur;
12403 while (cur != NULL) {
12404 cur->parent = NULL;
12405 cur = cur->next;
12406 }
12407 newDoc->children->children = NULL;
12408 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012409 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012410 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012411
12412 /*
12413 * Record in the parent context the number of entities replacement
12414 * done when parsing that reference.
12415 */
12416 oldctxt->nbentities += ctxt->nbentities;
12417 /*
12418 * Also record the size of the entity parsed
12419 */
12420 if (ctxt->input != NULL) {
12421 oldctxt->sizeentities += ctxt->input->consumed;
12422 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12423 }
12424 /*
12425 * And record the last error if any
12426 */
12427 if (ctxt->lastError.code != XML_ERR_OK)
12428 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12429
Owen Taylor3473f882001-02-23 17:55:21 +000012430 if (sax != NULL)
12431 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012432 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12433 oldctxt->node_seq.length = ctxt->node_seq.length;
12434 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012435 ctxt->node_seq.maximum = 0;
12436 ctxt->node_seq.length = 0;
12437 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012438 xmlFreeParserCtxt(ctxt);
12439 newDoc->intSubset = NULL;
12440 newDoc->extSubset = NULL;
12441 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012442
Owen Taylor3473f882001-02-23 17:55:21 +000012443 return(ret);
12444}
12445
Daniel Veillard81273902003-09-30 00:43:48 +000012446#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012447/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012448 * xmlParseExternalEntity:
12449 * @doc: the document the chunk pertains to
12450 * @sax: the SAX handler bloc (possibly NULL)
12451 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12452 * @depth: Used for loop detection, use 0
12453 * @URL: the URL for the entity to load
12454 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012455 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012456 *
12457 * Parse an external general entity
12458 * An external general parsed entity is well-formed if it matches the
12459 * production labeled extParsedEnt.
12460 *
12461 * [78] extParsedEnt ::= TextDecl? content
12462 *
12463 * Returns 0 if the entity is well formed, -1 in case of args problem and
12464 * the parser error code otherwise
12465 */
12466
12467int
12468xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012469 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012470 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012471 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012472}
12473
12474/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012475 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012476 * @doc: the document the chunk pertains to
12477 * @sax: the SAX handler bloc (possibly NULL)
12478 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12479 * @depth: Used for loop detection, use 0
12480 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012481 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012482 *
12483 * Parse a well-balanced chunk of an XML document
12484 * called by the parser
12485 * The allowed sequence for the Well Balanced Chunk is the one defined by
12486 * the content production in the XML grammar:
12487 *
12488 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12489 *
12490 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12491 * the parser error code otherwise
12492 */
12493
12494int
12495xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012496 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012497 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12498 depth, string, lst, 0 );
12499}
Daniel Veillard81273902003-09-30 00:43:48 +000012500#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012501
12502/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012503 * xmlParseBalancedChunkMemoryInternal:
12504 * @oldctxt: the existing parsing context
12505 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12506 * @user_data: the user data field for the parser context
12507 * @lst: the return value for the set of parsed nodes
12508 *
12509 *
12510 * Parse a well-balanced chunk of an XML document
12511 * called by the parser
12512 * The allowed sequence for the Well Balanced Chunk is the one defined by
12513 * the content production in the XML grammar:
12514 *
12515 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12516 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012517 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12518 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012519 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012520 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012521 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012522 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012523static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012524xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12525 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12526 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012527 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012528 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012529 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012530 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012531 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012532 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012533 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012534
Daniel Veillard0161e632008-08-28 15:36:32 +000012535 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12536 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012537 return(XML_ERR_ENTITY_LOOP);
12538 }
12539
12540
12541 if (lst != NULL)
12542 *lst = NULL;
12543 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012544 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012545
12546 size = xmlStrlen(string);
12547
12548 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012549 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012550 if (user_data != NULL)
12551 ctxt->userData = user_data;
12552 else
12553 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012554 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12555 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012556 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12557 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12558 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012559
12560 oldsax = ctxt->sax;
12561 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012562 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012563 ctxt->replaceEntities = oldctxt->replaceEntities;
12564 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012565
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012566 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012567 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012568 newDoc = xmlNewDoc(BAD_CAST "1.0");
12569 if (newDoc == NULL) {
12570 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012571 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012572 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012573 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012574 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012575 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012576 newDoc->dict = ctxt->dict;
12577 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012578 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012579 } else {
12580 ctxt->myDoc = oldctxt->myDoc;
12581 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012582 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012583 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012584 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12585 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012586 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012587 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012588 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012589 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012590 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012591 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012592 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012593 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012594 ctxt->myDoc->children = NULL;
12595 ctxt->myDoc->last = NULL;
12596 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012597 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012598 ctxt->instate = XML_PARSER_CONTENT;
12599 ctxt->depth = oldctxt->depth + 1;
12600
Daniel Veillard328f48c2002-11-15 15:24:34 +000012601 ctxt->validate = 0;
12602 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012603 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12604 /*
12605 * ID/IDREF registration will be done in xmlValidateElement below
12606 */
12607 ctxt->loadsubset |= XML_SKIP_IDS;
12608 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012609 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012610 ctxt->attsDefault = oldctxt->attsDefault;
12611 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012612
Daniel Veillard68e9e742002-11-16 15:35:11 +000012613 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012614 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012615 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012616 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012617 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012618 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012619 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012620 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012621 }
12622
12623 if (!ctxt->wellFormed) {
12624 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012625 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012626 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012627 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012628 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012629 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012630 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012631
William M. Brack7b9154b2003-09-27 19:23:50 +000012632 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012633 xmlNodePtr cur;
12634
12635 /*
12636 * Return the newly created nodeset after unlinking it from
12637 * they pseudo parent.
12638 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012639 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012640 *lst = cur;
12641 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012642#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012643 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12644 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12645 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012646 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12647 oldctxt->myDoc, cur);
12648 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012649#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012650 cur->parent = NULL;
12651 cur = cur->next;
12652 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012653 ctxt->myDoc->children->children = NULL;
12654 }
12655 if (ctxt->myDoc != NULL) {
12656 xmlFreeNode(ctxt->myDoc->children);
12657 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012658 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012659 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012660
12661 /*
12662 * Record in the parent context the number of entities replacement
12663 * done when parsing that reference.
12664 */
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012665 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard0161e632008-08-28 15:36:32 +000012666 /*
12667 * Also record the last error if any
12668 */
12669 if (ctxt->lastError.code != XML_ERR_OK)
12670 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12671
Daniel Veillard328f48c2002-11-15 15:24:34 +000012672 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012673 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012674 ctxt->attsDefault = NULL;
12675 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012676 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012677 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012678 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012679 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012680
Daniel Veillard328f48c2002-11-15 15:24:34 +000012681 return(ret);
12682}
12683
Daniel Veillard29b17482004-08-16 00:39:03 +000012684/**
12685 * xmlParseInNodeContext:
12686 * @node: the context node
12687 * @data: the input string
12688 * @datalen: the input string length in bytes
12689 * @options: a combination of xmlParserOption
12690 * @lst: the return value for the set of parsed nodes
12691 *
12692 * Parse a well-balanced chunk of an XML document
12693 * within the context (DTD, namespaces, etc ...) of the given node.
12694 *
12695 * The allowed sequence for the data is a Well Balanced Chunk defined by
12696 * the content production in the XML grammar:
12697 *
12698 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12699 *
12700 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12701 * error code otherwise
12702 */
12703xmlParserErrors
12704xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12705 int options, xmlNodePtr *lst) {
12706#ifdef SAX2
12707 xmlParserCtxtPtr ctxt;
12708 xmlDocPtr doc = NULL;
12709 xmlNodePtr fake, cur;
12710 int nsnr = 0;
12711
12712 xmlParserErrors ret = XML_ERR_OK;
12713
12714 /*
12715 * check all input parameters, grab the document
12716 */
12717 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12718 return(XML_ERR_INTERNAL_ERROR);
12719 switch (node->type) {
12720 case XML_ELEMENT_NODE:
12721 case XML_ATTRIBUTE_NODE:
12722 case XML_TEXT_NODE:
12723 case XML_CDATA_SECTION_NODE:
12724 case XML_ENTITY_REF_NODE:
12725 case XML_PI_NODE:
12726 case XML_COMMENT_NODE:
12727 case XML_DOCUMENT_NODE:
12728 case XML_HTML_DOCUMENT_NODE:
12729 break;
12730 default:
12731 return(XML_ERR_INTERNAL_ERROR);
12732
12733 }
12734 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12735 (node->type != XML_DOCUMENT_NODE) &&
12736 (node->type != XML_HTML_DOCUMENT_NODE))
12737 node = node->parent;
12738 if (node == NULL)
12739 return(XML_ERR_INTERNAL_ERROR);
12740 if (node->type == XML_ELEMENT_NODE)
12741 doc = node->doc;
12742 else
12743 doc = (xmlDocPtr) node;
12744 if (doc == NULL)
12745 return(XML_ERR_INTERNAL_ERROR);
12746
12747 /*
12748 * allocate a context and set-up everything not related to the
12749 * node position in the tree
12750 */
12751 if (doc->type == XML_DOCUMENT_NODE)
12752 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12753#ifdef LIBXML_HTML_ENABLED
12754 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12755 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12756#endif
12757 else
12758 return(XML_ERR_INTERNAL_ERROR);
12759
12760 if (ctxt == NULL)
12761 return(XML_ERR_NO_MEMORY);
12762 fake = xmlNewComment(NULL);
12763 if (fake == NULL) {
12764 xmlFreeParserCtxt(ctxt);
12765 return(XML_ERR_NO_MEMORY);
12766 }
12767 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012768
12769 /*
12770 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12771 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12772 * we must wait until the last moment to free the original one.
12773 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012774 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012775 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012776 xmlDictFree(ctxt->dict);
12777 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012778 } else
12779 options |= XML_PARSE_NODICT;
12780
Daniel Veillard37334572008-07-31 08:20:02 +000012781 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012782 xmlDetectSAX2(ctxt);
12783 ctxt->myDoc = doc;
12784
12785 if (node->type == XML_ELEMENT_NODE) {
12786 nodePush(ctxt, node);
12787 /*
12788 * initialize the SAX2 namespaces stack
12789 */
12790 cur = node;
12791 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12792 xmlNsPtr ns = cur->nsDef;
12793 const xmlChar *iprefix, *ihref;
12794
12795 while (ns != NULL) {
12796 if (ctxt->dict) {
12797 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12798 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12799 } else {
12800 iprefix = ns->prefix;
12801 ihref = ns->href;
12802 }
12803
12804 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12805 nsPush(ctxt, iprefix, ihref);
12806 nsnr++;
12807 }
12808 ns = ns->next;
12809 }
12810 cur = cur->parent;
12811 }
12812 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000012813 }
Daniel Veillard29b17482004-08-16 00:39:03 +000012814
12815 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12816 /*
12817 * ID/IDREF registration will be done in xmlValidateElement below
12818 */
12819 ctxt->loadsubset |= XML_SKIP_IDS;
12820 }
12821
Daniel Veillard499cc922006-01-18 17:22:35 +000012822#ifdef LIBXML_HTML_ENABLED
12823 if (doc->type == XML_HTML_DOCUMENT_NODE)
12824 __htmlParseContent(ctxt);
12825 else
12826#endif
12827 xmlParseContent(ctxt);
12828
Daniel Veillard29b17482004-08-16 00:39:03 +000012829 nsPop(ctxt, nsnr);
12830 if ((RAW == '<') && (NXT(1) == '/')) {
12831 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12832 } else if (RAW != 0) {
12833 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12834 }
12835 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12836 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12837 ctxt->wellFormed = 0;
12838 }
12839
12840 if (!ctxt->wellFormed) {
12841 if (ctxt->errNo == 0)
12842 ret = XML_ERR_INTERNAL_ERROR;
12843 else
12844 ret = (xmlParserErrors)ctxt->errNo;
12845 } else {
12846 ret = XML_ERR_OK;
12847 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012848
Daniel Veillard29b17482004-08-16 00:39:03 +000012849 /*
12850 * Return the newly created nodeset after unlinking it from
12851 * the pseudo sibling.
12852 */
Daniel Veillard0161e632008-08-28 15:36:32 +000012853
Daniel Veillard29b17482004-08-16 00:39:03 +000012854 cur = fake->next;
12855 fake->next = NULL;
12856 node->last = fake;
12857
12858 if (cur != NULL) {
12859 cur->prev = NULL;
12860 }
12861
12862 *lst = cur;
12863
12864 while (cur != NULL) {
12865 cur->parent = NULL;
12866 cur = cur->next;
12867 }
12868
12869 xmlUnlinkNode(fake);
12870 xmlFreeNode(fake);
12871
12872
12873 if (ret != XML_ERR_OK) {
12874 xmlFreeNodeList(*lst);
12875 *lst = NULL;
12876 }
William M. Brackc3f81342004-10-03 01:22:44 +000012877
William M. Brackb7b54de2004-10-06 16:38:01 +000012878 if (doc->dict != NULL)
12879 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012880 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012881
Daniel Veillard29b17482004-08-16 00:39:03 +000012882 return(ret);
12883#else /* !SAX2 */
12884 return(XML_ERR_INTERNAL_ERROR);
12885#endif
12886}
12887
Daniel Veillard81273902003-09-30 00:43:48 +000012888#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012889/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012890 * xmlParseBalancedChunkMemoryRecover:
12891 * @doc: the document the chunk pertains to
12892 * @sax: the SAX handler bloc (possibly NULL)
12893 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12894 * @depth: Used for loop detection, use 0
12895 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12896 * @lst: the return value for the set of parsed nodes
12897 * @recover: return nodes even if the data is broken (use 0)
12898 *
12899 *
12900 * Parse a well-balanced chunk of an XML document
12901 * called by the parser
12902 * The allowed sequence for the Well Balanced Chunk is the one defined by
12903 * the content production in the XML grammar:
12904 *
12905 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12906 *
12907 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12908 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000012909 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000012910 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000012911 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12912 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000012913 */
12914int
12915xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000012916 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000012917 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012918 xmlParserCtxtPtr ctxt;
12919 xmlDocPtr newDoc;
12920 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012921 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012922 int size;
12923 int ret = 0;
12924
Daniel Veillard0161e632008-08-28 15:36:32 +000012925 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000012926 return(XML_ERR_ENTITY_LOOP);
12927 }
12928
12929
Daniel Veillardcda96922001-08-21 10:56:31 +000012930 if (lst != NULL)
12931 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012932 if (string == NULL)
12933 return(-1);
12934
12935 size = xmlStrlen(string);
12936
12937 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12938 if (ctxt == NULL) return(-1);
12939 ctxt->userData = ctxt;
12940 if (sax != NULL) {
12941 oldsax = ctxt->sax;
12942 ctxt->sax = sax;
12943 if (user_data != NULL)
12944 ctxt->userData = user_data;
12945 }
12946 newDoc = xmlNewDoc(BAD_CAST "1.0");
12947 if (newDoc == NULL) {
12948 xmlFreeParserCtxt(ctxt);
12949 return(-1);
12950 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012951 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012952 if ((doc != NULL) && (doc->dict != NULL)) {
12953 xmlDictFree(ctxt->dict);
12954 ctxt->dict = doc->dict;
12955 xmlDictReference(ctxt->dict);
12956 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12957 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12958 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12959 ctxt->dictNames = 1;
12960 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000012961 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012962 }
Owen Taylor3473f882001-02-23 17:55:21 +000012963 if (doc != NULL) {
12964 newDoc->intSubset = doc->intSubset;
12965 newDoc->extSubset = doc->extSubset;
12966 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012967 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12968 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012969 if (sax != NULL)
12970 ctxt->sax = oldsax;
12971 xmlFreeParserCtxt(ctxt);
12972 newDoc->intSubset = NULL;
12973 newDoc->extSubset = NULL;
12974 xmlFreeDoc(newDoc);
12975 return(-1);
12976 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012977 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12978 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012979 if (doc == NULL) {
12980 ctxt->myDoc = newDoc;
12981 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012982 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012983 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012984 /* Ensure that doc has XML spec namespace */
12985 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12986 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012987 }
12988 ctxt->instate = XML_PARSER_CONTENT;
12989 ctxt->depth = depth;
12990
12991 /*
12992 * Doing validity checking on chunk doesn't make sense
12993 */
12994 ctxt->validate = 0;
12995 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012996 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012997
Daniel Veillardb39bc392002-10-26 19:29:51 +000012998 if ( doc != NULL ){
12999 content = doc->children;
13000 doc->children = NULL;
13001 xmlParseContent(ctxt);
13002 doc->children = content;
13003 }
13004 else {
13005 xmlParseContent(ctxt);
13006 }
Owen Taylor3473f882001-02-23 17:55:21 +000013007 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013008 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013009 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013010 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013011 }
13012 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013013 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013014 }
13015
13016 if (!ctxt->wellFormed) {
13017 if (ctxt->errNo == 0)
13018 ret = 1;
13019 else
13020 ret = ctxt->errNo;
13021 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013022 ret = 0;
13023 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013024
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013025 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13026 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013027
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013028 /*
13029 * Return the newly created nodeset after unlinking it from
13030 * they pseudo parent.
13031 */
13032 cur = newDoc->children->children;
13033 *lst = cur;
13034 while (cur != NULL) {
13035 xmlSetTreeDoc(cur, doc);
13036 cur->parent = NULL;
13037 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013038 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013039 newDoc->children->children = NULL;
13040 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013041
13042 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013043 ctxt->sax = oldsax;
13044 xmlFreeParserCtxt(ctxt);
13045 newDoc->intSubset = NULL;
13046 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013047 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013048 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013049
Owen Taylor3473f882001-02-23 17:55:21 +000013050 return(ret);
13051}
13052
13053/**
13054 * xmlSAXParseEntity:
13055 * @sax: the SAX handler block
13056 * @filename: the filename
13057 *
13058 * parse an XML external entity out of context and build a tree.
13059 * It use the given SAX function block to handle the parsing callback.
13060 * If sax is NULL, fallback to the default DOM tree building routines.
13061 *
13062 * [78] extParsedEnt ::= TextDecl? content
13063 *
13064 * This correspond to a "Well Balanced" chunk
13065 *
13066 * Returns the resulting document tree
13067 */
13068
13069xmlDocPtr
13070xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13071 xmlDocPtr ret;
13072 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013073
13074 ctxt = xmlCreateFileParserCtxt(filename);
13075 if (ctxt == NULL) {
13076 return(NULL);
13077 }
13078 if (sax != NULL) {
13079 if (ctxt->sax != NULL)
13080 xmlFree(ctxt->sax);
13081 ctxt->sax = sax;
13082 ctxt->userData = NULL;
13083 }
13084
Owen Taylor3473f882001-02-23 17:55:21 +000013085 xmlParseExtParsedEnt(ctxt);
13086
13087 if (ctxt->wellFormed)
13088 ret = ctxt->myDoc;
13089 else {
13090 ret = NULL;
13091 xmlFreeDoc(ctxt->myDoc);
13092 ctxt->myDoc = NULL;
13093 }
13094 if (sax != NULL)
13095 ctxt->sax = NULL;
13096 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013097
Owen Taylor3473f882001-02-23 17:55:21 +000013098 return(ret);
13099}
13100
13101/**
13102 * xmlParseEntity:
13103 * @filename: the filename
13104 *
13105 * parse an XML external entity out of context and build a tree.
13106 *
13107 * [78] extParsedEnt ::= TextDecl? content
13108 *
13109 * This correspond to a "Well Balanced" chunk
13110 *
13111 * Returns the resulting document tree
13112 */
13113
13114xmlDocPtr
13115xmlParseEntity(const char *filename) {
13116 return(xmlSAXParseEntity(NULL, filename));
13117}
Daniel Veillard81273902003-09-30 00:43:48 +000013118#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013119
13120/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013121 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013122 * @URL: the entity URL
13123 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013124 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013125 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013126 *
13127 * Create a parser context for an external entity
13128 * Automatic support for ZLIB/Compress compressed document is provided
13129 * by default if found at compile-time.
13130 *
13131 * Returns the new parser context or NULL
13132 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013133static xmlParserCtxtPtr
13134xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13135 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013136 xmlParserCtxtPtr ctxt;
13137 xmlParserInputPtr inputStream;
13138 char *directory = NULL;
13139 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013140
Owen Taylor3473f882001-02-23 17:55:21 +000013141 ctxt = xmlNewParserCtxt();
13142 if (ctxt == NULL) {
13143 return(NULL);
13144 }
13145
Rob Richards9c0aa472009-03-26 18:10:19 +000013146 if (pctx != NULL) {
13147 ctxt->options = pctx->options;
13148 }
13149
Owen Taylor3473f882001-02-23 17:55:21 +000013150 uri = xmlBuildURI(URL, base);
13151
13152 if (uri == NULL) {
13153 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13154 if (inputStream == NULL) {
13155 xmlFreeParserCtxt(ctxt);
13156 return(NULL);
13157 }
13158
13159 inputPush(ctxt, inputStream);
13160
13161 if ((ctxt->directory == NULL) && (directory == NULL))
13162 directory = xmlParserGetDirectory((char *)URL);
13163 if ((ctxt->directory == NULL) && (directory != NULL))
13164 ctxt->directory = directory;
13165 } else {
13166 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13167 if (inputStream == NULL) {
13168 xmlFree(uri);
13169 xmlFreeParserCtxt(ctxt);
13170 return(NULL);
13171 }
13172
13173 inputPush(ctxt, inputStream);
13174
13175 if ((ctxt->directory == NULL) && (directory == NULL))
13176 directory = xmlParserGetDirectory((char *)uri);
13177 if ((ctxt->directory == NULL) && (directory != NULL))
13178 ctxt->directory = directory;
13179 xmlFree(uri);
13180 }
Owen Taylor3473f882001-02-23 17:55:21 +000013181 return(ctxt);
13182}
13183
Rob Richards9c0aa472009-03-26 18:10:19 +000013184/**
13185 * xmlCreateEntityParserCtxt:
13186 * @URL: the entity URL
13187 * @ID: the entity PUBLIC ID
13188 * @base: a possible base for the target URI
13189 *
13190 * Create a parser context for an external entity
13191 * Automatic support for ZLIB/Compress compressed document is provided
13192 * by default if found at compile-time.
13193 *
13194 * Returns the new parser context or NULL
13195 */
13196xmlParserCtxtPtr
13197xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13198 const xmlChar *base) {
13199 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13200
13201}
13202
Owen Taylor3473f882001-02-23 17:55:21 +000013203/************************************************************************
13204 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013205 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013206 * *
13207 ************************************************************************/
13208
13209/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013210 * xmlCreateURLParserCtxt:
13211 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013212 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013213 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013214 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013215 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013216 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013217 *
13218 * Returns the new parser context or NULL
13219 */
13220xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013221xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013222{
13223 xmlParserCtxtPtr ctxt;
13224 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013225 char *directory = NULL;
13226
Owen Taylor3473f882001-02-23 17:55:21 +000013227 ctxt = xmlNewParserCtxt();
13228 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013229 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013230 return(NULL);
13231 }
13232
Daniel Veillarddf292f72005-01-16 19:00:15 +000013233 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013234 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013235 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013236
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013237 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013238 if (inputStream == NULL) {
13239 xmlFreeParserCtxt(ctxt);
13240 return(NULL);
13241 }
13242
Owen Taylor3473f882001-02-23 17:55:21 +000013243 inputPush(ctxt, inputStream);
13244 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013245 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013246 if ((ctxt->directory == NULL) && (directory != NULL))
13247 ctxt->directory = directory;
13248
13249 return(ctxt);
13250}
13251
Daniel Veillard61b93382003-11-03 14:28:31 +000013252/**
13253 * xmlCreateFileParserCtxt:
13254 * @filename: the filename
13255 *
13256 * Create a parser context for a file content.
13257 * Automatic support for ZLIB/Compress compressed document is provided
13258 * by default if found at compile-time.
13259 *
13260 * Returns the new parser context or NULL
13261 */
13262xmlParserCtxtPtr
13263xmlCreateFileParserCtxt(const char *filename)
13264{
13265 return(xmlCreateURLParserCtxt(filename, 0));
13266}
13267
Daniel Veillard81273902003-09-30 00:43:48 +000013268#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013269/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013270 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013271 * @sax: the SAX handler block
13272 * @filename: the filename
13273 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13274 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013275 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013276 *
13277 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13278 * compressed document is provided by default if found at compile-time.
13279 * It use the given SAX function block to handle the parsing callback.
13280 * If sax is NULL, fallback to the default DOM tree building routines.
13281 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013282 * User data (void *) is stored within the parser context in the
13283 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013284 *
Owen Taylor3473f882001-02-23 17:55:21 +000013285 * Returns the resulting document tree
13286 */
13287
13288xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013289xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13290 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013291 xmlDocPtr ret;
13292 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013293
Daniel Veillard635ef722001-10-29 11:48:19 +000013294 xmlInitParser();
13295
Owen Taylor3473f882001-02-23 17:55:21 +000013296 ctxt = xmlCreateFileParserCtxt(filename);
13297 if (ctxt == NULL) {
13298 return(NULL);
13299 }
13300 if (sax != NULL) {
13301 if (ctxt->sax != NULL)
13302 xmlFree(ctxt->sax);
13303 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013304 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013305 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013306 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013307 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013308 }
Owen Taylor3473f882001-02-23 17:55:21 +000013309
Daniel Veillard37d2d162008-03-14 10:54:00 +000013310 if (ctxt->directory == NULL)
13311 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013312
Daniel Veillarddad3f682002-11-17 16:47:27 +000013313 ctxt->recovery = recovery;
13314
Owen Taylor3473f882001-02-23 17:55:21 +000013315 xmlParseDocument(ctxt);
13316
William M. Brackc07329e2003-09-08 01:57:30 +000013317 if ((ctxt->wellFormed) || recovery) {
13318 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013319 if (ret != NULL) {
13320 if (ctxt->input->buf->compressed > 0)
13321 ret->compression = 9;
13322 else
13323 ret->compression = ctxt->input->buf->compressed;
13324 }
William M. Brackc07329e2003-09-08 01:57:30 +000013325 }
Owen Taylor3473f882001-02-23 17:55:21 +000013326 else {
13327 ret = NULL;
13328 xmlFreeDoc(ctxt->myDoc);
13329 ctxt->myDoc = NULL;
13330 }
13331 if (sax != NULL)
13332 ctxt->sax = NULL;
13333 xmlFreeParserCtxt(ctxt);
13334
13335 return(ret);
13336}
13337
13338/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013339 * xmlSAXParseFile:
13340 * @sax: the SAX handler block
13341 * @filename: the filename
13342 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13343 * documents
13344 *
13345 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13346 * compressed document is provided by default if found at compile-time.
13347 * It use the given SAX function block to handle the parsing callback.
13348 * If sax is NULL, fallback to the default DOM tree building routines.
13349 *
13350 * Returns the resulting document tree
13351 */
13352
13353xmlDocPtr
13354xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13355 int recovery) {
13356 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13357}
13358
13359/**
Owen Taylor3473f882001-02-23 17:55:21 +000013360 * xmlRecoverDoc:
13361 * @cur: a pointer to an array of xmlChar
13362 *
13363 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013364 * In the case the document is not Well Formed, a attempt to build a
13365 * tree is tried anyway
13366 *
13367 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013368 */
13369
13370xmlDocPtr
13371xmlRecoverDoc(xmlChar *cur) {
13372 return(xmlSAXParseDoc(NULL, cur, 1));
13373}
13374
13375/**
13376 * xmlParseFile:
13377 * @filename: the filename
13378 *
13379 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13380 * compressed document is provided by default if found at compile-time.
13381 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013382 * Returns the resulting document tree if the file was wellformed,
13383 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013384 */
13385
13386xmlDocPtr
13387xmlParseFile(const char *filename) {
13388 return(xmlSAXParseFile(NULL, filename, 0));
13389}
13390
13391/**
13392 * xmlRecoverFile:
13393 * @filename: the filename
13394 *
13395 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13396 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013397 * In the case the document is not Well Formed, it attempts to build
13398 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013399 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013400 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013401 */
13402
13403xmlDocPtr
13404xmlRecoverFile(const char *filename) {
13405 return(xmlSAXParseFile(NULL, filename, 1));
13406}
13407
13408
13409/**
13410 * xmlSetupParserForBuffer:
13411 * @ctxt: an XML parser context
13412 * @buffer: a xmlChar * buffer
13413 * @filename: a file name
13414 *
13415 * Setup the parser context to parse a new buffer; Clears any prior
13416 * contents from the parser context. The buffer parameter must not be
13417 * NULL, but the filename parameter can be
13418 */
13419void
13420xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13421 const char* filename)
13422{
13423 xmlParserInputPtr input;
13424
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013425 if ((ctxt == NULL) || (buffer == NULL))
13426 return;
13427
Owen Taylor3473f882001-02-23 17:55:21 +000013428 input = xmlNewInputStream(ctxt);
13429 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013430 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013431 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013432 return;
13433 }
13434
13435 xmlClearParserCtxt(ctxt);
13436 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013437 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013438 input->base = buffer;
13439 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013440 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013441 inputPush(ctxt, input);
13442}
13443
13444/**
13445 * xmlSAXUserParseFile:
13446 * @sax: a SAX handler
13447 * @user_data: The user data returned on SAX callbacks
13448 * @filename: a file name
13449 *
13450 * parse an XML file and call the given SAX handler routines.
13451 * Automatic support for ZLIB/Compress compressed document is provided
13452 *
13453 * Returns 0 in case of success or a error number otherwise
13454 */
13455int
13456xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13457 const char *filename) {
13458 int ret = 0;
13459 xmlParserCtxtPtr ctxt;
13460
13461 ctxt = xmlCreateFileParserCtxt(filename);
13462 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013463 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013464 xmlFree(ctxt->sax);
13465 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013466 xmlDetectSAX2(ctxt);
13467
Owen Taylor3473f882001-02-23 17:55:21 +000013468 if (user_data != NULL)
13469 ctxt->userData = user_data;
13470
13471 xmlParseDocument(ctxt);
13472
13473 if (ctxt->wellFormed)
13474 ret = 0;
13475 else {
13476 if (ctxt->errNo != 0)
13477 ret = ctxt->errNo;
13478 else
13479 ret = -1;
13480 }
13481 if (sax != NULL)
13482 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013483 if (ctxt->myDoc != NULL) {
13484 xmlFreeDoc(ctxt->myDoc);
13485 ctxt->myDoc = NULL;
13486 }
Owen Taylor3473f882001-02-23 17:55:21 +000013487 xmlFreeParserCtxt(ctxt);
13488
13489 return ret;
13490}
Daniel Veillard81273902003-09-30 00:43:48 +000013491#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013492
13493/************************************************************************
13494 * *
13495 * Front ends when parsing from memory *
13496 * *
13497 ************************************************************************/
13498
13499/**
13500 * xmlCreateMemoryParserCtxt:
13501 * @buffer: a pointer to a char array
13502 * @size: the size of the array
13503 *
13504 * Create a parser context for an XML in-memory document.
13505 *
13506 * Returns the new parser context or NULL
13507 */
13508xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013509xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013510 xmlParserCtxtPtr ctxt;
13511 xmlParserInputPtr input;
13512 xmlParserInputBufferPtr buf;
13513
13514 if (buffer == NULL)
13515 return(NULL);
13516 if (size <= 0)
13517 return(NULL);
13518
13519 ctxt = xmlNewParserCtxt();
13520 if (ctxt == NULL)
13521 return(NULL);
13522
Daniel Veillard53350552003-09-18 13:35:51 +000013523 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013524 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013525 if (buf == NULL) {
13526 xmlFreeParserCtxt(ctxt);
13527 return(NULL);
13528 }
Owen Taylor3473f882001-02-23 17:55:21 +000013529
13530 input = xmlNewInputStream(ctxt);
13531 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013532 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013533 xmlFreeParserCtxt(ctxt);
13534 return(NULL);
13535 }
13536
13537 input->filename = NULL;
13538 input->buf = buf;
13539 input->base = input->buf->buffer->content;
13540 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013541 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013542
13543 inputPush(ctxt, input);
13544 return(ctxt);
13545}
13546
Daniel Veillard81273902003-09-30 00:43:48 +000013547#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013548/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013549 * xmlSAXParseMemoryWithData:
13550 * @sax: the SAX handler block
13551 * @buffer: an pointer to a char array
13552 * @size: the size of the array
13553 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13554 * documents
13555 * @data: the userdata
13556 *
13557 * parse an XML in-memory block and use the given SAX function block
13558 * to handle the parsing callback. If sax is NULL, fallback to the default
13559 * DOM tree building routines.
13560 *
13561 * User data (void *) is stored within the parser context in the
13562 * context's _private member, so it is available nearly everywhere in libxml
13563 *
13564 * Returns the resulting document tree
13565 */
13566
13567xmlDocPtr
13568xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13569 int size, int recovery, void *data) {
13570 xmlDocPtr ret;
13571 xmlParserCtxtPtr ctxt;
13572
13573 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13574 if (ctxt == NULL) return(NULL);
13575 if (sax != NULL) {
13576 if (ctxt->sax != NULL)
13577 xmlFree(ctxt->sax);
13578 ctxt->sax = sax;
13579 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013580 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013581 if (data!=NULL) {
13582 ctxt->_private=data;
13583 }
13584
Daniel Veillardadba5f12003-04-04 16:09:01 +000013585 ctxt->recovery = recovery;
13586
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013587 xmlParseDocument(ctxt);
13588
13589 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13590 else {
13591 ret = NULL;
13592 xmlFreeDoc(ctxt->myDoc);
13593 ctxt->myDoc = NULL;
13594 }
13595 if (sax != NULL)
13596 ctxt->sax = NULL;
13597 xmlFreeParserCtxt(ctxt);
13598
13599 return(ret);
13600}
13601
13602/**
Owen Taylor3473f882001-02-23 17:55:21 +000013603 * xmlSAXParseMemory:
13604 * @sax: the SAX handler block
13605 * @buffer: an pointer to a char array
13606 * @size: the size of the array
13607 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13608 * documents
13609 *
13610 * parse an XML in-memory block and use the given SAX function block
13611 * to handle the parsing callback. If sax is NULL, fallback to the default
13612 * DOM tree building routines.
13613 *
13614 * Returns the resulting document tree
13615 */
13616xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013617xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13618 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013619 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013620}
13621
13622/**
13623 * xmlParseMemory:
13624 * @buffer: an pointer to a char array
13625 * @size: the size of the array
13626 *
13627 * parse an XML in-memory block and build a tree.
13628 *
13629 * Returns the resulting document tree
13630 */
13631
Daniel Veillard50822cb2001-07-26 20:05:51 +000013632xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013633 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13634}
13635
13636/**
13637 * xmlRecoverMemory:
13638 * @buffer: an pointer to a char array
13639 * @size: the size of the array
13640 *
13641 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013642 * In the case the document is not Well Formed, an attempt to
13643 * build a tree is tried anyway
13644 *
13645 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013646 */
13647
Daniel Veillard50822cb2001-07-26 20:05:51 +000013648xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013649 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13650}
13651
13652/**
13653 * xmlSAXUserParseMemory:
13654 * @sax: a SAX handler
13655 * @user_data: The user data returned on SAX callbacks
13656 * @buffer: an in-memory XML document input
13657 * @size: the length of the XML document in bytes
13658 *
13659 * A better SAX parsing routine.
13660 * parse an XML in-memory buffer and call the given SAX handler routines.
13661 *
13662 * Returns 0 in case of success or a error number otherwise
13663 */
13664int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013665 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013666 int ret = 0;
13667 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013668
13669 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13670 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013671 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13672 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013673 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013674 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013675
Daniel Veillard30211a02001-04-26 09:33:18 +000013676 if (user_data != NULL)
13677 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000013678
13679 xmlParseDocument(ctxt);
13680
13681 if (ctxt->wellFormed)
13682 ret = 0;
13683 else {
13684 if (ctxt->errNo != 0)
13685 ret = ctxt->errNo;
13686 else
13687 ret = -1;
13688 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013689 if (sax != NULL)
13690 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013691 if (ctxt->myDoc != NULL) {
13692 xmlFreeDoc(ctxt->myDoc);
13693 ctxt->myDoc = NULL;
13694 }
Owen Taylor3473f882001-02-23 17:55:21 +000013695 xmlFreeParserCtxt(ctxt);
13696
13697 return ret;
13698}
Daniel Veillard81273902003-09-30 00:43:48 +000013699#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013700
13701/**
13702 * xmlCreateDocParserCtxt:
13703 * @cur: a pointer to an array of xmlChar
13704 *
13705 * Creates a parser context for an XML in-memory document.
13706 *
13707 * Returns the new parser context or NULL
13708 */
13709xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013710xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013711 int len;
13712
13713 if (cur == NULL)
13714 return(NULL);
13715 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013716 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013717}
13718
Daniel Veillard81273902003-09-30 00:43:48 +000013719#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013720/**
13721 * xmlSAXParseDoc:
13722 * @sax: the SAX handler block
13723 * @cur: a pointer to an array of xmlChar
13724 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13725 * documents
13726 *
13727 * parse an XML in-memory document and build a tree.
13728 * It use the given SAX function block to handle the parsing callback.
13729 * If sax is NULL, fallback to the default DOM tree building routines.
13730 *
13731 * Returns the resulting document tree
13732 */
13733
13734xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013735xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013736 xmlDocPtr ret;
13737 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013738 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013739
Daniel Veillard38936062004-11-04 17:45:11 +000013740 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013741
13742
13743 ctxt = xmlCreateDocParserCtxt(cur);
13744 if (ctxt == NULL) return(NULL);
13745 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013746 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013747 ctxt->sax = sax;
13748 ctxt->userData = NULL;
13749 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013750 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013751
13752 xmlParseDocument(ctxt);
13753 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13754 else {
13755 ret = NULL;
13756 xmlFreeDoc(ctxt->myDoc);
13757 ctxt->myDoc = NULL;
13758 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013759 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013760 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013761 xmlFreeParserCtxt(ctxt);
13762
13763 return(ret);
13764}
13765
13766/**
13767 * xmlParseDoc:
13768 * @cur: a pointer to an array of xmlChar
13769 *
13770 * parse an XML in-memory document and build a tree.
13771 *
13772 * Returns the resulting document tree
13773 */
13774
13775xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013776xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013777 return(xmlSAXParseDoc(NULL, cur, 0));
13778}
Daniel Veillard81273902003-09-30 00:43:48 +000013779#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013780
Daniel Veillard81273902003-09-30 00:43:48 +000013781#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013782/************************************************************************
13783 * *
13784 * Specific function to keep track of entities references *
13785 * and used by the XSLT debugger *
13786 * *
13787 ************************************************************************/
13788
13789static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13790
13791/**
13792 * xmlAddEntityReference:
13793 * @ent : A valid entity
13794 * @firstNode : A valid first node for children of entity
13795 * @lastNode : A valid last node of children entity
13796 *
13797 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13798 */
13799static void
13800xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13801 xmlNodePtr lastNode)
13802{
13803 if (xmlEntityRefFunc != NULL) {
13804 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13805 }
13806}
13807
13808
13809/**
13810 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013811 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013812 *
13813 * Set the function to call call back when a xml reference has been made
13814 */
13815void
13816xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13817{
13818 xmlEntityRefFunc = func;
13819}
Daniel Veillard81273902003-09-30 00:43:48 +000013820#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013821
13822/************************************************************************
13823 * *
13824 * Miscellaneous *
13825 * *
13826 ************************************************************************/
13827
13828#ifdef LIBXML_XPATH_ENABLED
13829#include <libxml/xpath.h>
13830#endif
13831
Daniel Veillardffa3c742005-07-21 13:24:09 +000013832extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013833static int xmlParserInitialized = 0;
13834
13835/**
13836 * xmlInitParser:
13837 *
13838 * Initialization function for the XML parser.
13839 * This is not reentrant. Call once before processing in case of
13840 * use in multithreaded programs.
13841 */
13842
13843void
13844xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013845 if (xmlParserInitialized != 0)
13846 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013847
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013848#ifdef LIBXML_THREAD_ENABLED
13849 __xmlGlobalInitMutexLock();
13850 if (xmlParserInitialized == 0) {
13851#endif
13852 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13853 (xmlGenericError == NULL))
13854 initGenericErrorDefaultFunc(NULL);
13855 xmlInitGlobals();
13856 xmlInitThreads();
13857 xmlInitMemory();
13858 xmlInitCharEncodingHandlers();
13859 xmlDefaultSAXHandlerInit();
13860 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013861#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013862 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013863#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013864#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013865 htmlInitAutoClose();
13866 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013867#endif
13868#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013869 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013870#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013871 xmlParserInitialized = 1;
13872#ifdef LIBXML_THREAD_ENABLED
13873 }
13874 __xmlGlobalInitMutexUnlock();
13875#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013876}
13877
13878/**
13879 * xmlCleanupParser:
13880 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013881 * This function name is somewhat misleading. It does not clean up
13882 * parser state, it cleans up memory allocated by the library itself.
13883 * It is a cleanup function for the XML library. It tries to reclaim all
13884 * related global memory allocated for the library processing.
13885 * It doesn't deallocate any document related memory. One should
13886 * call xmlCleanupParser() only when the process has finished using
13887 * the library and all XML/HTML documents built with it.
13888 * See also xmlInitParser() which has the opposite function of preparing
13889 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000013890 *
13891 * WARNING: if your application is multithreaded or has plugin support
13892 * calling this may crash the application if another thread or
13893 * a plugin is still using libxml2. It's sometimes very hard to
13894 * guess if libxml2 is in use in the application, some libraries
13895 * or plugins may use it without notice. In case of doubt abstain
13896 * from calling this function or do it just before calling exit()
13897 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000013898 */
13899
13900void
13901xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013902 if (!xmlParserInitialized)
13903 return;
13904
Owen Taylor3473f882001-02-23 17:55:21 +000013905 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013906#ifdef LIBXML_CATALOG_ENABLED
13907 xmlCatalogCleanup();
13908#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013909 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013910 xmlCleanupInputCallbacks();
13911#ifdef LIBXML_OUTPUT_ENABLED
13912 xmlCleanupOutputCallbacks();
13913#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013914#ifdef LIBXML_SCHEMAS_ENABLED
13915 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013916 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013917#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013918 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013919 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013920 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013921 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013922 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013923}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013924
13925/************************************************************************
13926 * *
13927 * New set (2.6.0) of simpler and more flexible APIs *
13928 * *
13929 ************************************************************************/
13930
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is expanded; a NULL @str is ignored, and when "dict" is NULL the string
 * is always freed.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: it must be followed by a semicolon and can safely be
 * used as the body of an if/else.
 */
#define DICT_FREE(str)						\
	do {							\
	    if ((str) && ((!dict) ||				\
	        (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	        xmlFree((char *)(str));				\
	} while (0)
13942
13943/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013944 * xmlCtxtReset:
13945 * @ctxt: an XML parser context
13946 *
13947 * Reset a parser context
13948 */
13949void
13950xmlCtxtReset(xmlParserCtxtPtr ctxt)
13951{
13952 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013953 xmlDictPtr dict;
13954
13955 if (ctxt == NULL)
13956 return;
13957
13958 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013959
13960 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13961 xmlFreeInputStream(input);
13962 }
13963 ctxt->inputNr = 0;
13964 ctxt->input = NULL;
13965
13966 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013967 if (ctxt->spaceTab != NULL) {
13968 ctxt->spaceTab[0] = -1;
13969 ctxt->space = &ctxt->spaceTab[0];
13970 } else {
13971 ctxt->space = NULL;
13972 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013973
13974
13975 ctxt->nodeNr = 0;
13976 ctxt->node = NULL;
13977
13978 ctxt->nameNr = 0;
13979 ctxt->name = NULL;
13980
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013981 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013982 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013983 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013984 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013985 DICT_FREE(ctxt->directory);
13986 ctxt->directory = NULL;
13987 DICT_FREE(ctxt->extSubURI);
13988 ctxt->extSubURI = NULL;
13989 DICT_FREE(ctxt->extSubSystem);
13990 ctxt->extSubSystem = NULL;
13991 if (ctxt->myDoc != NULL)
13992 xmlFreeDoc(ctxt->myDoc);
13993 ctxt->myDoc = NULL;
13994
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013995 ctxt->standalone = -1;
13996 ctxt->hasExternalSubset = 0;
13997 ctxt->hasPErefs = 0;
13998 ctxt->html = 0;
13999 ctxt->external = 0;
14000 ctxt->instate = XML_PARSER_START;
14001 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014002
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014003 ctxt->wellFormed = 1;
14004 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014005 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014006 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014007#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014008 ctxt->vctxt.userData = ctxt;
14009 ctxt->vctxt.error = xmlParserValidityError;
14010 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014011#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014012 ctxt->record_info = 0;
14013 ctxt->nbChars = 0;
14014 ctxt->checkIndex = 0;
14015 ctxt->inSubset = 0;
14016 ctxt->errNo = XML_ERR_OK;
14017 ctxt->depth = 0;
14018 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14019 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014020 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014021 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014022 xmlInitNodeInfoSeq(&ctxt->node_seq);
14023
14024 if (ctxt->attsDefault != NULL) {
14025 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14026 ctxt->attsDefault = NULL;
14027 }
14028 if (ctxt->attsSpecial != NULL) {
14029 xmlHashFree(ctxt->attsSpecial, NULL);
14030 ctxt->attsSpecial = NULL;
14031 }
14032
Daniel Veillard4432df22003-09-28 18:58:27 +000014033#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014034 if (ctxt->catalogs != NULL)
14035 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014036#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014037 if (ctxt->lastError.code != XML_ERR_OK)
14038 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014039}
14040
14041/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014042 * xmlCtxtResetPush:
14043 * @ctxt: an XML parser context
14044 * @chunk: a pointer to an array of chars
14045 * @size: number of chars in the array
14046 * @filename: an optional file name or URI
14047 * @encoding: the document encoding, or NULL
14048 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014049 * Reset a push parser context
14050 *
14051 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014052 */
14053int
14054xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14055 int size, const char *filename, const char *encoding)
14056{
14057 xmlParserInputPtr inputStream;
14058 xmlParserInputBufferPtr buf;
14059 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14060
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014061 if (ctxt == NULL)
14062 return(1);
14063
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014064 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14065 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14066
14067 buf = xmlAllocParserInputBuffer(enc);
14068 if (buf == NULL)
14069 return(1);
14070
14071 if (ctxt == NULL) {
14072 xmlFreeParserInputBuffer(buf);
14073 return(1);
14074 }
14075
14076 xmlCtxtReset(ctxt);
14077
14078 if (ctxt->pushTab == NULL) {
14079 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14080 sizeof(xmlChar *));
14081 if (ctxt->pushTab == NULL) {
14082 xmlErrMemory(ctxt, NULL);
14083 xmlFreeParserInputBuffer(buf);
14084 return(1);
14085 }
14086 }
14087
14088 if (filename == NULL) {
14089 ctxt->directory = NULL;
14090 } else {
14091 ctxt->directory = xmlParserGetDirectory(filename);
14092 }
14093
14094 inputStream = xmlNewInputStream(ctxt);
14095 if (inputStream == NULL) {
14096 xmlFreeParserInputBuffer(buf);
14097 return(1);
14098 }
14099
14100 if (filename == NULL)
14101 inputStream->filename = NULL;
14102 else
14103 inputStream->filename = (char *)
14104 xmlCanonicPath((const xmlChar *) filename);
14105 inputStream->buf = buf;
14106 inputStream->base = inputStream->buf->buffer->content;
14107 inputStream->cur = inputStream->buf->buffer->content;
14108 inputStream->end =
14109 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14110
14111 inputPush(ctxt, inputStream);
14112
14113 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14114 (ctxt->input->buf != NULL)) {
14115 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14116 int cur = ctxt->input->cur - ctxt->input->base;
14117
14118 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14119
14120 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14121 ctxt->input->cur = ctxt->input->base + cur;
14122 ctxt->input->end =
14123 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14124 use];
14125#ifdef DEBUG_PUSH
14126 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14127#endif
14128 }
14129
14130 if (encoding != NULL) {
14131 xmlCharEncodingHandlerPtr hdlr;
14132
Daniel Veillard37334572008-07-31 08:20:02 +000014133 if (ctxt->encoding != NULL)
14134 xmlFree((xmlChar *) ctxt->encoding);
14135 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14136
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014137 hdlr = xmlFindCharEncodingHandler(encoding);
14138 if (hdlr != NULL) {
14139 xmlSwitchToEncoding(ctxt, hdlr);
14140 } else {
14141 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14142 "Unsupported encoding %s\n", BAD_CAST encoding);
14143 }
14144 } else if (enc != XML_CHAR_ENCODING_NONE) {
14145 xmlSwitchEncoding(ctxt, enc);
14146 }
14147
14148 return(0);
14149}
14150
Daniel Veillard37334572008-07-31 08:20:02 +000014151
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014152/**
Daniel Veillard37334572008-07-31 08:20:02 +000014153 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014154 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014155 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014156 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014157 *
14158 * Applies the options to the parser context
14159 *
14160 * Returns 0 in case of success, the set of unknown or unimplemented options
14161 * in case of error.
14162 */
Daniel Veillard37334572008-07-31 08:20:02 +000014163static int
14164xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014165{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014166 if (ctxt == NULL)
14167 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014168 if (encoding != NULL) {
14169 if (ctxt->encoding != NULL)
14170 xmlFree((xmlChar *) ctxt->encoding);
14171 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14172 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014173 if (options & XML_PARSE_RECOVER) {
14174 ctxt->recovery = 1;
14175 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014176 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014177 } else
14178 ctxt->recovery = 0;
14179 if (options & XML_PARSE_DTDLOAD) {
14180 ctxt->loadsubset = XML_DETECT_IDS;
14181 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014182 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014183 } else
14184 ctxt->loadsubset = 0;
14185 if (options & XML_PARSE_DTDATTR) {
14186 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14187 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014188 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014189 }
14190 if (options & XML_PARSE_NOENT) {
14191 ctxt->replaceEntities = 1;
14192 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14193 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014194 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014195 } else
14196 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014197 if (options & XML_PARSE_PEDANTIC) {
14198 ctxt->pedantic = 1;
14199 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014200 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014201 } else
14202 ctxt->pedantic = 0;
14203 if (options & XML_PARSE_NOBLANKS) {
14204 ctxt->keepBlanks = 0;
14205 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14206 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014207 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014208 } else
14209 ctxt->keepBlanks = 1;
14210 if (options & XML_PARSE_DTDVALID) {
14211 ctxt->validate = 1;
14212 if (options & XML_PARSE_NOWARNING)
14213 ctxt->vctxt.warning = NULL;
14214 if (options & XML_PARSE_NOERROR)
14215 ctxt->vctxt.error = NULL;
14216 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014217 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014218 } else
14219 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014220 if (options & XML_PARSE_NOWARNING) {
14221 ctxt->sax->warning = NULL;
14222 options -= XML_PARSE_NOWARNING;
14223 }
14224 if (options & XML_PARSE_NOERROR) {
14225 ctxt->sax->error = NULL;
14226 ctxt->sax->fatalError = NULL;
14227 options -= XML_PARSE_NOERROR;
14228 }
Daniel Veillard81273902003-09-30 00:43:48 +000014229#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014230 if (options & XML_PARSE_SAX1) {
14231 ctxt->sax->startElement = xmlSAX2StartElement;
14232 ctxt->sax->endElement = xmlSAX2EndElement;
14233 ctxt->sax->startElementNs = NULL;
14234 ctxt->sax->endElementNs = NULL;
14235 ctxt->sax->initialized = 1;
14236 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014237 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014238 }
Daniel Veillard81273902003-09-30 00:43:48 +000014239#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014240 if (options & XML_PARSE_NODICT) {
14241 ctxt->dictNames = 0;
14242 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014243 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014244 } else {
14245 ctxt->dictNames = 1;
14246 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014247 if (options & XML_PARSE_NOCDATA) {
14248 ctxt->sax->cdataBlock = NULL;
14249 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014250 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014251 }
14252 if (options & XML_PARSE_NSCLEAN) {
14253 ctxt->options |= XML_PARSE_NSCLEAN;
14254 options -= XML_PARSE_NSCLEAN;
14255 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014256 if (options & XML_PARSE_NONET) {
14257 ctxt->options |= XML_PARSE_NONET;
14258 options -= XML_PARSE_NONET;
14259 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014260 if (options & XML_PARSE_COMPACT) {
14261 ctxt->options |= XML_PARSE_COMPACT;
14262 options -= XML_PARSE_COMPACT;
14263 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014264 if (options & XML_PARSE_OLD10) {
14265 ctxt->options |= XML_PARSE_OLD10;
14266 options -= XML_PARSE_OLD10;
14267 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014268 if (options & XML_PARSE_NOBASEFIX) {
14269 ctxt->options |= XML_PARSE_NOBASEFIX;
14270 options -= XML_PARSE_NOBASEFIX;
14271 }
14272 if (options & XML_PARSE_HUGE) {
14273 ctxt->options |= XML_PARSE_HUGE;
14274 options -= XML_PARSE_HUGE;
14275 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014276 if (options & XML_PARSE_OLDSAX) {
14277 ctxt->options |= XML_PARSE_OLDSAX;
14278 options -= XML_PARSE_OLDSAX;
14279 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014280 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014281 return (options);
14282}
14283
14284/**
Daniel Veillard37334572008-07-31 08:20:02 +000014285 * xmlCtxtUseOptions:
14286 * @ctxt: an XML parser context
14287 * @options: a combination of xmlParserOption
14288 *
14289 * Applies the options to the parser context
14290 *
14291 * Returns 0 in case of success, the set of unknown or unimplemented options
14292 * in case of error.
14293 */
14294int
14295xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14296{
14297 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14298}
14299
14300/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014301 * xmlDoRead:
14302 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014303 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014304 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014305 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014306 * @reuse: keep the context for reuse
14307 *
14308 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014309 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014310 * Returns the resulting document tree or NULL
14311 */
14312static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014313xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14314 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014315{
14316 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014317
14318 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014319 if (encoding != NULL) {
14320 xmlCharEncodingHandlerPtr hdlr;
14321
14322 hdlr = xmlFindCharEncodingHandler(encoding);
14323 if (hdlr != NULL)
14324 xmlSwitchToEncoding(ctxt, hdlr);
14325 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014326 if ((URL != NULL) && (ctxt->input != NULL) &&
14327 (ctxt->input->filename == NULL))
14328 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014329 xmlParseDocument(ctxt);
14330 if ((ctxt->wellFormed) || ctxt->recovery)
14331 ret = ctxt->myDoc;
14332 else {
14333 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014334 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014335 xmlFreeDoc(ctxt->myDoc);
14336 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014337 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014338 ctxt->myDoc = NULL;
14339 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014340 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014341 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014342
14343 return (ret);
14344}
14345
14346/**
14347 * xmlReadDoc:
14348 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014349 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014350 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014351 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014352 *
14353 * parse an XML in-memory document and build a tree.
14354 *
14355 * Returns the resulting document tree
14356 */
14357xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014358xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014359{
14360 xmlParserCtxtPtr ctxt;
14361
14362 if (cur == NULL)
14363 return (NULL);
14364
14365 ctxt = xmlCreateDocParserCtxt(cur);
14366 if (ctxt == NULL)
14367 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014368 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014369}
14370
14371/**
14372 * xmlReadFile:
14373 * @filename: a file or URL
14374 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014375 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014376 *
14377 * parse an XML file from the filesystem or the network.
14378 *
14379 * Returns the resulting document tree
14380 */
14381xmlDocPtr
14382xmlReadFile(const char *filename, const char *encoding, int options)
14383{
14384 xmlParserCtxtPtr ctxt;
14385
Daniel Veillard61b93382003-11-03 14:28:31 +000014386 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014387 if (ctxt == NULL)
14388 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014389 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014390}
14391
14392/**
14393 * xmlReadMemory:
14394 * @buffer: a pointer to a char array
14395 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014396 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014397 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014398 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014399 *
14400 * parse an XML in-memory document and build a tree.
14401 *
14402 * Returns the resulting document tree
14403 */
14404xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014405xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014406{
14407 xmlParserCtxtPtr ctxt;
14408
14409 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14410 if (ctxt == NULL)
14411 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014412 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014413}
14414
14415/**
14416 * xmlReadFd:
14417 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014418 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014419 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014420 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014421 *
14422 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014423 * NOTE that the file descriptor will not be closed when the
14424 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014425 *
14426 * Returns the resulting document tree
14427 */
14428xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014429xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014430{
14431 xmlParserCtxtPtr ctxt;
14432 xmlParserInputBufferPtr input;
14433 xmlParserInputPtr stream;
14434
14435 if (fd < 0)
14436 return (NULL);
14437
14438 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14439 if (input == NULL)
14440 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014441 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014442 ctxt = xmlNewParserCtxt();
14443 if (ctxt == NULL) {
14444 xmlFreeParserInputBuffer(input);
14445 return (NULL);
14446 }
14447 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14448 if (stream == NULL) {
14449 xmlFreeParserInputBuffer(input);
14450 xmlFreeParserCtxt(ctxt);
14451 return (NULL);
14452 }
14453 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014454 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014455}
14456
14457/**
14458 * xmlReadIO:
14459 * @ioread: an I/O read function
14460 * @ioclose: an I/O close function
14461 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014462 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014463 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014464 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014465 *
14466 * parse an XML document from I/O functions and source and build a tree.
14467 *
14468 * Returns the resulting document tree
14469 */
14470xmlDocPtr
14471xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014472 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014473{
14474 xmlParserCtxtPtr ctxt;
14475 xmlParserInputBufferPtr input;
14476 xmlParserInputPtr stream;
14477
14478 if (ioread == NULL)
14479 return (NULL);
14480
14481 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14482 XML_CHAR_ENCODING_NONE);
14483 if (input == NULL)
14484 return (NULL);
14485 ctxt = xmlNewParserCtxt();
14486 if (ctxt == NULL) {
14487 xmlFreeParserInputBuffer(input);
14488 return (NULL);
14489 }
14490 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14491 if (stream == NULL) {
14492 xmlFreeParserInputBuffer(input);
14493 xmlFreeParserCtxt(ctxt);
14494 return (NULL);
14495 }
14496 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014497 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014498}
14499
14500/**
14501 * xmlCtxtReadDoc:
14502 * @ctxt: an XML parser context
14503 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014504 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014505 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014506 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014507 *
14508 * parse an XML in-memory document and build a tree.
14509 * This reuses the existing @ctxt parser context
14510 *
14511 * Returns the resulting document tree
14512 */
14513xmlDocPtr
14514xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014515 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014516{
14517 xmlParserInputPtr stream;
14518
14519 if (cur == NULL)
14520 return (NULL);
14521 if (ctxt == NULL)
14522 return (NULL);
14523
14524 xmlCtxtReset(ctxt);
14525
14526 stream = xmlNewStringInputStream(ctxt, cur);
14527 if (stream == NULL) {
14528 return (NULL);
14529 }
14530 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014531 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014532}
14533
14534/**
14535 * xmlCtxtReadFile:
14536 * @ctxt: an XML parser context
14537 * @filename: a file or URL
14538 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014539 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014540 *
14541 * parse an XML file from the filesystem or the network.
14542 * This reuses the existing @ctxt parser context
14543 *
14544 * Returns the resulting document tree
14545 */
14546xmlDocPtr
14547xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14548 const char *encoding, int options)
14549{
14550 xmlParserInputPtr stream;
14551
14552 if (filename == NULL)
14553 return (NULL);
14554 if (ctxt == NULL)
14555 return (NULL);
14556
14557 xmlCtxtReset(ctxt);
14558
Daniel Veillard29614c72004-11-26 10:47:26 +000014559 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014560 if (stream == NULL) {
14561 return (NULL);
14562 }
14563 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014564 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014565}
14566
14567/**
14568 * xmlCtxtReadMemory:
14569 * @ctxt: an XML parser context
14570 * @buffer: a pointer to a char array
14571 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014572 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014573 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014574 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014575 *
14576 * parse an XML in-memory document and build a tree.
14577 * This reuses the existing @ctxt parser context
14578 *
14579 * Returns the resulting document tree
14580 */
14581xmlDocPtr
14582xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014583 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014584{
14585 xmlParserInputBufferPtr input;
14586 xmlParserInputPtr stream;
14587
14588 if (ctxt == NULL)
14589 return (NULL);
14590 if (buffer == NULL)
14591 return (NULL);
14592
14593 xmlCtxtReset(ctxt);
14594
14595 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14596 if (input == NULL) {
14597 return(NULL);
14598 }
14599
14600 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14601 if (stream == NULL) {
14602 xmlFreeParserInputBuffer(input);
14603 return(NULL);
14604 }
14605
14606 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014607 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014608}
14609
14610/**
14611 * xmlCtxtReadFd:
14612 * @ctxt: an XML parser context
14613 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014614 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014615 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014616 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014617 *
14618 * parse an XML from a file descriptor and build a tree.
14619 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014620 * NOTE that the file descriptor will not be closed when the
14621 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014622 *
14623 * Returns the resulting document tree
14624 */
14625xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014626xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14627 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014628{
14629 xmlParserInputBufferPtr input;
14630 xmlParserInputPtr stream;
14631
14632 if (fd < 0)
14633 return (NULL);
14634 if (ctxt == NULL)
14635 return (NULL);
14636
14637 xmlCtxtReset(ctxt);
14638
14639
14640 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14641 if (input == NULL)
14642 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014643 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014644 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14645 if (stream == NULL) {
14646 xmlFreeParserInputBuffer(input);
14647 return (NULL);
14648 }
14649 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014650 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014651}
14652
14653/**
14654 * xmlCtxtReadIO:
14655 * @ctxt: an XML parser context
14656 * @ioread: an I/O read function
14657 * @ioclose: an I/O close function
14658 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014659 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014660 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014661 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014662 *
14663 * parse an XML document from I/O functions and source and build a tree.
14664 * This reuses the existing @ctxt parser context
14665 *
14666 * Returns the resulting document tree
14667 */
14668xmlDocPtr
14669xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14670 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014671 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014672 const char *encoding, int options)
14673{
14674 xmlParserInputBufferPtr input;
14675 xmlParserInputPtr stream;
14676
14677 if (ioread == NULL)
14678 return (NULL);
14679 if (ctxt == NULL)
14680 return (NULL);
14681
14682 xmlCtxtReset(ctxt);
14683
14684 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14685 XML_CHAR_ENCODING_NONE);
14686 if (input == NULL)
14687 return (NULL);
14688 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14689 if (stream == NULL) {
14690 xmlFreeParserInputBuffer(input);
14691 return (NULL);
14692 }
14693 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014694 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014695}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014696
14697#define bottom_parser
14698#include "elfgcchack.h"