blob: f02aa1c82da9cfe8ee9ad3468f1cef270ebc3e3e [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
Rob Richards9c0aa472009-03-26 18:10:19 +000086static xmlParserCtxtPtr
87xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090/************************************************************************
91 * *
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
93 * *
94 ************************************************************************/
95
96#define XML_PARSER_BIG_ENTITY 1000
97#define XML_PARSER_LOT_ENTITY 5000
98
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
105#define XML_PARSER_NON_LINEAR 10
106
107/*
108 * xmlParserEntityCheck
109 *
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential linear entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
114 * parser option.
115 */
116static int
117xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118 xmlEntityPtr ent)
119{
Daniel Veillardcba68392008-08-29 12:43:40 +0000120 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000121
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123 return (0);
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125 return (1);
126 if (size != 0) {
127 /*
128 * Do the check based on the replacement size of the entity
129 */
130 if (size < XML_PARSER_BIG_ENTITY)
131 return(0);
132
133 /*
134 * A limit on the amount of text data reasonably used
135 */
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
139 }
140 consumed += ctxt->sizeentities;
141
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144 return (0);
145 } else if (ent != NULL) {
146 /*
147 * use the number of parsed entities in the replacement
148 */
149 size = ent->checked;
150
151 /*
152 * The amount of data parsed counting entities size only once
153 */
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
157 }
158 consumed += ctxt->sizeentities;
159
160 /*
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
163 */
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165 return (0);
166 } else {
167 /*
168 * strange we got no data for checking just return
169 */
170 return (0);
171 }
172
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174 return (1);
175}
176
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000177/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000178 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000179 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000180 * arbitrary depth limit for the XML documents that we allow to
181 * process. This is not a limitation of the parser but a safety
182 * boundary feature. It can be disabled with the XML_PARSE_HUGE
183 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000184 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000185unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000186
Daniel Veillard0fb18932003-09-07 09:14:37 +0000187
Daniel Veillard0161e632008-08-28 15:36:32 +0000188
189#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000190#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000191#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000192#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193
Owen Taylor3473f882001-02-23 17:55:21 +0000194/*
Owen Taylor3473f882001-02-23 17:55:21 +0000195 * List of XML prefixed PI allowed by W3C specs
196 */
197
Daniel Veillardb44025c2001-10-11 22:55:55 +0000198static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000199 "xml-stylesheet",
200 NULL
201};
202
Daniel Veillarda07050d2003-10-19 14:46:32 +0000203
Owen Taylor3473f882001-02-23 17:55:21 +0000204/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000205xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
207
Daniel Veillard7d515752003-09-26 19:12:37 +0000208static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000209xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000211 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000212 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000213
Daniel Veillard37334572008-07-31 08:20:02 +0000214static int
215xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000218static void
219xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000221#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000222
Daniel Veillard7d515752003-09-26 19:12:37 +0000223static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000224xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000227static int
228xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229
Daniel Veillarde57ec792003-09-10 10:50:59 +0000230/************************************************************************
231 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 * Some factorized error routines *
233 * *
234 ************************************************************************/
235
236/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
241 *
242 * Handle a redefinition of attribute error
243 */
244static void
245xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
247{
Daniel Veillard157fee02003-10-31 10:36:03 +0000248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
250 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000251 if (ctxt != NULL)
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000253 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000254 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000255 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
256 (const char *) localname, NULL, NULL, 0, 0,
257 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000258 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000259 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
261 (const char *) prefix, (const char *) localname,
262 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
263 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000264 if (ctxt != NULL) {
265 ctxt->wellFormed = 0;
266 if (ctxt->recovery == 0)
267 ctxt->disableSAX = 1;
268 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000269}
270
271/**
272 * xmlFatalErr:
273 * @ctxt: an XML parser context
274 * @error: the error number
275 * @extra: extra information string
276 *
277 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
278 */
279static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000280xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000281{
282 const char *errmsg;
283
Daniel Veillard157fee02003-10-31 10:36:03 +0000284 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
285 (ctxt->instate == XML_PARSER_EOF))
286 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 switch (error) {
288 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000289 errmsg = "CharRef: invalid hexadecimal value\n";
290 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000291 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 errmsg = "CharRef: invalid decimal value\n";
293 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000294 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000295 errmsg = "CharRef: invalid value\n";
296 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000297 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000298 errmsg = "internal error";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "PEReference at end of document\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "PEReference in prolog\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "PEReference in epilog\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "PEReference: no name\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "PEReference: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "Detected an entity reference loop\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityValue: \" or ' expected\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "PEReferences forbidden in internal subset\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EntityValue: \" or ' expected\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "AttValue: \" or ' expected\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Unescaped '<' not allowed in attributes values\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "SystemLiteral \" or ' expected\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Unfinished System or Public ID \" or ' expected\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Sequence ']]>' not allowed in content\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "PUBLIC, the Public Identifier is missing\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Comment must not contain '--' (double-hyphen)\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "xmlParsePI : no target name\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Invalid PI name\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "NOTATION: Name expected here\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 errmsg = "'>' required to close NOTATION declaration\n";
362 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 errmsg = "Entity value required\n";
365 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 errmsg = "Fragment not allowed";
368 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000369 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370 errmsg = "'(' required to start ATTLIST enumeration\n";
371 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000372 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 errmsg = "NmToken expected in ATTLIST enumeration\n";
374 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 errmsg = "')' required to finish ATTLIST enumeration\n";
377 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
380 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000381 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
383 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000384 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 errmsg = "ContentDecl : Name or '(' expected\n";
386 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000387 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 errmsg =
392 "PEReference: forbidden within markup decl in internal subset\n";
393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 errmsg = "expected '>'\n";
396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 errmsg = "XML conditional section '[' expected\n";
399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000401 errmsg = "Content error in the external subset\n";
402 break;
403 case XML_ERR_CONDSEC_INVALID_KEYWORD:
404 errmsg =
405 "conditional section INCLUDE or IGNORE keyword expected\n";
406 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000407 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 errmsg = "XML conditional section not closed\n";
409 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000410 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 errmsg = "Text declaration '<?xml' required\n";
412 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000413 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 errmsg = "parsing XML declaration: '?>' expected\n";
415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 errmsg = "external parsed entities cannot be standalone\n";
418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 errmsg = "EntityRef: expecting ';'\n";
421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 errmsg = "DOCTYPE improperly terminated\n";
424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 errmsg = "EndTag: '</' not found\n";
427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 errmsg = "expected '='\n";
430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 errmsg = "String not closed expecting \" or '\n";
433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 errmsg = "String not started expecting ' or \"\n";
436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 errmsg = "Invalid XML encoding name\n";
439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 errmsg = "standalone accepts only 'yes' or 'no'\n";
442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 errmsg = "Document is empty\n";
445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 errmsg = "Extra content at the end of the document\n";
448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 errmsg = "chunk is not well balanced\n";
451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 errmsg = "extra content at the end of well balanced chunk\n";
454 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 errmsg = "Malformed declaration expecting version\n";
457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 case:
460 errmsg = "\n";
461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 default:
464 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000466 if (ctxt != NULL)
467 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000468 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
470 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000471 if (ctxt != NULL) {
472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000476}
477
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000478/**
479 * xmlFatalErrMsg:
480 * @ctxt: an XML parser context
481 * @error: the error number
482 * @msg: the error message
483 *
484 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
485 */
486static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
488 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000489{
Daniel Veillard157fee02003-10-31 10:36:03 +0000490 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
491 (ctxt->instate == XML_PARSER_EOF))
492 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000493 if (ctxt != NULL)
494 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000495 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200496 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000497 if (ctxt != NULL) {
498 ctxt->wellFormed = 0;
499 if (ctxt->recovery == 0)
500 ctxt->disableSAX = 1;
501 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000502}
503
504/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000505 * xmlWarningMsg:
506 * @ctxt: an XML parser context
507 * @error: the error number
508 * @msg: the error message
509 * @str1: extra data
510 * @str2: extra data
511 *
512 * Handle a warning.
513 */
514static void
515xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
516 const char *msg, const xmlChar *str1, const xmlChar *str2)
517{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000518 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000519
Daniel Veillard157fee02003-10-31 10:36:03 +0000520 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
521 (ctxt->instate == XML_PARSER_EOF))
522 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000523 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
524 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000525 schannel = ctxt->sax->serror;
526 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000527 (ctxt->sax) ? ctxt->sax->warning : NULL,
528 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000529 ctxt, NULL, XML_FROM_PARSER, error,
530 XML_ERR_WARNING, NULL, 0,
531 (const char *) str1, (const char *) str2, NULL, 0, 0,
532 msg, (const char *) str1, (const char *) str2);
533}
534
535/**
536 * xmlValidityError:
537 * @ctxt: an XML parser context
538 * @error: the error number
539 * @msg: the error message
540 * @str1: extra data
541 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000542 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000543 */
544static void
545xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000546 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000547{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000548 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000549
550 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
551 (ctxt->instate == XML_PARSER_EOF))
552 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000553 if (ctxt != NULL) {
554 ctxt->errNo = error;
555 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
556 schannel = ctxt->sax->serror;
557 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000558 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000559 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000560 ctxt, NULL, XML_FROM_DTD, error,
561 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000562 (const char *) str2, NULL, 0, 0,
563 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000564 if (ctxt != NULL) {
565 ctxt->valid = 0;
566 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000567}
568
569/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570 * xmlFatalErrMsgInt:
571 * @ctxt: an XML parser context
572 * @error: the error number
573 * @msg: the error message
574 * @val: an integer value
575 *
576 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
577 */
578static void
579xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000580 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581{
Daniel Veillard157fee02003-10-31 10:36:03 +0000582 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
583 (ctxt->instate == XML_PARSER_EOF))
584 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000585 if (ctxt != NULL)
586 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000587 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000588 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
589 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000590 if (ctxt != NULL) {
591 ctxt->wellFormed = 0;
592 if (ctxt->recovery == 0)
593 ctxt->disableSAX = 1;
594 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000595}
596
597/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000598 * xmlFatalErrMsgStrIntStr:
599 * @ctxt: an XML parser context
600 * @error: the error number
601 * @msg: the error message
602 * @str1: an string info
603 * @val: an integer value
604 * @str2: an string info
605 *
606 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
607 */
608static void
609xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
610 const char *msg, const xmlChar *str1, int val,
611 const xmlChar *str2)
612{
Daniel Veillard157fee02003-10-31 10:36:03 +0000613 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
614 (ctxt->instate == XML_PARSER_EOF))
615 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000616 if (ctxt != NULL)
617 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000618 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000619 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
620 NULL, 0, (const char *) str1, (const char *) str2,
621 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000622 if (ctxt != NULL) {
623 ctxt->wellFormed = 0;
624 if (ctxt->recovery == 0)
625 ctxt->disableSAX = 1;
626 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000627}
628
629/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000630 * xmlFatalErrMsgStr:
631 * @ctxt: an XML parser context
632 * @error: the error number
633 * @msg: the error message
634 * @val: a string value
635 *
636 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
637 */
638static void
639xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000640 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000641{
Daniel Veillard157fee02003-10-31 10:36:03 +0000642 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
643 (ctxt->instate == XML_PARSER_EOF))
644 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000645 if (ctxt != NULL)
646 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000647 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000648 XML_FROM_PARSER, error, XML_ERR_FATAL,
649 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
650 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000651 if (ctxt != NULL) {
652 ctxt->wellFormed = 0;
653 if (ctxt->recovery == 0)
654 ctxt->disableSAX = 1;
655 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000656}
657
658/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000659 * xmlErrMsgStr:
660 * @ctxt: an XML parser context
661 * @error: the error number
662 * @msg: the error message
663 * @val: a string value
664 *
665 * Handle a non fatal parser error
666 */
667static void
668xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
669 const char *msg, const xmlChar * val)
670{
Daniel Veillard157fee02003-10-31 10:36:03 +0000671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL)
675 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000676 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000677 XML_FROM_PARSER, error, XML_ERR_ERROR,
678 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
679 val);
680}
681
682/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000683 * xmlNsErr:
684 * @ctxt: an XML parser context
685 * @error: the error number
686 * @msg: the message
687 * @info1: extra information string
688 * @info2: extra information string
689 *
690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
691 */
692static void
693xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000695 const xmlChar * info1, const xmlChar * info2,
696 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000697{
Daniel Veillard157fee02003-10-31 10:36:03 +0000698 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
699 (ctxt->instate == XML_PARSER_EOF))
700 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000701 if (ctxt != NULL)
702 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000703 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000704 XML_ERR_ERROR, NULL, 0, (const char *) info1,
705 (const char *) info2, (const char *) info3, 0, 0, msg,
706 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000707 if (ctxt != NULL)
708 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000709}
710
Daniel Veillard37334572008-07-31 08:20:02 +0000711/**
712 * xmlNsWarn
713 * @ctxt: an XML parser context
714 * @error: the error number
715 * @msg: the message
716 * @info1: extra information string
717 * @info2: extra information string
718 *
719 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
720 */
721static void
722xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
723 const char *msg,
724 const xmlChar * info1, const xmlChar * info2,
725 const xmlChar * info3)
726{
727 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
728 (ctxt->instate == XML_PARSER_EOF))
729 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000730 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
731 XML_ERR_WARNING, NULL, 0, (const char *) info1,
732 (const char *) info2, (const char *) info3, 0, 0, msg,
733 info1, info2, info3);
734}
735
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000736/************************************************************************
737 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000738 * Library wide options *
739 * *
740 ************************************************************************/
741
742/**
743 * xmlHasFeature:
744 * @feature: the feature to be examined
745 *
746 * Examines if the library has been compiled with a given feature.
747 *
748 * Returns a non-zero value if the feature exist, otherwise zero.
749 * Returns zero (0) if the feature does not exist or an unknown
750 * unknown feature is requested, non-zero otherwise.
751 */
752int
753xmlHasFeature(xmlFeature feature)
754{
755 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_THREAD_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_TREE_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_OUTPUT_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_PUSH_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_READER_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_PATTERN_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_WRITER_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef LIBXML_SAX1_ENABLED
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_FTP_ENABLED
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000810 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000811#ifdef LIBXML_HTTP_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000816 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000817#ifdef LIBXML_VALID_ENABLED
818 return(1);
819#else
820 return(0);
821#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000822 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000823#ifdef LIBXML_HTML_ENABLED
824 return(1);
825#else
826 return(0);
827#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000828 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000829#ifdef LIBXML_LEGACY_ENABLED
830 return(1);
831#else
832 return(0);
833#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000834 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000835#ifdef LIBXML_C14N_ENABLED
836 return(1);
837#else
838 return(0);
839#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000840 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000841#ifdef LIBXML_CATALOG_ENABLED
842 return(1);
843#else
844 return(0);
845#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000846 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000847#ifdef LIBXML_XPATH_ENABLED
848 return(1);
849#else
850 return(0);
851#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000852 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000853#ifdef LIBXML_XPTR_ENABLED
854 return(1);
855#else
856 return(0);
857#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000858 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000859#ifdef LIBXML_XINCLUDE_ENABLED
860 return(1);
861#else
862 return(0);
863#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000864 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000865#ifdef LIBXML_ICONV_ENABLED
866 return(1);
867#else
868 return(0);
869#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000870 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000871#ifdef LIBXML_ISO8859X_ENABLED
872 return(1);
873#else
874 return(0);
875#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000876 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000877#ifdef LIBXML_UNICODE_ENABLED
878 return(1);
879#else
880 return(0);
881#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000882 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000883#ifdef LIBXML_REGEXP_ENABLED
884 return(1);
885#else
886 return(0);
887#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000888 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000889#ifdef LIBXML_AUTOMATA_ENABLED
890 return(1);
891#else
892 return(0);
893#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000894 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000895#ifdef LIBXML_EXPR_ENABLED
896 return(1);
897#else
898 return(0);
899#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000900 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000901#ifdef LIBXML_SCHEMAS_ENABLED
902 return(1);
903#else
904 return(0);
905#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000906 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000907#ifdef LIBXML_SCHEMATRON_ENABLED
908 return(1);
909#else
910 return(0);
911#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000912 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000913#ifdef LIBXML_MODULES_ENABLED
914 return(1);
915#else
916 return(0);
917#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000918 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000919#ifdef LIBXML_DEBUG_ENABLED
920 return(1);
921#else
922 return(0);
923#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000924 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000925#ifdef DEBUG_MEMORY_LOCATION
926 return(1);
927#else
928 return(0);
929#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000930 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000931#ifdef LIBXML_DEBUG_RUNTIME
932 return(1);
933#else
934 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000935#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000936 case XML_WITH_ZLIB:
937#ifdef LIBXML_ZLIB_ENABLED
938 return(1);
939#else
940 return(0);
941#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000942 default:
943 break;
944 }
945 return(0);
946}
947
948/************************************************************************
949 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 * SAX2 defaulted attributes handling *
951 * *
952 ************************************************************************/
953
954/**
955 * xmlDetectSAX2:
956 * @ctxt: an XML parser context
957 *
958 * Do the SAX2 detection and specific intialization
959 */
960static void
961xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
962 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000963#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000964 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
965 ((ctxt->sax->startElementNs != NULL) ||
966 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000967#else
968 ctxt->sax2 = 1;
969#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000970
971 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
972 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
973 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000974 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
975 (ctxt->str_xml_ns == NULL)) {
976 xmlErrMemory(ctxt, NULL);
977 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000978}
979
Daniel Veillarde57ec792003-09-10 10:50:59 +0000980typedef struct _xmlDefAttrs xmlDefAttrs;
981typedef xmlDefAttrs *xmlDefAttrsPtr;
982struct _xmlDefAttrs {
983 int nbAttrs; /* number of defaulted attributes on that element */
984 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +0000985 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000986};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000987
988/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000989 * xmlAttrNormalizeSpace:
990 * @src: the source string
991 * @dst: the target string
992 *
993 * Normalize the space in non CDATA attribute values:
994 * If the attribute type is not CDATA, then the XML processor MUST further
995 * process the normalized attribute value by discarding any leading and
996 * trailing space (#x20) characters, and by replacing sequences of space
997 * (#x20) characters by a single space (#x20) character.
998 * Note that the size of dst need to be at least src, and if one doesn't need
999 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1000 * passing src as dst is just fine.
1001 *
1002 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1003 * is needed.
1004 */
1005static xmlChar *
1006xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1007{
1008 if ((src == NULL) || (dst == NULL))
1009 return(NULL);
1010
1011 while (*src == 0x20) src++;
1012 while (*src != 0) {
1013 if (*src == 0x20) {
1014 while (*src == 0x20) src++;
1015 if (*src != 0)
1016 *dst++ = 0x20;
1017 } else {
1018 *dst++ = *src++;
1019 }
1020 }
1021 *dst = 0;
1022 if (dst == src)
1023 return(NULL);
1024 return(dst);
1025}
1026
1027/**
1028 * xmlAttrNormalizeSpace2:
1029 * @src: the source string
1030 *
1031 * Normalize the space in non CDATA attribute values, a slightly more complex
1032 * front end to avoid allocation problems when running on attribute values
1033 * coming from the input.
1034 *
1035 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1036 * is needed.
1037 */
1038static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001039xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001040{
1041 int i;
1042 int remove_head = 0;
1043 int need_realloc = 0;
1044 const xmlChar *cur;
1045
1046 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1047 return(NULL);
1048 i = *len;
1049 if (i <= 0)
1050 return(NULL);
1051
1052 cur = src;
1053 while (*cur == 0x20) {
1054 cur++;
1055 remove_head++;
1056 }
1057 while (*cur != 0) {
1058 if (*cur == 0x20) {
1059 cur++;
1060 if ((*cur == 0x20) || (*cur == 0)) {
1061 need_realloc = 1;
1062 break;
1063 }
1064 } else
1065 cur++;
1066 }
1067 if (need_realloc) {
1068 xmlChar *ret;
1069
1070 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1071 if (ret == NULL) {
1072 xmlErrMemory(ctxt, NULL);
1073 return(NULL);
1074 }
1075 xmlAttrNormalizeSpace(ret, ret);
1076 *len = (int) strlen((const char *)ret);
1077 return(ret);
1078 } else if (remove_head) {
1079 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001080 memmove(src, src + remove_head, 1 + *len);
1081 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001082 }
1083 return(NULL);
1084}
1085
1086/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001087 * xmlAddDefAttrs:
1088 * @ctxt: an XML parser context
1089 * @fullname: the element fullname
1090 * @fullattr: the attribute fullname
1091 * @value: the attribute value
1092 *
1093 * Add a defaulted attribute for an element
1094 */
1095static void
1096xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1097 const xmlChar *fullname,
1098 const xmlChar *fullattr,
1099 const xmlChar *value) {
1100 xmlDefAttrsPtr defaults;
1101 int len;
1102 const xmlChar *name;
1103 const xmlChar *prefix;
1104
Daniel Veillard6a31b832008-03-26 14:06:44 +00001105 /*
1106 * Allows to detect attribute redefinitions
1107 */
1108 if (ctxt->attsSpecial != NULL) {
1109 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1110 return;
1111 }
1112
Daniel Veillarde57ec792003-09-10 10:50:59 +00001113 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001114 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001115 if (ctxt->attsDefault == NULL)
1116 goto mem_error;
1117 }
1118
1119 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001120 * split the element name into prefix:localname , the string found
1121 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001122 */
1123 name = xmlSplitQName3(fullname, &len);
1124 if (name == NULL) {
1125 name = xmlDictLookup(ctxt->dict, fullname, -1);
1126 prefix = NULL;
1127 } else {
1128 name = xmlDictLookup(ctxt->dict, name, -1);
1129 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1130 }
1131
1132 /*
1133 * make sure there is some storage
1134 */
1135 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1136 if (defaults == NULL) {
1137 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001138 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001139 if (defaults == NULL)
1140 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001141 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001142 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001143 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1144 defaults, NULL) < 0) {
1145 xmlFree(defaults);
1146 goto mem_error;
1147 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001148 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001149 xmlDefAttrsPtr temp;
1150
1151 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001152 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001153 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001155 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001157 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1158 defaults, NULL) < 0) {
1159 xmlFree(defaults);
1160 goto mem_error;
1161 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 }
1163
1164 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001165 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001166 * are within the DTD and hen not associated to namespace names.
1167 */
1168 name = xmlSplitQName3(fullattr, &len);
1169 if (name == NULL) {
1170 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1171 prefix = NULL;
1172 } else {
1173 name = xmlDictLookup(ctxt->dict, name, -1);
1174 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1175 }
1176
Daniel Veillardae0765b2008-07-31 19:54:59 +00001177 defaults->values[5 * defaults->nbAttrs] = name;
1178 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001179 /* intern the string and precompute the end */
1180 len = xmlStrlen(value);
1181 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001182 defaults->values[5 * defaults->nbAttrs + 2] = value;
1183 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1184 if (ctxt->external)
1185 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1186 else
1187 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001188 defaults->nbAttrs++;
1189
1190 return;
1191
1192mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001193 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001194 return;
1195}
1196
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001197/**
1198 * xmlAddSpecialAttr:
1199 * @ctxt: an XML parser context
1200 * @fullname: the element fullname
1201 * @fullattr: the attribute fullname
1202 * @type: the attribute type
1203 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001204 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001205 */
1206static void
1207xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1208 const xmlChar *fullname,
1209 const xmlChar *fullattr,
1210 int type)
1211{
1212 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001213 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001214 if (ctxt->attsSpecial == NULL)
1215 goto mem_error;
1216 }
1217
Daniel Veillardac4118d2008-01-11 05:27:32 +00001218 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1219 return;
1220
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001221 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1222 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001223 return;
1224
1225mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001226 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001227 return;
1228}
1229
Daniel Veillard4432df22003-09-28 18:58:27 +00001230/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001231 * xmlCleanSpecialAttrCallback:
1232 *
1233 * Removes CDATA attributes from the special attribute table
1234 */
1235static void
1236xmlCleanSpecialAttrCallback(void *payload, void *data,
1237 const xmlChar *fullname, const xmlChar *fullattr,
1238 const xmlChar *unused ATTRIBUTE_UNUSED) {
1239 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1240
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001241 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001242 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1243 }
1244}
1245
1246/**
1247 * xmlCleanSpecialAttr:
1248 * @ctxt: an XML parser context
1249 *
1250 * Trim the list of attributes defined to remove all those of type
1251 * CDATA as they are not special. This call should be done when finishing
1252 * to parse the DTD and before starting to parse the document root.
1253 */
1254static void
1255xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1256{
1257 if (ctxt->attsSpecial == NULL)
1258 return;
1259
1260 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1261
1262 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1263 xmlHashFree(ctxt->attsSpecial, NULL);
1264 ctxt->attsSpecial = NULL;
1265 }
1266 return;
1267}
1268
1269/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001270 * xmlCheckLanguageID:
1271 * @lang: pointer to the string value
1272 *
1273 * Checks that the value conforms to the LanguageID production:
1274 *
1275 * NOTE: this is somewhat deprecated, those productions were removed from
1276 * the XML Second edition.
1277 *
1278 * [33] LanguageID ::= Langcode ('-' Subcode)*
1279 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1280 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1281 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1282 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1283 * [38] Subcode ::= ([a-z] | [A-Z])+
1284 *
1285 * Returns 1 if correct 0 otherwise
1286 **/
1287int
1288xmlCheckLanguageID(const xmlChar * lang)
1289{
1290 const xmlChar *cur = lang;
1291
1292 if (cur == NULL)
1293 return (0);
1294 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1295 ((cur[0] == 'I') && (cur[1] == '-'))) {
1296 /*
1297 * IANA code
1298 */
1299 cur += 2;
1300 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1301 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1302 cur++;
1303 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1304 ((cur[0] == 'X') && (cur[1] == '-'))) {
1305 /*
1306 * User code
1307 */
1308 cur += 2;
1309 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1310 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1311 cur++;
1312 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1313 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1314 /*
1315 * ISO639
1316 */
1317 cur++;
1318 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1319 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1320 cur++;
1321 else
1322 return (0);
1323 } else
1324 return (0);
1325 while (cur[0] != 0) { /* non input consuming */
1326 if (cur[0] != '-')
1327 return (0);
1328 cur++;
1329 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1330 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1331 cur++;
1332 else
1333 return (0);
1334 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1335 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1336 cur++;
1337 }
1338 return (1);
1339}
1340
Owen Taylor3473f882001-02-23 17:55:21 +00001341/************************************************************************
1342 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001343 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001344 * *
1345 ************************************************************************/
1346
1347xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1348 const xmlChar ** str);
1349
Daniel Veillard0fb18932003-09-07 09:14:37 +00001350#ifdef SAX2
1351/**
1352 * nsPush:
1353 * @ctxt: an XML parser context
1354 * @prefix: the namespace prefix or NULL
1355 * @URL: the namespace name
1356 *
1357 * Pushes a new parser namespace on top of the ns stack
1358 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001359 * Returns -1 in case of error, -2 if the namespace should be discarded
1360 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001361 */
1362static int
1363nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1364{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001365 if (ctxt->options & XML_PARSE_NSCLEAN) {
1366 int i;
1367 for (i = 0;i < ctxt->nsNr;i += 2) {
1368 if (ctxt->nsTab[i] == prefix) {
1369 /* in scope */
1370 if (ctxt->nsTab[i + 1] == URL)
1371 return(-2);
1372 /* out of scope keep it */
1373 break;
1374 }
1375 }
1376 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001377 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1378 ctxt->nsMax = 10;
1379 ctxt->nsNr = 0;
1380 ctxt->nsTab = (const xmlChar **)
1381 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1382 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001383 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001384 ctxt->nsMax = 0;
1385 return (-1);
1386 }
1387 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001388 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001389 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001390 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1391 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1392 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001393 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001394 ctxt->nsMax /= 2;
1395 return (-1);
1396 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001397 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001398 }
1399 ctxt->nsTab[ctxt->nsNr++] = prefix;
1400 ctxt->nsTab[ctxt->nsNr++] = URL;
1401 return (ctxt->nsNr);
1402}
1403/**
1404 * nsPop:
1405 * @ctxt: an XML parser context
1406 * @nr: the number to pop
1407 *
1408 * Pops the top @nr parser prefix/namespace from the ns stack
1409 *
1410 * Returns the number of namespaces removed
1411 */
1412static int
1413nsPop(xmlParserCtxtPtr ctxt, int nr)
1414{
1415 int i;
1416
1417 if (ctxt->nsTab == NULL) return(0);
1418 if (ctxt->nsNr < nr) {
1419 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1420 nr = ctxt->nsNr;
1421 }
1422 if (ctxt->nsNr <= 0)
1423 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001424
Daniel Veillard0fb18932003-09-07 09:14:37 +00001425 for (i = 0;i < nr;i++) {
1426 ctxt->nsNr--;
1427 ctxt->nsTab[ctxt->nsNr] = NULL;
1428 }
1429 return(nr);
1430}
1431#endif
1432
1433static int
1434xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1435 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001436 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001437 int maxatts;
1438
1439 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001440 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001441 atts = (const xmlChar **)
1442 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001443 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001444 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001445 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1446 if (attallocs == NULL) goto mem_error;
1447 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001448 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001449 } else if (nr + 5 > ctxt->maxatts) {
1450 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001451 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1452 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001453 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001454 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001455 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1456 (maxatts / 5) * sizeof(int));
1457 if (attallocs == NULL) goto mem_error;
1458 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001459 ctxt->maxatts = maxatts;
1460 }
1461 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001462mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001463 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001464 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001465}
1466
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001467/**
1468 * inputPush:
1469 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001470 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001471 *
1472 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001473 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001474 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001475 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001476int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001477inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1478{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001479 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001480 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001481 if (ctxt->inputNr >= ctxt->inputMax) {
1482 ctxt->inputMax *= 2;
1483 ctxt->inputTab =
1484 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1485 ctxt->inputMax *
1486 sizeof(ctxt->inputTab[0]));
1487 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001488 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001489 xmlFreeInputStream(value);
1490 ctxt->inputMax /= 2;
1491 value = NULL;
1492 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001493 }
1494 }
1495 ctxt->inputTab[ctxt->inputNr] = value;
1496 ctxt->input = value;
1497 return (ctxt->inputNr++);
1498}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001499/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001500 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001501 * @ctxt: an XML parser context
1502 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001503 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001504 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001505 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001506 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001507xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001508inputPop(xmlParserCtxtPtr ctxt)
1509{
1510 xmlParserInputPtr ret;
1511
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001512 if (ctxt == NULL)
1513 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001514 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001515 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001516 ctxt->inputNr--;
1517 if (ctxt->inputNr > 0)
1518 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1519 else
1520 ctxt->input = NULL;
1521 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001522 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001523 return (ret);
1524}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001525/**
1526 * nodePush:
1527 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001528 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001529 *
1530 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001531 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001532 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001533 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001534int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001535nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1536{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001537 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001538 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001539 xmlNodePtr *tmp;
1540
1541 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1542 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001543 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001544 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001545 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001546 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001547 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001548 ctxt->nodeTab = tmp;
1549 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001550 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001551 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1552 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001553 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001554 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001555 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001556 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001557 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001558 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001559 ctxt->nodeTab[ctxt->nodeNr] = value;
1560 ctxt->node = value;
1561 return (ctxt->nodeNr++);
1562}
Daniel Veillard8915c152008-08-26 13:05:34 +00001563
Daniel Veillard1c732d22002-11-30 11:22:59 +00001564/**
1565 * nodePop:
1566 * @ctxt: an XML parser context
1567 *
1568 * Pops the top element node from the node stack
1569 *
1570 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001571 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001572xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001573nodePop(xmlParserCtxtPtr ctxt)
1574{
1575 xmlNodePtr ret;
1576
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001577 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001578 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001579 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001580 ctxt->nodeNr--;
1581 if (ctxt->nodeNr > 0)
1582 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1583 else
1584 ctxt->node = NULL;
1585 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001586 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001587 return (ret);
1588}
Daniel Veillarda2351322004-06-27 12:08:10 +00001589
1590#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001591/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001592 * nameNsPush:
1593 * @ctxt: an XML parser context
1594 * @value: the element name
1595 * @prefix: the element prefix
1596 * @URI: the element namespace name
1597 *
1598 * Pushes a new element name/prefix/URL on top of the name stack
1599 *
1600 * Returns -1 in case of error, the index in the stack otherwise
1601 */
1602static int
1603nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1604 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1605{
1606 if (ctxt->nameNr >= ctxt->nameMax) {
1607 const xmlChar * *tmp;
1608 void **tmp2;
1609 ctxt->nameMax *= 2;
1610 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1611 ctxt->nameMax *
1612 sizeof(ctxt->nameTab[0]));
1613 if (tmp == NULL) {
1614 ctxt->nameMax /= 2;
1615 goto mem_error;
1616 }
1617 ctxt->nameTab = tmp;
1618 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1619 ctxt->nameMax * 3 *
1620 sizeof(ctxt->pushTab[0]));
1621 if (tmp2 == NULL) {
1622 ctxt->nameMax /= 2;
1623 goto mem_error;
1624 }
1625 ctxt->pushTab = tmp2;
1626 }
1627 ctxt->nameTab[ctxt->nameNr] = value;
1628 ctxt->name = value;
1629 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1630 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001631 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001632 return (ctxt->nameNr++);
1633mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001634 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001635 return (-1);
1636}
1637/**
1638 * nameNsPop:
1639 * @ctxt: an XML parser context
1640 *
1641 * Pops the top element/prefix/URI name from the name stack
1642 *
1643 * Returns the name just removed
1644 */
1645static const xmlChar *
1646nameNsPop(xmlParserCtxtPtr ctxt)
1647{
1648 const xmlChar *ret;
1649
1650 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001651 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001652 ctxt->nameNr--;
1653 if (ctxt->nameNr > 0)
1654 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1655 else
1656 ctxt->name = NULL;
1657 ret = ctxt->nameTab[ctxt->nameNr];
1658 ctxt->nameTab[ctxt->nameNr] = NULL;
1659 return (ret);
1660}
Daniel Veillarda2351322004-06-27 12:08:10 +00001661#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001662
1663/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001664 * namePush:
1665 * @ctxt: an XML parser context
1666 * @value: the element name
1667 *
1668 * Pushes a new element name on top of the name stack
1669 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001670 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001671 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001672int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001673namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001674{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001675 if (ctxt == NULL) return (-1);
1676
Daniel Veillard1c732d22002-11-30 11:22:59 +00001677 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001679 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001680 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001681 ctxt->nameMax *
1682 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001683 if (tmp == NULL) {
1684 ctxt->nameMax /= 2;
1685 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001686 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001687 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001688 }
1689 ctxt->nameTab[ctxt->nameNr] = value;
1690 ctxt->name = value;
1691 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001692mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001693 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001694 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695}
1696/**
1697 * namePop:
1698 * @ctxt: an XML parser context
1699 *
1700 * Pops the top element name from the name stack
1701 *
1702 * Returns the name just removed
1703 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001704const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001705namePop(xmlParserCtxtPtr ctxt)
1706{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001707 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001709 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1710 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001711 ctxt->nameNr--;
1712 if (ctxt->nameNr > 0)
1713 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1714 else
1715 ctxt->name = NULL;
1716 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001717 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001718 return (ret);
1719}
Owen Taylor3473f882001-02-23 17:55:21 +00001720
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001721static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001722 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001723 int *tmp;
1724
Owen Taylor3473f882001-02-23 17:55:21 +00001725 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001726 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1727 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1728 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001729 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001730 ctxt->spaceMax /=2;
1731 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001732 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001733 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001734 }
1735 ctxt->spaceTab[ctxt->spaceNr] = val;
1736 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1737 return(ctxt->spaceNr++);
1738}
1739
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001740static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001741 int ret;
1742 if (ctxt->spaceNr <= 0) return(0);
1743 ctxt->spaceNr--;
1744 if (ctxt->spaceNr > 0)
1745 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1746 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001747 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001748 ret = ctxt->spaceTab[ctxt->spaceNr];
1749 ctxt->spaceTab[ctxt->spaceNr] = -1;
1750 return(ret);
1751}
1752
/*
 * Macros for accessing the content. They should be used only by the parser
 * and must not be exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them:
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8-bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypassing any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare against ASCII based substrings.
 *   SKIP(n) Skip n xmlChars, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding:
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXTL(l) Skip the current unicode character, which is l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), and sets l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, and
 *            increment the index
 *   GROW, SHRINK  handling of input buffers
 */
1787
/*
 * Raw accessors on the current input; all of them expect a variable named
 * ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * Every parameter is parenthesized so that expression arguments such as
 * "ptr + 1" are compared as written.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * SKIP(val): advance nbChars, the input cursor and the column by val.
 * Afterwards a '%' at the cursor triggers xmlParserHandlePEReference(),
 * and a 0 byte that xmlParserInputGrow() cannot get past triggers
 * xmlPopInput(). Line counting is not updated.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val bytes one by one so that
 * '\n' bytes update the line/column counters.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1833
Daniel Veillarda880b122003-04-21 21:36:41 +00001834#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001835 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1836 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001837 xmlSHRINK (ctxt);
1838
1839static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1840 xmlParserInputShrink(ctxt->input);
1841 if ((*ctxt->input->cur == 0) &&
1842 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1843 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001844 }
Owen Taylor3473f882001-02-23 17:55:21 +00001845
Daniel Veillarda880b122003-04-21 21:36:41 +00001846#define GROW if ((ctxt->progressive == 0) && \
1847 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001848 xmlGROW (ctxt);
1849
1850static void xmlGROW (xmlParserCtxtPtr ctxt) {
1851 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1852 if ((*ctxt->input->cur == 0) &&
1853 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1854 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001855}
Owen Taylor3473f882001-02-23 17:55:21 +00001856
/* Skip blanks at the cursor, see xmlSkipBlankChars(). */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance by one character through xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance column, cursor and nbChars by one byte, then try to grow
 * the input if the cursor landed on a 0 byte. Line counting is not updated.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): advance the cursor by l bytes, updating line/column from the
 * byte under the cursor, then expand a PE reference if one starts at the
 * new position. The parameter is parenthesized so that expression
 * arguments are added as a whole.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Current character of the context / of string s; its length goes to l. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and advance
 * i; a length l of 1 is stored directly, anything else goes through
 * xmlCopyCharMultiByte(). Expands to an unbraced if/else, so use it as a
 * statement of its own.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001883
1884/**
1885 * xmlSkipBlankChars:
1886 * @ctxt: the XML parser context
1887 *
1888 * skip all blanks character found at that point in the input streams.
1889 * It pops up finished entities in the process if allowable at that point.
1890 *
1891 * Returns the number of space chars skipped
1892 */
1893
1894int
1895xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001896 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001897
1898 /*
1899 * It's Okay to use CUR/NEXT here since all the blanks are on
1900 * the ASCII range.
1901 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001902 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1903 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001904 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001905 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001906 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001907 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001908 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001909 if (*cur == '\n') {
1910 ctxt->input->line++; ctxt->input->col = 1;
1911 }
1912 cur++;
1913 res++;
1914 if (*cur == 0) {
1915 ctxt->input->cur = cur;
1916 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1917 cur = ctxt->input->cur;
1918 }
1919 }
1920 ctxt->input->cur = cur;
1921 } else {
1922 int cur;
1923 do {
1924 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001925 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001926 NEXT;
1927 cur = CUR;
1928 res++;
1929 }
1930 while ((cur == 0) && (ctxt->inputNr > 1) &&
1931 (ctxt->instate != XML_PARSER_COMMENT)) {
1932 xmlPopInput(ctxt);
1933 cur = CUR;
1934 }
1935 /*
1936 * Need to handle support of entities branching here
1937 */
1938 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1939 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1940 }
Owen Taylor3473f882001-02-23 17:55:21 +00001941 return(res);
1942}
1943
1944/************************************************************************
1945 * *
1946 * Commodity functions to handle entities *
1947 * *
1948 ************************************************************************/
1949
1950/**
1951 * xmlPopInput:
1952 * @ctxt: an XML parser context
1953 *
1954 * xmlPopInput: the current input pointed by ctxt->input came to an end
1955 * pop it and return the next char.
1956 *
1957 * Returns the current xmlChar in the parser context
1958 */
1959xmlChar
1960xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001961 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001962 if (xmlParserDebugEntities)
1963 xmlGenericError(xmlGenericErrorContext,
1964 "Popping input %d\n", ctxt->inputNr);
1965 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001966 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001967 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1968 return(xmlPopInput(ctxt));
1969 return(CUR);
1970}
1971
1972/**
1973 * xmlPushInput:
1974 * @ctxt: an XML parser context
1975 * @input: an XML parser input fragment (entity, XML fragment ...).
1976 *
1977 * xmlPushInput: switch to a new input stream which is stacked on top
1978 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001979 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00001980 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001981int
Owen Taylor3473f882001-02-23 17:55:21 +00001982xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001983 int ret;
1984 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001985
1986 if (xmlParserDebugEntities) {
1987 if ((ctxt->input != NULL) && (ctxt->input->filename))
1988 xmlGenericError(xmlGenericErrorContext,
1989 "%s(%d): ", ctxt->input->filename,
1990 ctxt->input->line);
1991 xmlGenericError(xmlGenericErrorContext,
1992 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1993 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001994 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001995 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001996 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001997}
1998
1999/**
2000 * xmlParseCharRef:
2001 * @ctxt: an XML parser context
2002 *
2003 * parse Reference declarations
2004 *
2005 * [66] CharRef ::= '&#' [0-9]+ ';' |
2006 * '&#x' [0-9a-fA-F]+ ';'
2007 *
2008 * [ WFC: Legal Character ]
2009 * Characters referred to using character references must match the
2010 * production for Char.
2011 *
2012 * Returns the value parsed (as an int), 0 in case of error
2013 */
2014int
2015xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002016 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002017 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002018 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002019
Owen Taylor3473f882001-02-23 17:55:21 +00002020 /*
2021 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2022 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002023 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002024 (NXT(2) == 'x')) {
2025 SKIP(3);
2026 GROW;
2027 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002028 if (count++ > 20) {
2029 count = 0;
2030 GROW;
2031 }
2032 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002033 val = val * 16 + (CUR - '0');
2034 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2035 val = val * 16 + (CUR - 'a') + 10;
2036 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2037 val = val * 16 + (CUR - 'A') + 10;
2038 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002039 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002040 val = 0;
2041 break;
2042 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002043 if (val > 0x10FFFF)
2044 outofrange = val;
2045
Owen Taylor3473f882001-02-23 17:55:21 +00002046 NEXT;
2047 count++;
2048 }
2049 if (RAW == ';') {
2050 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002051 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002052 ctxt->nbChars ++;
2053 ctxt->input->cur++;
2054 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002055 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002056 SKIP(2);
2057 GROW;
2058 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002059 if (count++ > 20) {
2060 count = 0;
2061 GROW;
2062 }
2063 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002064 val = val * 10 + (CUR - '0');
2065 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002066 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002067 val = 0;
2068 break;
2069 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002070 if (val > 0x10FFFF)
2071 outofrange = val;
2072
Owen Taylor3473f882001-02-23 17:55:21 +00002073 NEXT;
2074 count++;
2075 }
2076 if (RAW == ';') {
2077 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002078 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002079 ctxt->nbChars ++;
2080 ctxt->input->cur++;
2081 }
2082 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002083 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 }
2085
2086 /*
2087 * [ WFC: Legal Character ]
2088 * Characters referred to using character references must match the
2089 * production for Char.
2090 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002091 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002092 return(val);
2093 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002094 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2095 "xmlParseCharRef: invalid xmlChar value %d\n",
2096 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002097 }
2098 return(0);
2099}
2100
2101/**
2102 * xmlParseStringCharRef:
2103 * @ctxt: an XML parser context
2104 * @str: a pointer to an index in the string
2105 *
2106 * parse Reference declarations, variant parsing from a string rather
2107 * than an an input flow.
2108 *
2109 * [66] CharRef ::= '&#' [0-9]+ ';' |
2110 * '&#x' [0-9a-fA-F]+ ';'
2111 *
2112 * [ WFC: Legal Character ]
2113 * Characters referred to using character references must match the
2114 * production for Char.
2115 *
2116 * Returns the value parsed (as an int), 0 in case of error, str will be
2117 * updated to the current value of the index
2118 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002119static int
Owen Taylor3473f882001-02-23 17:55:21 +00002120xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2121 const xmlChar *ptr;
2122 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002123 unsigned int val = 0;
2124 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002125
2126 if ((str == NULL) || (*str == NULL)) return(0);
2127 ptr = *str;
2128 cur = *ptr;
2129 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2130 ptr += 3;
2131 cur = *ptr;
2132 while (cur != ';') { /* Non input consuming loop */
2133 if ((cur >= '0') && (cur <= '9'))
2134 val = val * 16 + (cur - '0');
2135 else if ((cur >= 'a') && (cur <= 'f'))
2136 val = val * 16 + (cur - 'a') + 10;
2137 else if ((cur >= 'A') && (cur <= 'F'))
2138 val = val * 16 + (cur - 'A') + 10;
2139 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002140 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002141 val = 0;
2142 break;
2143 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002144 if (val > 0x10FFFF)
2145 outofrange = val;
2146
Owen Taylor3473f882001-02-23 17:55:21 +00002147 ptr++;
2148 cur = *ptr;
2149 }
2150 if (cur == ';')
2151 ptr++;
2152 } else if ((cur == '&') && (ptr[1] == '#')){
2153 ptr += 2;
2154 cur = *ptr;
2155 while (cur != ';') { /* Non input consuming loops */
2156 if ((cur >= '0') && (cur <= '9'))
2157 val = val * 10 + (cur - '0');
2158 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002159 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002160 val = 0;
2161 break;
2162 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002163 if (val > 0x10FFFF)
2164 outofrange = val;
2165
Owen Taylor3473f882001-02-23 17:55:21 +00002166 ptr++;
2167 cur = *ptr;
2168 }
2169 if (cur == ';')
2170 ptr++;
2171 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002172 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002173 return(0);
2174 }
2175 *str = ptr;
2176
2177 /*
2178 * [ WFC: Legal Character ]
2179 * Characters referred to using character references must match the
2180 * production for Char.
2181 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002182 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002183 return(val);
2184 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002185 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2186 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2187 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002188 }
2189 return(0);
2190}
2191
2192/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002193 * xmlNewBlanksWrapperInputStream:
2194 * @ctxt: an XML parser context
2195 * @entity: an Entity pointer
2196 *
2197 * Create a new input stream for wrapping
2198 * blanks around a PEReference
2199 *
2200 * Returns the new input stream or NULL
2201 */
2202
2203static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2204
Daniel Veillardf4862f02002-09-10 11:13:43 +00002205static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002206xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2207 xmlParserInputPtr input;
2208 xmlChar *buffer;
2209 size_t length;
2210 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002211 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2212 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002213 return(NULL);
2214 }
2215 if (xmlParserDebugEntities)
2216 xmlGenericError(xmlGenericErrorContext,
2217 "new blanks wrapper for entity: %s\n", entity->name);
2218 input = xmlNewInputStream(ctxt);
2219 if (input == NULL) {
2220 return(NULL);
2221 }
2222 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002223 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002224 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002225 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002226 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002227 return(NULL);
2228 }
2229 buffer [0] = ' ';
2230 buffer [1] = '%';
2231 buffer [length-3] = ';';
2232 buffer [length-2] = ' ';
2233 buffer [length-1] = 0;
2234 memcpy(buffer + 2, entity->name, length - 5);
2235 input->free = deallocblankswrapper;
2236 input->base = buffer;
2237 input->cur = buffer;
2238 input->length = length;
2239 input->end = &buffer[length];
2240 return(input);
2241}
2242
2243/**
Owen Taylor3473f882001-02-23 17:55:21 +00002244 * xmlParserHandlePEReference:
2245 * @ctxt: the parser context
2246 *
2247 * [69] PEReference ::= '%' Name ';'
2248 *
2249 * [ WFC: No Recursion ]
2250 * A parsed entity must not contain a recursive
2251 * reference to itself, either directly or indirectly.
2252 *
2253 * [ WFC: Entity Declared ]
2254 * In a document without any DTD, a document with only an internal DTD
2255 * subset which contains no parameter entity references, or a document
2256 * with "standalone='yes'", ... ... The declaration of a parameter
2257 * entity must precede any reference to it...
2258 *
2259 * [ VC: Entity Declared ]
2260 * In a document with an external subset or external parameter entities
2261 * with "standalone='no'", ... ... The declaration of a parameter entity
2262 * must precede any reference to it...
2263 *
2264 * [ WFC: In DTD ]
2265 * Parameter-entity references may only appear in the DTD.
2266 * NOTE: misleading but this is handled.
2267 *
2268 * A PEReference may have been detected in the current input stream
2269 * the handling is done accordingly to
2270 * http://www.w3.org/TR/REC-xml#entproc
2271 * i.e.
2272 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002273 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002274 */
2275void
2276xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002277 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002278 xmlEntityPtr entity = NULL;
2279 xmlParserInputPtr input;
2280
Owen Taylor3473f882001-02-23 17:55:21 +00002281 if (RAW != '%') return;
2282 switch(ctxt->instate) {
2283 case XML_PARSER_CDATA_SECTION:
2284 return;
2285 case XML_PARSER_COMMENT:
2286 return;
2287 case XML_PARSER_START_TAG:
2288 return;
2289 case XML_PARSER_END_TAG:
2290 return;
2291 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002292 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002293 return;
2294 case XML_PARSER_PROLOG:
2295 case XML_PARSER_START:
2296 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002297 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002298 return;
2299 case XML_PARSER_ENTITY_DECL:
2300 case XML_PARSER_CONTENT:
2301 case XML_PARSER_ATTRIBUTE_VALUE:
2302 case XML_PARSER_PI:
2303 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002304 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002305 /* we just ignore it there */
2306 return;
2307 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002308 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002309 return;
2310 case XML_PARSER_ENTITY_VALUE:
2311 /*
2312 * NOTE: in the case of entity values, we don't do the
2313 * substitution here since we need the literal
2314 * entity value to be able to save the internal
2315 * subset of the document.
2316 * This will be handled by xmlStringDecodeEntities
2317 */
2318 return;
2319 case XML_PARSER_DTD:
2320 /*
2321 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2322 * In the internal DTD subset, parameter-entity references
2323 * can occur only where markup declarations can occur, not
2324 * within markup declarations.
2325 * In that case this is handled in xmlParseMarkupDecl
2326 */
2327 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2328 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002329 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002330 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002331 break;
2332 case XML_PARSER_IGNORE:
2333 return;
2334 }
2335
2336 NEXT;
2337 name = xmlParseName(ctxt);
2338 if (xmlParserDebugEntities)
2339 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002340 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002341 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002342 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002343 } else {
2344 if (RAW == ';') {
2345 NEXT;
2346 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2347 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2348 if (entity == NULL) {
2349
2350 /*
2351 * [ WFC: Entity Declared ]
2352 * In a document without any DTD, a document with only an
2353 * internal DTD subset which contains no parameter entity
2354 * references, or a document with "standalone='yes'", ...
2355 * ... The declaration of a parameter entity must precede
2356 * any reference to it...
2357 */
2358 if ((ctxt->standalone == 1) ||
2359 ((ctxt->hasExternalSubset == 0) &&
2360 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002361 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002362 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002363 } else {
2364 /*
2365 * [ VC: Entity Declared ]
2366 * In a document with an external subset or external
2367 * parameter entities with "standalone='no'", ...
2368 * ... The declaration of a parameter entity must precede
2369 * any reference to it...
2370 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002371 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2372 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2373 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002374 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002375 } else
2376 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2377 "PEReference: %%%s; not found\n",
2378 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002379 ctxt->valid = 0;
2380 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002381 } else if (ctxt->input->free != deallocblankswrapper) {
2382 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002383 if (xmlPushInput(ctxt, input) < 0)
2384 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002385 } else {
2386 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2387 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002388 xmlChar start[4];
2389 xmlCharEncoding enc;
2390
Owen Taylor3473f882001-02-23 17:55:21 +00002391 /*
2392 * handle the extra spaces added before and after
2393 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002394 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002395 */
2396 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002397 if (xmlPushInput(ctxt, input) < 0)
2398 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002399
2400 /*
2401 * Get the 4 first bytes and decode the charset
2402 * if enc != XML_CHAR_ENCODING_NONE
2403 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002404 * Note that, since we may have some non-UTF8
2405 * encoding (like UTF16, bug 135229), the 'length'
2406 * is not known, but we can calculate based upon
2407 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002408 */
2409 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002410 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002411 start[0] = RAW;
2412 start[1] = NXT(1);
2413 start[2] = NXT(2);
2414 start[3] = NXT(3);
2415 enc = xmlDetectCharEncoding(start, 4);
2416 if (enc != XML_CHAR_ENCODING_NONE) {
2417 xmlSwitchEncoding(ctxt, enc);
2418 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002419 }
2420
Owen Taylor3473f882001-02-23 17:55:21 +00002421 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002422 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2423 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002424 xmlParseTextDecl(ctxt);
2425 }
Owen Taylor3473f882001-02-23 17:55:21 +00002426 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002427 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2428 "PEReference: %s is not a parameter entity\n",
2429 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002430 }
2431 }
2432 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002433 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002434 }
Owen Taylor3473f882001-02-23 17:55:21 +00002435 }
2436}
2437
/*
 * Macro used to grow the current buffer.
 *
 * Expects, at the expansion site, an xmlChar pointer variable `buffer`
 * together with a companion size variable named `<buffer>_size`, and a
 * `mem_error` label to jump to when the reallocation fails. The new size
 * is twice the old one plus @n. On failure the original allocation is left
 * untouched (it is only replaced once xmlRealloc succeeded), but
 * `<buffer>_size` has already been updated.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    buffer##_size *= 2;                                                 \
    buffer##_size += (n);                                               \
    tmp = (xmlChar *)                                                   \
            xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));        \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
}
2450
2451/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002452 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002453 * @ctxt: the parser context
2454 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002455 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002456 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2457 * @end: an end marker xmlChar, 0 if none
2458 * @end2: an end marker xmlChar, 0 if none
2459 * @end3: an end marker xmlChar, 0 if none
2460 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002461 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002462 *
2463 * [67] Reference ::= EntityRef | CharRef
2464 *
2465 * [69] PEReference ::= '%' Name ';'
2466 *
2467 * Returns A newly allocated string with the substitution done. The caller
2468 * must deallocate it !
2469 */
2470xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002471xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2472 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002473 xmlChar *buffer = NULL;
2474 int buffer_size = 0;
2475
2476 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002477 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002478 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002479 xmlEntityPtr ent;
2480 int c,l;
2481 int nbchars = 0;
2482
Daniel Veillarda82b1822004-11-08 16:24:57 +00002483 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002484 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002485 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002486
Daniel Veillard0161e632008-08-28 15:36:32 +00002487 if (((ctxt->depth > 40) &&
2488 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2489 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002490 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002491 return(NULL);
2492 }
2493
2494 /*
2495 * allocate a translation buffer.
2496 */
2497 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002498 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002499 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002500
2501 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002502 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002503 * we are operating on already parsed values.
2504 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002505 if (str < last)
2506 c = CUR_SCHAR(str, l);
2507 else
2508 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002509 while ((c != 0) && (c != end) && /* non input consuming loop */
2510 (c != end2) && (c != end3)) {
2511
2512 if (c == 0) break;
2513 if ((c == '&') && (str[1] == '#')) {
2514 int val = xmlParseStringCharRef(ctxt, &str);
2515 if (val != 0) {
2516 COPY_BUF(0,buffer,nbchars,val);
2517 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002518 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002519 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002520 }
Owen Taylor3473f882001-02-23 17:55:21 +00002521 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2522 if (xmlParserDebugEntities)
2523 xmlGenericError(xmlGenericErrorContext,
2524 "String decoding Entity Reference: %.30s\n",
2525 str);
2526 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002527 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2528 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002529 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002530 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002531 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 if ((ent != NULL) &&
2533 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2534 if (ent->content != NULL) {
2535 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002536 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002537 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002538 }
Owen Taylor3473f882001-02-23 17:55:21 +00002539 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002540 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2541 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002542 }
2543 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002544 ctxt->depth++;
2545 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2546 0, 0, 0);
2547 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002548
Owen Taylor3473f882001-02-23 17:55:21 +00002549 if (rep != NULL) {
2550 current = rep;
2551 while (*current != 0) { /* non input consuming loop */
2552 buffer[nbchars++] = *current++;
2553 if (nbchars >
2554 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002555 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2556 goto int_error;
2557 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002558 }
2559 }
2560 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002561 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002562 }
2563 } else if (ent != NULL) {
2564 int i = xmlStrlen(ent->name);
2565 const xmlChar *cur = ent->name;
2566
2567 buffer[nbchars++] = '&';
2568 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002569 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002570 }
2571 for (;i > 0;i--)
2572 buffer[nbchars++] = *cur++;
2573 buffer[nbchars++] = ';';
2574 }
2575 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2576 if (xmlParserDebugEntities)
2577 xmlGenericError(xmlGenericErrorContext,
2578 "String decoding PE Reference: %.30s\n", str);
2579 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002580 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2581 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002582 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002583 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002584 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002585 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002586 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002587 }
Owen Taylor3473f882001-02-23 17:55:21 +00002588 ctxt->depth++;
2589 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2590 0, 0, 0);
2591 ctxt->depth--;
2592 if (rep != NULL) {
2593 current = rep;
2594 while (*current != 0) { /* non input consuming loop */
2595 buffer[nbchars++] = *current++;
2596 if (nbchars >
2597 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002598 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2599 goto int_error;
2600 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002601 }
2602 }
2603 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002604 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002605 }
2606 }
2607 } else {
2608 COPY_BUF(l,buffer,nbchars,c);
2609 str += l;
2610 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002611 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002612 }
2613 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002614 if (str < last)
2615 c = CUR_SCHAR(str, l);
2616 else
2617 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002618 }
2619 buffer[nbchars++] = 0;
2620 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002621
2622mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002623 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002624int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002625 if (rep != NULL)
2626 xmlFree(rep);
2627 if (buffer != NULL)
2628 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002629 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002630}
2631
Daniel Veillarde57ec792003-09-10 10:50:59 +00002632/**
2633 * xmlStringDecodeEntities:
2634 * @ctxt: the parser context
2635 * @str: the input string
2636 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2637 * @end: an end marker xmlChar, 0 if none
2638 * @end2: an end marker xmlChar, 0 if none
2639 * @end3: an end marker xmlChar, 0 if none
2640 *
2641 * Takes a entity string content and process to do the adequate substitutions.
2642 *
2643 * [67] Reference ::= EntityRef | CharRef
2644 *
2645 * [69] PEReference ::= '%' Name ';'
2646 *
2647 * Returns A newly allocated string with the substitution done. The caller
2648 * must deallocate it !
2649 */
2650xmlChar *
2651xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2652 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002653 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002654 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2655 end, end2, end3));
2656}
Owen Taylor3473f882001-02-23 17:55:21 +00002657
2658/************************************************************************
2659 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002660 * Commodity functions, cleanup needed ? *
2661 * *
2662 ************************************************************************/
2663
2664/**
2665 * areBlanks:
2666 * @ctxt: an XML parser context
2667 * @str: a xmlChar *
2668 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002669 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002670 *
2671 * Is this a sequence of blank chars that one can ignore ?
2672 *
2673 * Returns 1 if ignorable 0 otherwise.
2674 */
2675
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002676static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2677 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002678 int i, ret;
2679 xmlNodePtr lastChild;
2680
Daniel Veillard05c13a22001-09-09 08:38:09 +00002681 /*
2682 * Don't spend time trying to differentiate them, the same callback is
2683 * used !
2684 */
2685 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002686 return(0);
2687
Owen Taylor3473f882001-02-23 17:55:21 +00002688 /*
2689 * Check for xml:space value.
2690 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002691 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2692 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002693 return(0);
2694
2695 /*
2696 * Check that the string is made of blanks
2697 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002698 if (blank_chars == 0) {
2699 for (i = 0;i < len;i++)
2700 if (!(IS_BLANK_CH(str[i]))) return(0);
2701 }
Owen Taylor3473f882001-02-23 17:55:21 +00002702
2703 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002704 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002705 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002706 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002707 if (ctxt->myDoc != NULL) {
2708 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2709 if (ret == 0) return(1);
2710 if (ret == 1) return(0);
2711 }
2712
2713 /*
2714 * Otherwise, heuristic :-\
2715 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002716 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002717 if ((ctxt->node->children == NULL) &&
2718 (RAW == '<') && (NXT(1) == '/')) return(0);
2719
2720 lastChild = xmlGetLastChild(ctxt->node);
2721 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002722 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2723 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002724 } else if (xmlNodeIsText(lastChild))
2725 return(0);
2726 else if ((ctxt->node->children != NULL) &&
2727 (xmlNodeIsText(ctxt->node->children)))
2728 return(0);
2729 return(1);
2730}
2731
Owen Taylor3473f882001-02-23 17:55:21 +00002732/************************************************************************
2733 * *
2734 * Extra stuff for namespace support *
2735 * Relates to http://www.w3.org/TR/WD-xml-names *
2736 * *
2737 ************************************************************************/
2738
2739/**
2740 * xmlSplitQName:
2741 * @ctxt: an XML parser context
2742 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002743 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002744 *
2745 * parse an UTF8 encoded XML qualified name string
2746 *
2747 * [NS 5] QName ::= (Prefix ':')? LocalPart
2748 *
2749 * [NS 6] Prefix ::= NCName
2750 *
2751 * [NS 7] LocalPart ::= NCName
2752 *
2753 * Returns the local part, and prefix is updated
2754 * to get the Prefix if any.
2755 */
2756
2757xmlChar *
2758xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2759 xmlChar buf[XML_MAX_NAMELEN + 5];
2760 xmlChar *buffer = NULL;
2761 int len = 0;
2762 int max = XML_MAX_NAMELEN;
2763 xmlChar *ret = NULL;
2764 const xmlChar *cur = name;
2765 int c;
2766
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002767 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002768 *prefix = NULL;
2769
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002770 if (cur == NULL) return(NULL);
2771
Owen Taylor3473f882001-02-23 17:55:21 +00002772#ifndef XML_XML_NAMESPACE
2773 /* xml: prefix is not really a namespace */
2774 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2775 (cur[2] == 'l') && (cur[3] == ':'))
2776 return(xmlStrdup(name));
2777#endif
2778
Daniel Veillard597bc482003-07-24 16:08:28 +00002779 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002780 if (cur[0] == ':')
2781 return(xmlStrdup(name));
2782
2783 c = *cur++;
2784 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2785 buf[len++] = c;
2786 c = *cur++;
2787 }
2788 if (len >= max) {
2789 /*
2790 * Okay someone managed to make a huge name, so he's ready to pay
2791 * for the processing speed.
2792 */
2793 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002794
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002795 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002796 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002797 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002798 return(NULL);
2799 }
2800 memcpy(buffer, buf, len);
2801 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2802 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002803 xmlChar *tmp;
2804
Owen Taylor3473f882001-02-23 17:55:21 +00002805 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002806 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002807 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002808 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002809 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002810 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002811 return(NULL);
2812 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002813 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002814 }
2815 buffer[len++] = c;
2816 c = *cur++;
2817 }
2818 buffer[len] = 0;
2819 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002820
Daniel Veillard597bc482003-07-24 16:08:28 +00002821 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002822 if (buffer != NULL)
2823 xmlFree(buffer);
2824 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002825 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002826 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002827
Owen Taylor3473f882001-02-23 17:55:21 +00002828 if (buffer == NULL)
2829 ret = xmlStrndup(buf, len);
2830 else {
2831 ret = buffer;
2832 buffer = NULL;
2833 max = XML_MAX_NAMELEN;
2834 }
2835
2836
2837 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002838 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002839 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002840 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002841 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002842 }
Owen Taylor3473f882001-02-23 17:55:21 +00002843 len = 0;
2844
Daniel Veillardbb284f42002-10-16 18:02:47 +00002845 /*
2846 * Check that the first character is proper to start
2847 * a new name
2848 */
2849 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2850 ((c >= 0x41) && (c <= 0x5A)) ||
2851 (c == '_') || (c == ':'))) {
2852 int l;
2853 int first = CUR_SCHAR(cur, l);
2854
2855 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002856 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002857 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002858 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002859 }
2860 }
2861 cur++;
2862
Owen Taylor3473f882001-02-23 17:55:21 +00002863 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2864 buf[len++] = c;
2865 c = *cur++;
2866 }
2867 if (len >= max) {
2868 /*
2869 * Okay someone managed to make a huge name, so he's ready to pay
2870 * for the processing speed.
2871 */
2872 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002873
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002874 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002876 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002877 return(NULL);
2878 }
2879 memcpy(buffer, buf, len);
2880 while (c != 0) { /* tested bigname2.xml */
2881 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002882 xmlChar *tmp;
2883
Owen Taylor3473f882001-02-23 17:55:21 +00002884 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002885 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002886 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002887 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002888 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002889 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002890 return(NULL);
2891 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002892 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002893 }
2894 buffer[len++] = c;
2895 c = *cur++;
2896 }
2897 buffer[len] = 0;
2898 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002899
Owen Taylor3473f882001-02-23 17:55:21 +00002900 if (buffer == NULL)
2901 ret = xmlStrndup(buf, len);
2902 else {
2903 ret = buffer;
2904 }
2905 }
2906
2907 return(ret);
2908}
2909
2910/************************************************************************
2911 * *
2912 * The parser itself *
2913 * Relates to http://www.w3.org/TR/REC-xml *
2914 * *
2915 ************************************************************************/
2916
Daniel Veillard34e3f642008-07-29 09:02:27 +00002917/************************************************************************
2918 * *
2919 * Routines to parse Name, NCName and NmToken *
2920 * *
2921 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters, only compiled in when DEBUG is defined.
 * nbParseNameComplex is incremented on entry to xmlParseNameComplex();
 * the others are presumably incremented by the like-named parsing
 * routines elsewhere in this file -- TODO confirm.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
2930
Daniel Veillard34e3f642008-07-29 09:02:27 +00002931/*
2932 * The two following functions are related to the change of accepted
2933 * characters for Name and NmToken in the Revision 5 of XML-1.0
2934 * They correspond to the modified production [4] and the new production [4a]
2935 * changes in that revision. Also note that the macros used for the
2936 * productions Letter, Digit, CombiningChar and Extender are not needed
2937 * anymore.
2938 * We still keep compatibility to pre-revision5 parsing semantic if the
2939 * new XML_PARSE_OLD10 option is given to the parser.
2940 */
2941static int
2942xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2943 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2944 /*
2945 * Use the new checks of production [4] [4a] amd [5] of the
2946 * Update 5 of XML-1.0
2947 */
2948 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2949 (((c >= 'a') && (c <= 'z')) ||
2950 ((c >= 'A') && (c <= 'Z')) ||
2951 (c == '_') || (c == ':') ||
2952 ((c >= 0xC0) && (c <= 0xD6)) ||
2953 ((c >= 0xD8) && (c <= 0xF6)) ||
2954 ((c >= 0xF8) && (c <= 0x2FF)) ||
2955 ((c >= 0x370) && (c <= 0x37D)) ||
2956 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2957 ((c >= 0x200C) && (c <= 0x200D)) ||
2958 ((c >= 0x2070) && (c <= 0x218F)) ||
2959 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2960 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2961 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2962 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2963 ((c >= 0x10000) && (c <= 0xEFFFF))))
2964 return(1);
2965 } else {
2966 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2967 return(1);
2968 }
2969 return(0);
2970}
2971
2972static int
2973xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2974 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2975 /*
2976 * Use the new checks of production [4] [4a] amd [5] of the
2977 * Update 5 of XML-1.0
2978 */
2979 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2980 (((c >= 'a') && (c <= 'z')) ||
2981 ((c >= 'A') && (c <= 'Z')) ||
2982 ((c >= '0') && (c <= '9')) || /* !start */
2983 (c == '_') || (c == ':') ||
2984 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2985 ((c >= 0xC0) && (c <= 0xD6)) ||
2986 ((c >= 0xD8) && (c <= 0xF6)) ||
2987 ((c >= 0xF8) && (c <= 0x2FF)) ||
2988 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2989 ((c >= 0x370) && (c <= 0x37D)) ||
2990 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2991 ((c >= 0x200C) && (c <= 0x200D)) ||
2992 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2993 ((c >= 0x2070) && (c <= 0x218F)) ||
2994 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2995 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2996 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2997 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2998 ((c >= 0x10000) && (c <= 0xEFFFF))))
2999 return(1);
3000 } else {
3001 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3002 (c == '.') || (c == '-') ||
3003 (c == '_') || (c == ':') ||
3004 (IS_COMBINING(c)) ||
3005 (IS_EXTENDER(c)))
3006 return(1);
3007 }
3008 return(0);
3009}
3010
Daniel Veillarde57ec792003-09-10 10:50:59 +00003011static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003012 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003013
Daniel Veillard34e3f642008-07-29 09:02:27 +00003014static const xmlChar *
3015xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3016 int len = 0, l;
3017 int c;
3018 int count = 0;
3019
Daniel Veillardc6561462009-03-25 10:22:31 +00003020#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003021 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003022#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003023
3024 /*
3025 * Handler for more complex cases
3026 */
3027 GROW;
3028 c = CUR_CHAR(l);
3029 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3030 /*
3031 * Use the new checks of production [4] [4a] amd [5] of the
3032 * Update 5 of XML-1.0
3033 */
3034 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3035 (!(((c >= 'a') && (c <= 'z')) ||
3036 ((c >= 'A') && (c <= 'Z')) ||
3037 (c == '_') || (c == ':') ||
3038 ((c >= 0xC0) && (c <= 0xD6)) ||
3039 ((c >= 0xD8) && (c <= 0xF6)) ||
3040 ((c >= 0xF8) && (c <= 0x2FF)) ||
3041 ((c >= 0x370) && (c <= 0x37D)) ||
3042 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3043 ((c >= 0x200C) && (c <= 0x200D)) ||
3044 ((c >= 0x2070) && (c <= 0x218F)) ||
3045 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3046 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3047 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3048 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3049 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3050 return(NULL);
3051 }
3052 len += l;
3053 NEXTL(l);
3054 c = CUR_CHAR(l);
3055 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3056 (((c >= 'a') && (c <= 'z')) ||
3057 ((c >= 'A') && (c <= 'Z')) ||
3058 ((c >= '0') && (c <= '9')) || /* !start */
3059 (c == '_') || (c == ':') ||
3060 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3061 ((c >= 0xC0) && (c <= 0xD6)) ||
3062 ((c >= 0xD8) && (c <= 0xF6)) ||
3063 ((c >= 0xF8) && (c <= 0x2FF)) ||
3064 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3065 ((c >= 0x370) && (c <= 0x37D)) ||
3066 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3067 ((c >= 0x200C) && (c <= 0x200D)) ||
3068 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3069 ((c >= 0x2070) && (c <= 0x218F)) ||
3070 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3071 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3072 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3073 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3074 ((c >= 0x10000) && (c <= 0xEFFFF))
3075 )) {
3076 if (count++ > 100) {
3077 count = 0;
3078 GROW;
3079 }
3080 len += l;
3081 NEXTL(l);
3082 c = CUR_CHAR(l);
3083 }
3084 } else {
3085 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3086 (!IS_LETTER(c) && (c != '_') &&
3087 (c != ':'))) {
3088 return(NULL);
3089 }
3090 len += l;
3091 NEXTL(l);
3092 c = CUR_CHAR(l);
3093
3094 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3095 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3096 (c == '.') || (c == '-') ||
3097 (c == '_') || (c == ':') ||
3098 (IS_COMBINING(c)) ||
3099 (IS_EXTENDER(c)))) {
3100 if (count++ > 100) {
3101 count = 0;
3102 GROW;
3103 }
3104 len += l;
3105 NEXTL(l);
3106 c = CUR_CHAR(l);
3107 }
3108 }
3109 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3110 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3111 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3112}
3113
Owen Taylor3473f882001-02-23 17:55:21 +00003114/**
3115 * xmlParseName:
3116 * @ctxt: an XML parser context
3117 *
3118 * parse an XML name.
3119 *
3120 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3121 * CombiningChar | Extender
3122 *
3123 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3124 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003125 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003126 *
3127 * Returns the Name parsed or NULL
3128 */
3129
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003130const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003131xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003132 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003133 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003134 int count = 0;
3135
3136 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003137
Daniel Veillardc6561462009-03-25 10:22:31 +00003138#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003139 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003140#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003141
Daniel Veillard48b2f892001-02-25 16:11:03 +00003142 /*
3143 * Accelerator for simple ASCII names
3144 */
3145 in = ctxt->input->cur;
3146 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3147 ((*in >= 0x41) && (*in <= 0x5A)) ||
3148 (*in == '_') || (*in == ':')) {
3149 in++;
3150 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3151 ((*in >= 0x41) && (*in <= 0x5A)) ||
3152 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003153 (*in == '_') || (*in == '-') ||
3154 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003155 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003156 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003157 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003158 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003159 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003160 ctxt->nbChars += count;
3161 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003162 if (ret == NULL)
3163 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003164 return(ret);
3165 }
3166 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003167 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003168 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003169}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003170
Daniel Veillard34e3f642008-07-29 09:02:27 +00003171static const xmlChar *
3172xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3173 int len = 0, l;
3174 int c;
3175 int count = 0;
3176
Daniel Veillardc6561462009-03-25 10:22:31 +00003177#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003178 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003179#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003180
3181 /*
3182 * Handler for more complex cases
3183 */
3184 GROW;
3185 c = CUR_CHAR(l);
3186 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3187 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3188 return(NULL);
3189 }
3190
3191 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3192 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3193 if (count++ > 100) {
3194 count = 0;
3195 GROW;
3196 }
3197 len += l;
3198 NEXTL(l);
3199 c = CUR_CHAR(l);
3200 }
3201 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3202}
3203
3204/**
3205 * xmlParseNCName:
3206 * @ctxt: an XML parser context
3207 * @len: lenght of the string parsed
3208 *
3209 * parse an XML name.
3210 *
3211 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3212 * CombiningChar | Extender
3213 *
3214 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3215 *
3216 * Returns the Name parsed or NULL
3217 */
3218
3219static const xmlChar *
3220xmlParseNCName(xmlParserCtxtPtr ctxt) {
3221 const xmlChar *in;
3222 const xmlChar *ret;
3223 int count = 0;
3224
Daniel Veillardc6561462009-03-25 10:22:31 +00003225#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003226 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003227#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003228
3229 /*
3230 * Accelerator for simple ASCII names
3231 */
3232 in = ctxt->input->cur;
3233 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3234 ((*in >= 0x41) && (*in <= 0x5A)) ||
3235 (*in == '_')) {
3236 in++;
3237 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3238 ((*in >= 0x41) && (*in <= 0x5A)) ||
3239 ((*in >= 0x30) && (*in <= 0x39)) ||
3240 (*in == '_') || (*in == '-') ||
3241 (*in == '.'))
3242 in++;
3243 if ((*in > 0) && (*in < 0x80)) {
3244 count = in - ctxt->input->cur;
3245 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3246 ctxt->input->cur = in;
3247 ctxt->nbChars += count;
3248 ctxt->input->col += count;
3249 if (ret == NULL) {
3250 xmlErrMemory(ctxt, NULL);
3251 }
3252 return(ret);
3253 }
3254 }
3255 return(xmlParseNCNameComplex(ctxt));
3256}
3257
Daniel Veillard46de64e2002-05-29 08:21:33 +00003258/**
3259 * xmlParseNameAndCompare:
3260 * @ctxt: an XML parser context
3261 *
3262 * parse an XML name and compares for match
3263 * (specialized for endtag parsing)
3264 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003265 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3266 * and the name for mismatch
3267 */
3268
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003269static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003270xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003271 register const xmlChar *cmp = other;
3272 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003273 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003274
3275 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003276
Daniel Veillard46de64e2002-05-29 08:21:33 +00003277 in = ctxt->input->cur;
3278 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003279 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003280 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003281 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003282 }
William M. Brack76e95df2003-10-18 16:20:14 +00003283 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003284 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003285 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003286 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003287 }
3288 /* failure (or end of input buffer), check with full function */
3289 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003290 /* strings coming from the dictionnary direct compare possible */
3291 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003292 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003293 }
3294 return ret;
3295}
3296
Owen Taylor3473f882001-02-23 17:55:21 +00003297/**
3298 * xmlParseStringName:
3299 * @ctxt: an XML parser context
3300 * @str: a pointer to the string pointer (IN/OUT)
3301 *
3302 * parse an XML name.
3303 *
3304 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3305 * CombiningChar | Extender
3306 *
3307 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3308 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003309 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003310 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003311 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003312 * is updated to the current location in the string.
3313 */
3314
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003315static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003316xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3317 xmlChar buf[XML_MAX_NAMELEN + 5];
3318 const xmlChar *cur = *str;
3319 int len = 0, l;
3320 int c;
3321
Daniel Veillardc6561462009-03-25 10:22:31 +00003322#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003323 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003324#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003325
Owen Taylor3473f882001-02-23 17:55:21 +00003326 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003327 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003328 return(NULL);
3329 }
3330
Daniel Veillard34e3f642008-07-29 09:02:27 +00003331 COPY_BUF(l,buf,len,c);
3332 cur += l;
3333 c = CUR_SCHAR(cur, l);
3334 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003335 COPY_BUF(l,buf,len,c);
3336 cur += l;
3337 c = CUR_SCHAR(cur, l);
3338 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3339 /*
3340 * Okay someone managed to make a huge name, so he's ready to pay
3341 * for the processing speed.
3342 */
3343 xmlChar *buffer;
3344 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003345
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003346 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003347 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003348 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003349 return(NULL);
3350 }
3351 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003352 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003353 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003354 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003355 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003356 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003357 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003358 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003359 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003360 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003361 return(NULL);
3362 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003363 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003364 }
3365 COPY_BUF(l,buffer,len,c);
3366 cur += l;
3367 c = CUR_SCHAR(cur, l);
3368 }
3369 buffer[len] = 0;
3370 *str = cur;
3371 return(buffer);
3372 }
3373 }
3374 *str = cur;
3375 return(xmlStrndup(buf, len));
3376}
3377
3378/**
3379 * xmlParseNmtoken:
3380 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003381 *
Owen Taylor3473f882001-02-23 17:55:21 +00003382 * parse an XML Nmtoken.
3383 *
3384 * [7] Nmtoken ::= (NameChar)+
3385 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003386 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003387 *
3388 * Returns the Nmtoken parsed or NULL
3389 */
3390
3391xmlChar *
3392xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3393 xmlChar buf[XML_MAX_NAMELEN + 5];
3394 int len = 0, l;
3395 int c;
3396 int count = 0;
3397
Daniel Veillardc6561462009-03-25 10:22:31 +00003398#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003399 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003400#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003401
Owen Taylor3473f882001-02-23 17:55:21 +00003402 GROW;
3403 c = CUR_CHAR(l);
3404
Daniel Veillard34e3f642008-07-29 09:02:27 +00003405 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003406 if (count++ > 100) {
3407 count = 0;
3408 GROW;
3409 }
3410 COPY_BUF(l,buf,len,c);
3411 NEXTL(l);
3412 c = CUR_CHAR(l);
3413 if (len >= XML_MAX_NAMELEN) {
3414 /*
3415 * Okay someone managed to make a huge token, so he's ready to pay
3416 * for the processing speed.
3417 */
3418 xmlChar *buffer;
3419 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003420
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003421 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003422 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003423 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003424 return(NULL);
3425 }
3426 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003427 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003428 if (count++ > 100) {
3429 count = 0;
3430 GROW;
3431 }
3432 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003433 xmlChar *tmp;
3434
Owen Taylor3473f882001-02-23 17:55:21 +00003435 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003436 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003437 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003438 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003439 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003440 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003441 return(NULL);
3442 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003443 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003444 }
3445 COPY_BUF(l,buffer,len,c);
3446 NEXTL(l);
3447 c = CUR_CHAR(l);
3448 }
3449 buffer[len] = 0;
3450 return(buffer);
3451 }
3452 }
3453 if (len == 0)
3454 return(NULL);
3455 return(xmlStrndup(buf, len));
3456}
3457
3458/**
3459 * xmlParseEntityValue:
3460 * @ctxt: an XML parser context
3461 * @orig: if non-NULL store a copy of the original entity value
3462 *
3463 * parse a value for ENTITY declarations
3464 *
3465 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3466 * "'" ([^%&'] | PEReference | Reference)* "'"
3467 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003468 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003469 */
3470
3471xmlChar *
3472xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3473 xmlChar *buf = NULL;
3474 int len = 0;
3475 int size = XML_PARSER_BUFFER_SIZE;
3476 int c, l;
3477 xmlChar stop;
3478 xmlChar *ret = NULL;
3479 const xmlChar *cur = NULL;
3480 xmlParserInputPtr input;
3481
3482 if (RAW == '"') stop = '"';
3483 else if (RAW == '\'') stop = '\'';
3484 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003485 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003486 return(NULL);
3487 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003488 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003489 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003490 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003491 return(NULL);
3492 }
3493
3494 /*
3495 * The content of the entity definition is copied in a buffer.
3496 */
3497
3498 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3499 input = ctxt->input;
3500 GROW;
3501 NEXT;
3502 c = CUR_CHAR(l);
3503 /*
3504 * NOTE: 4.4.5 Included in Literal
3505 * When a parameter entity reference appears in a literal entity
3506 * value, ... a single or double quote character in the replacement
3507 * text is always treated as a normal data character and will not
3508 * terminate the literal.
3509 * In practice it means we stop the loop only when back at parsing
3510 * the initial entity and the quote is found
3511 */
William M. Brack871611b2003-10-18 04:53:14 +00003512 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003513 (ctxt->input != input))) {
3514 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003515 xmlChar *tmp;
3516
Owen Taylor3473f882001-02-23 17:55:21 +00003517 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003518 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3519 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003520 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003521 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003522 return(NULL);
3523 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003524 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003525 }
3526 COPY_BUF(l,buf,len,c);
3527 NEXTL(l);
3528 /*
3529 * Pop-up of finished entities.
3530 */
3531 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3532 xmlPopInput(ctxt);
3533
3534 GROW;
3535 c = CUR_CHAR(l);
3536 if (c == 0) {
3537 GROW;
3538 c = CUR_CHAR(l);
3539 }
3540 }
3541 buf[len] = 0;
3542
3543 /*
3544 * Raise problem w.r.t. '&' and '%' being used in non-entities
3545 * reference constructs. Note Charref will be handled in
3546 * xmlStringDecodeEntities()
3547 */
3548 cur = buf;
3549 while (*cur != 0) { /* non input consuming */
3550 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3551 xmlChar *name;
3552 xmlChar tmp = *cur;
3553
3554 cur++;
3555 name = xmlParseStringName(ctxt, &cur);
3556 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003557 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003558 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003559 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003560 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003561 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3562 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003563 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003564 }
3565 if (name != NULL)
3566 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003567 if (*cur == 0)
3568 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003569 }
3570 cur++;
3571 }
3572
3573 /*
3574 * Then PEReference entities are substituted.
3575 */
3576 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003577 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003578 xmlFree(buf);
3579 } else {
3580 NEXT;
3581 /*
3582 * NOTE: 4.4.7 Bypassed
3583 * When a general entity reference appears in the EntityValue in
3584 * an entity declaration, it is bypassed and left as is.
3585 * so XML_SUBSTITUTE_REF is not set here.
3586 */
3587 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3588 0, 0, 0);
3589 if (orig != NULL)
3590 *orig = buf;
3591 else
3592 xmlFree(buf);
3593 }
3594
3595 return(ret);
3596}
3597
3598/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003599 * xmlParseAttValueComplex:
3600 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003601 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003602 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003603 *
3604 * parse a value for an attribute, this is the fallback function
3605 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003606 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003607 *
3608 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3609 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003610static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003611xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003612 xmlChar limit = 0;
3613 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003614 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003615 int len = 0;
3616 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003617 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003618 xmlChar *current = NULL;
3619 xmlEntityPtr ent;
3620
Owen Taylor3473f882001-02-23 17:55:21 +00003621 if (NXT(0) == '"') {
3622 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3623 limit = '"';
3624 NEXT;
3625 } else if (NXT(0) == '\'') {
3626 limit = '\'';
3627 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3628 NEXT;
3629 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003630 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003631 return(NULL);
3632 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003633
Owen Taylor3473f882001-02-23 17:55:21 +00003634 /*
3635 * allocate a translation buffer.
3636 */
3637 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003638 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003639 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003640
3641 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003642 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003643 */
3644 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003645 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003646 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003647 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003648 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003649 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003650 if (NXT(1) == '#') {
3651 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003652
Owen Taylor3473f882001-02-23 17:55:21 +00003653 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003654 if (ctxt->replaceEntities) {
3655 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003656 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003657 }
3658 buf[len++] = '&';
3659 } else {
3660 /*
3661 * The reparsing will be done in xmlStringGetNodeList()
3662 * called by the attribute() function in SAX.c
3663 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003664 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003665 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003666 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003667 buf[len++] = '&';
3668 buf[len++] = '#';
3669 buf[len++] = '3';
3670 buf[len++] = '8';
3671 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003672 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003673 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003674 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003675 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003676 }
Owen Taylor3473f882001-02-23 17:55:21 +00003677 len += xmlCopyChar(0, &buf[len], val);
3678 }
3679 } else {
3680 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003681 ctxt->nbentities++;
3682 if (ent != NULL)
3683 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003684 if ((ent != NULL) &&
3685 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3686 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003687 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003688 }
3689 if ((ctxt->replaceEntities == 0) &&
3690 (ent->content[0] == '&')) {
3691 buf[len++] = '&';
3692 buf[len++] = '#';
3693 buf[len++] = '3';
3694 buf[len++] = '8';
3695 buf[len++] = ';';
3696 } else {
3697 buf[len++] = ent->content[0];
3698 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003699 } else if ((ent != NULL) &&
3700 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003701 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3702 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003703 XML_SUBSTITUTE_REF,
3704 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003705 if (rep != NULL) {
3706 current = rep;
3707 while (*current != 0) { /* non input consuming */
3708 buf[len++] = *current++;
3709 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003710 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003711 }
3712 }
3713 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003714 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003715 }
3716 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003717 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003718 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003719 }
Owen Taylor3473f882001-02-23 17:55:21 +00003720 if (ent->content != NULL)
3721 buf[len++] = ent->content[0];
3722 }
3723 } else if (ent != NULL) {
3724 int i = xmlStrlen(ent->name);
3725 const xmlChar *cur = ent->name;
3726
3727 /*
3728 * This may look absurd but is needed to detect
3729 * entities problems
3730 */
3731 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3732 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003733 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003734 XML_SUBSTITUTE_REF, 0, 0, 0);
3735 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003736 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003737 rep = NULL;
3738 }
Owen Taylor3473f882001-02-23 17:55:21 +00003739 }
3740
3741 /*
3742 * Just output the reference
3743 */
3744 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003745 while (len > buf_size - i - 10) {
3746 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003747 }
3748 for (;i > 0;i--)
3749 buf[len++] = *cur++;
3750 buf[len++] = ';';
3751 }
3752 }
3753 } else {
3754 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003755 if ((len != 0) || (!normalize)) {
3756 if ((!normalize) || (!in_space)) {
3757 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003758 while (len > buf_size - 10) {
3759 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003760 }
3761 }
3762 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003763 }
3764 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003765 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003766 COPY_BUF(l,buf,len,c);
3767 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003768 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003769 }
3770 }
3771 NEXTL(l);
3772 }
3773 GROW;
3774 c = CUR_CHAR(l);
3775 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003776 if ((in_space) && (normalize)) {
3777 while (buf[len - 1] == 0x20) len--;
3778 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003779 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003780 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003781 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003782 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003783 if ((c != 0) && (!IS_CHAR(c))) {
3784 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3785 "invalid character in attribute value\n");
3786 } else {
3787 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3788 "AttValue: ' expected\n");
3789 }
Owen Taylor3473f882001-02-23 17:55:21 +00003790 } else
3791 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003792 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003793 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003794
3795mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003796 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003797 if (buf != NULL)
3798 xmlFree(buf);
3799 if (rep != NULL)
3800 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003801 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003802}
3803
3804/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003805 * xmlParseAttValue:
3806 * @ctxt: an XML parser context
3807 *
3808 * parse a value for an attribute
3809 * Note: the parser won't do substitution of entities here, this
3810 * will be handled later in xmlStringGetNodeList
3811 *
3812 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3813 * "'" ([^<&'] | Reference)* "'"
3814 *
3815 * 3.3.3 Attribute-Value Normalization:
3816 * Before the value of an attribute is passed to the application or
3817 * checked for validity, the XML processor must normalize it as follows:
3818 * - a character reference is processed by appending the referenced
3819 * character to the attribute value
3820 * - an entity reference is processed by recursively processing the
3821 * replacement text of the entity
3822 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3823 * appending #x20 to the normalized value, except that only a single
3824 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3825 * parsed entity or the literal entity value of an internal parsed entity
3826 * - other characters are processed by appending them to the normalized value
3827 * If the declared value is not CDATA, then the XML processor must further
3828 * process the normalized attribute value by discarding any leading and
3829 * trailing space (#x20) characters, and by replacing sequences of space
3830 * (#x20) characters by a single space (#x20) character.
3831 * All attributes for which no declaration has been read should be treated
3832 * by a non-validating parser as if declared CDATA.
3833 *
3834 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3835 */
3836
3837
3838xmlChar *
3839xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003840 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003841 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003842}
3843
3844/**
Owen Taylor3473f882001-02-23 17:55:21 +00003845 * xmlParseSystemLiteral:
3846 * @ctxt: an XML parser context
3847 *
3848 * parse an XML Literal
3849 *
3850 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3851 *
3852 * Returns the SystemLiteral parsed or NULL
3853 */
3854
3855xmlChar *
3856xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3857 xmlChar *buf = NULL;
3858 int len = 0;
3859 int size = XML_PARSER_BUFFER_SIZE;
3860 int cur, l;
3861 xmlChar stop;
3862 int state = ctxt->instate;
3863 int count = 0;
3864
3865 SHRINK;
3866 if (RAW == '"') {
3867 NEXT;
3868 stop = '"';
3869 } else if (RAW == '\'') {
3870 NEXT;
3871 stop = '\'';
3872 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003873 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003874 return(NULL);
3875 }
3876
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003877 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003878 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003879 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003880 return(NULL);
3881 }
3882 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3883 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003884 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003885 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003886 xmlChar *tmp;
3887
Owen Taylor3473f882001-02-23 17:55:21 +00003888 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003889 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3890 if (tmp == NULL) {
3891 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003892 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003893 ctxt->instate = (xmlParserInputState) state;
3894 return(NULL);
3895 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003896 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003897 }
3898 count++;
3899 if (count > 50) {
3900 GROW;
3901 count = 0;
3902 }
3903 COPY_BUF(l,buf,len,cur);
3904 NEXTL(l);
3905 cur = CUR_CHAR(l);
3906 if (cur == 0) {
3907 GROW;
3908 SHRINK;
3909 cur = CUR_CHAR(l);
3910 }
3911 }
3912 buf[len] = 0;
3913 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003914 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003915 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003916 } else {
3917 NEXT;
3918 }
3919 return(buf);
3920}
3921
3922/**
3923 * xmlParsePubidLiteral:
3924 * @ctxt: an XML parser context
3925 *
3926 * parse an XML public literal
3927 *
3928 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3929 *
3930 * Returns the PubidLiteral parsed or NULL.
3931 */
3932
3933xmlChar *
3934xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3935 xmlChar *buf = NULL;
3936 int len = 0;
3937 int size = XML_PARSER_BUFFER_SIZE;
3938 xmlChar cur;
3939 xmlChar stop;
3940 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003941 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003942
3943 SHRINK;
3944 if (RAW == '"') {
3945 NEXT;
3946 stop = '"';
3947 } else if (RAW == '\'') {
3948 NEXT;
3949 stop = '\'';
3950 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003951 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003952 return(NULL);
3953 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003954 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003955 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003956 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003957 return(NULL);
3958 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003959 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003960 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003961 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003962 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003963 xmlChar *tmp;
3964
Owen Taylor3473f882001-02-23 17:55:21 +00003965 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003966 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3967 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003968 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003969 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003970 return(NULL);
3971 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003972 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003973 }
3974 buf[len++] = cur;
3975 count++;
3976 if (count > 50) {
3977 GROW;
3978 count = 0;
3979 }
3980 NEXT;
3981 cur = CUR;
3982 if (cur == 0) {
3983 GROW;
3984 SHRINK;
3985 cur = CUR;
3986 }
3987 }
3988 buf[len] = 0;
3989 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003990 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003991 } else {
3992 NEXT;
3993 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003994 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003995 return(buf);
3996}
3997
Daniel Veillard48b2f892001-02-25 16:11:03 +00003998void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003999
/*
 * Lookup table driving the inner loop of the char data fast path in
 * xmlParseCharData(): a non-zero entry means the byte can be skipped
 * over directly. Entries hold the byte value itself for 0x09 and for
 * 0x20..0x7F except '&' (0x26), '<' (0x3C) and ']' (0x5D); every other
 * entry is zero. The upper half (0x80..0xFF, non-ASCII) is all zero and
 * is left to the implicit zero initialisation of the array.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only TAB is accepted, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, implicitly zero */
};
4037
Owen Taylor3473f882001-02-23 17:55:21 +00004038/**
4039 * xmlParseCharData:
4040 * @ctxt: an XML parser context
4041 * @cdata: int indicating whether we are within a CDATA section
4042 *
4043 * parse a CharData section.
4044 * if we are within a CDATA section ']]>' marks an end of section.
4045 *
4046 * The right angle bracket (>) may be represented using the string "&gt;",
4047 * and must, for compatibility, be escaped using "&gt;" or a character
4048 * reference when it appears in the string "]]>" in content, when that
4049 * string is not marking the end of a CDATA section.
4050 *
4051 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4052 */
4053
4054void
4055xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004056 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004057 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004058 int line = ctxt->input->line;
4059 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004060 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004061
4062 SHRINK;
4063 GROW;
4064 /*
4065 * Accelerated common case where input don't need to be
4066 * modified before passing it to the handler.
4067 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004068 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004069 in = ctxt->input->cur;
4070 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004071get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004072 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004073 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004074 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004075 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004076 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004077 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004078 goto get_more_space;
4079 }
4080 if (*in == '<') {
4081 nbchar = in - ctxt->input->cur;
4082 if (nbchar > 0) {
4083 const xmlChar *tmp = ctxt->input->cur;
4084 ctxt->input->cur = in;
4085
Daniel Veillard34099b42004-11-04 17:34:35 +00004086 if ((ctxt->sax != NULL) &&
4087 (ctxt->sax->ignorableWhitespace !=
4088 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004089 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004090 if (ctxt->sax->ignorableWhitespace != NULL)
4091 ctxt->sax->ignorableWhitespace(ctxt->userData,
4092 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004093 } else {
4094 if (ctxt->sax->characters != NULL)
4095 ctxt->sax->characters(ctxt->userData,
4096 tmp, nbchar);
4097 if (*ctxt->space == -1)
4098 *ctxt->space = -2;
4099 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004100 } else if ((ctxt->sax != NULL) &&
4101 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004102 ctxt->sax->characters(ctxt->userData,
4103 tmp, nbchar);
4104 }
4105 }
4106 return;
4107 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004108
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004109get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004110 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004111 while (test_char_data[*in]) {
4112 in++;
4113 ccol++;
4114 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004115 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004116 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004117 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004118 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004119 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004120 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004121 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004122 }
4123 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004124 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004125 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004126 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004127 return;
4128 }
4129 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004130 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004131 goto get_more;
4132 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004133 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004134 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004135 if ((ctxt->sax != NULL) &&
4136 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004137 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004138 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004139 const xmlChar *tmp = ctxt->input->cur;
4140 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004141
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004142 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004143 if (ctxt->sax->ignorableWhitespace != NULL)
4144 ctxt->sax->ignorableWhitespace(ctxt->userData,
4145 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004146 } else {
4147 if (ctxt->sax->characters != NULL)
4148 ctxt->sax->characters(ctxt->userData,
4149 tmp, nbchar);
4150 if (*ctxt->space == -1)
4151 *ctxt->space = -2;
4152 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004153 line = ctxt->input->line;
4154 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004155 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004156 if (ctxt->sax->characters != NULL)
4157 ctxt->sax->characters(ctxt->userData,
4158 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004159 line = ctxt->input->line;
4160 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004161 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004162 /* something really bad happened in the SAX callback */
4163 if (ctxt->instate != XML_PARSER_CONTENT)
4164 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004165 }
4166 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004167 if (*in == 0xD) {
4168 in++;
4169 if (*in == 0xA) {
4170 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004171 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004172 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004173 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004174 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004175 in--;
4176 }
4177 if (*in == '<') {
4178 return;
4179 }
4180 if (*in == '&') {
4181 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004182 }
4183 SHRINK;
4184 GROW;
4185 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004186 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004187 nbchar = 0;
4188 }
Daniel Veillard50582112001-03-26 22:52:16 +00004189 ctxt->input->line = line;
4190 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004191 xmlParseCharDataComplex(ctxt, cdata);
4192}
4193
Daniel Veillard01c13b52002-12-10 15:19:08 +00004194/**
4195 * xmlParseCharDataComplex:
4196 * @ctxt: an XML parser context
4197 * @cdata: int indicating whether we are within a CDATA section
4198 *
4199 * parse a CharData section.this is the fallback function
4200 * of xmlParseCharData() when the parsing requires handling
4201 * of non-ASCII characters.
4202 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004203void
4204xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004205 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4206 int nbchar = 0;
4207 int cur, l;
4208 int count = 0;
4209
4210 SHRINK;
4211 GROW;
4212 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004213 while ((cur != '<') && /* checked */
4214 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004215 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004216 if ((cur == ']') && (NXT(1) == ']') &&
4217 (NXT(2) == '>')) {
4218 if (cdata) break;
4219 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004220 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004221 }
4222 }
4223 COPY_BUF(l,buf,nbchar,cur);
4224 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004225 buf[nbchar] = 0;
4226
Owen Taylor3473f882001-02-23 17:55:21 +00004227 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004228 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004229 */
4230 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004231 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004232 if (ctxt->sax->ignorableWhitespace != NULL)
4233 ctxt->sax->ignorableWhitespace(ctxt->userData,
4234 buf, nbchar);
4235 } else {
4236 if (ctxt->sax->characters != NULL)
4237 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004238 if ((ctxt->sax->characters !=
4239 ctxt->sax->ignorableWhitespace) &&
4240 (*ctxt->space == -1))
4241 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004242 }
4243 }
4244 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004245 /* something really bad happened in the SAX callback */
4246 if (ctxt->instate != XML_PARSER_CONTENT)
4247 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004248 }
4249 count++;
4250 if (count > 50) {
4251 GROW;
4252 count = 0;
4253 }
4254 NEXTL(l);
4255 cur = CUR_CHAR(l);
4256 }
4257 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004258 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004259 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004260 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004261 */
4262 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004263 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004264 if (ctxt->sax->ignorableWhitespace != NULL)
4265 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4266 } else {
4267 if (ctxt->sax->characters != NULL)
4268 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004269 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4270 (*ctxt->space == -1))
4271 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004272 }
4273 }
4274 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004275 if ((cur != 0) && (!IS_CHAR(cur))) {
4276 /* Generate the error and skip the offending character */
4277 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4278 "PCDATA invalid Char value %d\n",
4279 cur);
4280 NEXTL(l);
4281 }
Owen Taylor3473f882001-02-23 17:55:21 +00004282}
4283
4284/**
4285 * xmlParseExternalID:
4286 * @ctxt: an XML parser context
4287 * @publicID: a xmlChar** receiving PubidLiteral
4288 * @strict: indicate whether we should restrict parsing to only
4289 * production [75], see NOTE below
4290 *
4291 * Parse an External ID or a Public ID
4292 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004293 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004294 * 'PUBLIC' S PubidLiteral S SystemLiteral
4295 *
4296 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4297 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4298 *
4299 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4300 *
4301 * Returns the function returns SystemLiteral and in the second
4302 * case publicID receives PubidLiteral, is strict is off
4303 * it is possible to return NULL and have publicID set.
4304 */
4305
4306xmlChar *
4307xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4308 xmlChar *URI = NULL;
4309
4310 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004311
4312 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004313 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004314 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004315 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004316 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4317 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004318 }
4319 SKIP_BLANKS;
4320 URI = xmlParseSystemLiteral(ctxt);
4321 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004322 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004323 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004324 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004325 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004326 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004327 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004328 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004329 }
4330 SKIP_BLANKS;
4331 *publicID = xmlParsePubidLiteral(ctxt);
4332 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004333 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004334 }
4335 if (strict) {
4336 /*
4337 * We don't handle [83] so "S SystemLiteral" is required.
4338 */
William M. Brack76e95df2003-10-18 16:20:14 +00004339 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004340 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004341 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004342 }
4343 } else {
4344 /*
4345 * We handle [83] so we return immediately, if
4346 * "S SystemLiteral" is not detected. From a purely parsing
4347 * point of view that's a nice mess.
4348 */
4349 const xmlChar *ptr;
4350 GROW;
4351
4352 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004353 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004354
William M. Brack76e95df2003-10-18 16:20:14 +00004355 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004356 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4357 }
4358 SKIP_BLANKS;
4359 URI = xmlParseSystemLiteral(ctxt);
4360 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004361 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004362 }
4363 }
4364 return(URI);
4365}
4366
4367/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004368 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004369 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004370 * @buf: the already parsed part of the buffer
4371 * @len: number of bytes filles in the buffer
4372 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004373 *
4374 * Skip an XML (SGML) comment <!-- .... -->
4375 * The spec says that "For compatibility, the string "--" (double-hyphen)
4376 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004377 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004378 *
4379 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4380 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004381static void
4382xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004383 int q, ql;
4384 int r, rl;
4385 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004386 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004387 int inputid;
4388
4389 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004390
Owen Taylor3473f882001-02-23 17:55:21 +00004391 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004392 len = 0;
4393 size = XML_PARSER_BUFFER_SIZE;
4394 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4395 if (buf == NULL) {
4396 xmlErrMemory(ctxt, NULL);
4397 return;
4398 }
Owen Taylor3473f882001-02-23 17:55:21 +00004399 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004400 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004401 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004402 if (q == 0)
4403 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004404 if (!IS_CHAR(q)) {
4405 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4406 "xmlParseComment: invalid xmlChar value %d\n",
4407 q);
4408 xmlFree (buf);
4409 return;
4410 }
Owen Taylor3473f882001-02-23 17:55:21 +00004411 NEXTL(ql);
4412 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004413 if (r == 0)
4414 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004415 if (!IS_CHAR(r)) {
4416 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4417 "xmlParseComment: invalid xmlChar value %d\n",
4418 q);
4419 xmlFree (buf);
4420 return;
4421 }
Owen Taylor3473f882001-02-23 17:55:21 +00004422 NEXTL(rl);
4423 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004424 if (cur == 0)
4425 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004426 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004427 ((cur != '>') ||
4428 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004429 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004430 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004431 }
4432 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004433 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004434 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004435 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4436 if (new_buf == NULL) {
4437 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004438 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004439 return;
4440 }
William M. Bracka3215c72004-07-31 16:24:01 +00004441 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004442 }
4443 COPY_BUF(ql,buf,len,q);
4444 q = r;
4445 ql = rl;
4446 r = cur;
4447 rl = l;
4448
4449 count++;
4450 if (count > 50) {
4451 GROW;
4452 count = 0;
4453 }
4454 NEXTL(l);
4455 cur = CUR_CHAR(l);
4456 if (cur == 0) {
4457 SHRINK;
4458 GROW;
4459 cur = CUR_CHAR(l);
4460 }
4461 }
4462 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004463 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004464 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004465 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004466 } else if (!IS_CHAR(cur)) {
4467 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4468 "xmlParseComment: invalid xmlChar value %d\n",
4469 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004470 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004471 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004472 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4473 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004474 }
4475 NEXT;
4476 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4477 (!ctxt->disableSAX))
4478 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004479 }
Daniel Veillardda629342007-08-01 07:49:06 +00004480 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004481 return;
4482not_terminated:
4483 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4484 "Comment not terminated\n", NULL);
4485 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004486 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004487}
Daniel Veillardda629342007-08-01 07:49:06 +00004488
Daniel Veillard4c778d82005-01-23 17:37:44 +00004489/**
4490 * xmlParseComment:
4491 * @ctxt: an XML parser context
4492 *
4493 * Skip an XML (SGML) comment <!-- .... -->
4494 * The spec says that "For compatibility, the string "--" (double-hyphen)
4495 * must not occur within comments. "
4496 *
4497 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4498 */
4499void
4500xmlParseComment(xmlParserCtxtPtr ctxt) {
4501 xmlChar *buf = NULL;
4502 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004503 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004504 xmlParserInputState state;
4505 const xmlChar *in;
4506 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004507 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004508
4509 /*
4510 * Check that there is a comment right here.
4511 */
4512 if ((RAW != '<') || (NXT(1) != '!') ||
4513 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004514 state = ctxt->instate;
4515 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004516 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004517 SKIP(4);
4518 SHRINK;
4519 GROW;
4520
4521 /*
4522 * Accelerated common case where input don't need to be
4523 * modified before passing it to the handler.
4524 */
4525 in = ctxt->input->cur;
4526 do {
4527 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004528 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004529 ctxt->input->line++; ctxt->input->col = 1;
4530 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004531 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004532 }
4533get_more:
4534 ccol = ctxt->input->col;
4535 while (((*in > '-') && (*in <= 0x7F)) ||
4536 ((*in >= 0x20) && (*in < '-')) ||
4537 (*in == 0x09)) {
4538 in++;
4539 ccol++;
4540 }
4541 ctxt->input->col = ccol;
4542 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004543 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004544 ctxt->input->line++; ctxt->input->col = 1;
4545 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004546 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004547 goto get_more;
4548 }
4549 nbchar = in - ctxt->input->cur;
4550 /*
4551 * save current set of data
4552 */
4553 if (nbchar > 0) {
4554 if ((ctxt->sax != NULL) &&
4555 (ctxt->sax->comment != NULL)) {
4556 if (buf == NULL) {
4557 if ((*in == '-') && (in[1] == '-'))
4558 size = nbchar + 1;
4559 else
4560 size = XML_PARSER_BUFFER_SIZE + nbchar;
4561 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4562 if (buf == NULL) {
4563 xmlErrMemory(ctxt, NULL);
4564 ctxt->instate = state;
4565 return;
4566 }
4567 len = 0;
4568 } else if (len + nbchar + 1 >= size) {
4569 xmlChar *new_buf;
4570 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4571 new_buf = (xmlChar *) xmlRealloc(buf,
4572 size * sizeof(xmlChar));
4573 if (new_buf == NULL) {
4574 xmlFree (buf);
4575 xmlErrMemory(ctxt, NULL);
4576 ctxt->instate = state;
4577 return;
4578 }
4579 buf = new_buf;
4580 }
4581 memcpy(&buf[len], ctxt->input->cur, nbchar);
4582 len += nbchar;
4583 buf[len] = 0;
4584 }
4585 }
4586 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004587 if (*in == 0xA) {
4588 in++;
4589 ctxt->input->line++; ctxt->input->col = 1;
4590 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004591 if (*in == 0xD) {
4592 in++;
4593 if (*in == 0xA) {
4594 ctxt->input->cur = in;
4595 in++;
4596 ctxt->input->line++; ctxt->input->col = 1;
4597 continue; /* while */
4598 }
4599 in--;
4600 }
4601 SHRINK;
4602 GROW;
4603 in = ctxt->input->cur;
4604 if (*in == '-') {
4605 if (in[1] == '-') {
4606 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004607 if (ctxt->input->id != inputid) {
4608 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4609 "comment doesn't start and stop in the same entity\n");
4610 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004611 SKIP(3);
4612 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4613 (!ctxt->disableSAX)) {
4614 if (buf != NULL)
4615 ctxt->sax->comment(ctxt->userData, buf);
4616 else
4617 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4618 }
4619 if (buf != NULL)
4620 xmlFree(buf);
4621 ctxt->instate = state;
4622 return;
4623 }
4624 if (buf != NULL)
4625 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4626 "Comment not terminated \n<!--%.50s\n",
4627 buf);
4628 else
4629 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4630 "Comment not terminated \n", NULL);
4631 in++;
4632 ctxt->input->col++;
4633 }
4634 in++;
4635 ctxt->input->col++;
4636 goto get_more;
4637 }
4638 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4639 xmlParseCommentComplex(ctxt, buf, len, size);
4640 ctxt->instate = state;
4641 return;
4642}
4643
Owen Taylor3473f882001-02-23 17:55:21 +00004644
4645/**
4646 * xmlParsePITarget:
4647 * @ctxt: an XML parser context
4648 *
4649 * parse the name of a PI
4650 *
4651 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4652 *
4653 * Returns the PITarget name or NULL
4654 */
4655
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004656const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004657xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004658 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004659
4660 name = xmlParseName(ctxt);
4661 if ((name != NULL) &&
4662 ((name[0] == 'x') || (name[0] == 'X')) &&
4663 ((name[1] == 'm') || (name[1] == 'M')) &&
4664 ((name[2] == 'l') || (name[2] == 'L'))) {
4665 int i;
4666 if ((name[0] == 'x') && (name[1] == 'm') &&
4667 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004668 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004669 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004670 return(name);
4671 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004672 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004673 return(name);
4674 }
4675 for (i = 0;;i++) {
4676 if (xmlW3CPIs[i] == NULL) break;
4677 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4678 return(name);
4679 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004680 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4681 "xmlParsePITarget: invalid name prefix 'xml'\n",
4682 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004683 }
Daniel Veillard37334572008-07-31 08:20:02 +00004684 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4685 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4686 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4687 }
Owen Taylor3473f882001-02-23 17:55:21 +00004688 return(name);
4689}
4690
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction, e.g.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected content is the keyword "catalog", an '=' sign and a
 * quoted URL, each optionally surrounded by blanks, with nothing else
 * after the closing quote. On success the URL is added to the catalogs
 * of the parsing context with xmlCatalogAddLocal(). A missing '=' makes
 * the function return silently; any other syntax problem raises an
 * XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* keyword */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' separator: its absence is not reported */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;

    /* opening quote, either ' or " */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* quoted value: scan up to the matching quote */
    start = p;
    for (; *p != quote; p++) {
        if (*p == 0)
            goto error;
    }
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4752
Owen Taylor3473f882001-02-23 17:55:21 +00004753/**
4754 * xmlParsePI:
4755 * @ctxt: an XML parser context
4756 *
4757 * parse an XML Processing Instruction.
4758 *
4759 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4760 *
4761 * The processing is transfered to SAX once parsed.
4762 */
4763
4764void
4765xmlParsePI(xmlParserCtxtPtr ctxt) {
4766 xmlChar *buf = NULL;
4767 int len = 0;
4768 int size = XML_PARSER_BUFFER_SIZE;
4769 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004770 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004771 xmlParserInputState state;
4772 int count = 0;
4773
4774 if ((RAW == '<') && (NXT(1) == '?')) {
4775 xmlParserInputPtr input = ctxt->input;
4776 state = ctxt->instate;
4777 ctxt->instate = XML_PARSER_PI;
4778 /*
4779 * this is a Processing Instruction.
4780 */
4781 SKIP(2);
4782 SHRINK;
4783
4784 /*
4785 * Parse the target name and check for special support like
4786 * namespace.
4787 */
4788 target = xmlParsePITarget(ctxt);
4789 if (target != NULL) {
4790 if ((RAW == '?') && (NXT(1) == '>')) {
4791 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004792 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4793 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004794 }
4795 SKIP(2);
4796
4797 /*
4798 * SAX: PI detected.
4799 */
4800 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4801 (ctxt->sax->processingInstruction != NULL))
4802 ctxt->sax->processingInstruction(ctxt->userData,
4803 target, NULL);
4804 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004805 return;
4806 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004807 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004808 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004809 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004810 ctxt->instate = state;
4811 return;
4812 }
4813 cur = CUR;
4814 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004815 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4816 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004817 }
4818 SKIP_BLANKS;
4819 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004820 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004821 ((cur != '?') || (NXT(1) != '>'))) {
4822 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004823 xmlChar *tmp;
4824
Owen Taylor3473f882001-02-23 17:55:21 +00004825 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004826 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4827 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004828 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004829 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004830 ctxt->instate = state;
4831 return;
4832 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004833 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004834 }
4835 count++;
4836 if (count > 50) {
4837 GROW;
4838 count = 0;
4839 }
4840 COPY_BUF(l,buf,len,cur);
4841 NEXTL(l);
4842 cur = CUR_CHAR(l);
4843 if (cur == 0) {
4844 SHRINK;
4845 GROW;
4846 cur = CUR_CHAR(l);
4847 }
4848 }
4849 buf[len] = 0;
4850 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004851 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4852 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004853 } else {
4854 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004855 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4856 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004857 }
4858 SKIP(2);
4859
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004860#ifdef LIBXML_CATALOG_ENABLED
4861 if (((state == XML_PARSER_MISC) ||
4862 (state == XML_PARSER_START)) &&
4863 (xmlStrEqual(target, XML_CATALOG_PI))) {
4864 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4865 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4866 (allow == XML_CATA_ALLOW_ALL))
4867 xmlParseCatalogPI(ctxt, buf);
4868 }
4869#endif
4870
4871
Owen Taylor3473f882001-02-23 17:55:21 +00004872 /*
4873 * SAX: PI detected.
4874 */
4875 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4876 (ctxt->sax->processingInstruction != NULL))
4877 ctxt->sax->processingInstruction(ctxt->userData,
4878 target, buf);
4879 }
4880 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004881 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004882 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004883 }
4884 ctxt->instate = state;
4885 }
4886}
4887
4888/**
4889 * xmlParseNotationDecl:
4890 * @ctxt: an XML parser context
4891 *
4892 * parse a notation declaration
4893 *
4894 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4895 *
4896 * Hence there is actually 3 choices:
4897 * 'PUBLIC' S PubidLiteral
4898 * 'PUBLIC' S PubidLiteral S SystemLiteral
4899 * and 'SYSTEM' S SystemLiteral
4900 *
4901 * See the NOTE on xmlParseExternalID().
4902 */
4903
4904void
4905xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004906 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004907 xmlChar *Pubid;
4908 xmlChar *Systemid;
4909
Daniel Veillarda07050d2003-10-19 14:46:32 +00004910 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004911 xmlParserInputPtr input = ctxt->input;
4912 SHRINK;
4913 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004914 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004915 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4916 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004917 return;
4918 }
4919 SKIP_BLANKS;
4920
Daniel Veillard76d66f42001-05-16 21:05:17 +00004921 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004922 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004923 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004924 return;
4925 }
William M. Brack76e95df2003-10-18 16:20:14 +00004926 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004927 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004928 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004929 return;
4930 }
Daniel Veillard37334572008-07-31 08:20:02 +00004931 if (xmlStrchr(name, ':') != NULL) {
4932 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4933 "colon are forbidden from notation names '%s'\n",
4934 name, NULL, NULL);
4935 }
Owen Taylor3473f882001-02-23 17:55:21 +00004936 SKIP_BLANKS;
4937
4938 /*
4939 * Parse the IDs.
4940 */
4941 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4942 SKIP_BLANKS;
4943
4944 if (RAW == '>') {
4945 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004946 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4947 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004948 }
4949 NEXT;
4950 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4951 (ctxt->sax->notationDecl != NULL))
4952 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4953 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004954 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004955 }
Owen Taylor3473f882001-02-23 17:55:21 +00004956 if (Systemid != NULL) xmlFree(Systemid);
4957 if (Pubid != NULL) xmlFree(Pubid);
4958 }
4959}
4960
4961/**
4962 * xmlParseEntityDecl:
4963 * @ctxt: an XML parser context
4964 *
4965 * parse <!ENTITY declarations
4966 *
4967 * [70] EntityDecl ::= GEDecl | PEDecl
4968 *
4969 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4970 *
4971 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4972 *
4973 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4974 *
4975 * [74] PEDef ::= EntityValue | ExternalID
4976 *
4977 * [76] NDataDecl ::= S 'NDATA' S Name
4978 *
4979 * [ VC: Notation Declared ]
4980 * The Name must match the declared name of a notation.
4981 */
4982
4983void
4984xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004985 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004986 xmlChar *value = NULL;
4987 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004988 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004989 int isParameter = 0;
4990 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004991 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004992
Daniel Veillard4c778d82005-01-23 17:37:44 +00004993 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004994 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004995 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004996 SHRINK;
4997 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004998 skipped = SKIP_BLANKS;
4999 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005000 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5001 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005002 }
Owen Taylor3473f882001-02-23 17:55:21 +00005003
5004 if (RAW == '%') {
5005 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005006 skipped = SKIP_BLANKS;
5007 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005008 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5009 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005010 }
Owen Taylor3473f882001-02-23 17:55:21 +00005011 isParameter = 1;
5012 }
5013
Daniel Veillard76d66f42001-05-16 21:05:17 +00005014 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005015 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005016 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5017 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005018 return;
5019 }
Daniel Veillard37334572008-07-31 08:20:02 +00005020 if (xmlStrchr(name, ':') != NULL) {
5021 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5022 "colon are forbidden from entities names '%s'\n",
5023 name, NULL, NULL);
5024 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005025 skipped = SKIP_BLANKS;
5026 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005027 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5028 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005029 }
Owen Taylor3473f882001-02-23 17:55:21 +00005030
Daniel Veillardf5582f12002-06-11 10:08:16 +00005031 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005032 /*
5033 * handle the various case of definitions...
5034 */
5035 if (isParameter) {
5036 if ((RAW == '"') || (RAW == '\'')) {
5037 value = xmlParseEntityValue(ctxt, &orig);
5038 if (value) {
5039 if ((ctxt->sax != NULL) &&
5040 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5041 ctxt->sax->entityDecl(ctxt->userData, name,
5042 XML_INTERNAL_PARAMETER_ENTITY,
5043 NULL, NULL, value);
5044 }
5045 } else {
5046 URI = xmlParseExternalID(ctxt, &literal, 1);
5047 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005048 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005049 }
5050 if (URI) {
5051 xmlURIPtr uri;
5052
5053 uri = xmlParseURI((const char *) URI);
5054 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005055 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5056 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005057 /*
5058 * This really ought to be a well formedness error
5059 * but the XML Core WG decided otherwise c.f. issue
5060 * E26 of the XML erratas.
5061 */
Owen Taylor3473f882001-02-23 17:55:21 +00005062 } else {
5063 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005064 /*
5065 * Okay this is foolish to block those but not
5066 * invalid URIs.
5067 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005068 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005069 } else {
5070 if ((ctxt->sax != NULL) &&
5071 (!ctxt->disableSAX) &&
5072 (ctxt->sax->entityDecl != NULL))
5073 ctxt->sax->entityDecl(ctxt->userData, name,
5074 XML_EXTERNAL_PARAMETER_ENTITY,
5075 literal, URI, NULL);
5076 }
5077 xmlFreeURI(uri);
5078 }
5079 }
5080 }
5081 } else {
5082 if ((RAW == '"') || (RAW == '\'')) {
5083 value = xmlParseEntityValue(ctxt, &orig);
5084 if ((ctxt->sax != NULL) &&
5085 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5086 ctxt->sax->entityDecl(ctxt->userData, name,
5087 XML_INTERNAL_GENERAL_ENTITY,
5088 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005089 /*
5090 * For expat compatibility in SAX mode.
5091 */
5092 if ((ctxt->myDoc == NULL) ||
5093 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5094 if (ctxt->myDoc == NULL) {
5095 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005096 if (ctxt->myDoc == NULL) {
5097 xmlErrMemory(ctxt, "New Doc failed");
5098 return;
5099 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005100 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005101 }
5102 if (ctxt->myDoc->intSubset == NULL)
5103 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5104 BAD_CAST "fake", NULL, NULL);
5105
Daniel Veillard1af9a412003-08-20 22:54:39 +00005106 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5107 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005108 }
Owen Taylor3473f882001-02-23 17:55:21 +00005109 } else {
5110 URI = xmlParseExternalID(ctxt, &literal, 1);
5111 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005112 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005113 }
5114 if (URI) {
5115 xmlURIPtr uri;
5116
5117 uri = xmlParseURI((const char *)URI);
5118 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005119 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5120 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005121 /*
5122 * This really ought to be a well formedness error
5123 * but the XML Core WG decided otherwise c.f. issue
5124 * E26 of the XML erratas.
5125 */
Owen Taylor3473f882001-02-23 17:55:21 +00005126 } else {
5127 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005128 /*
5129 * Okay this is foolish to block those but not
5130 * invalid URIs.
5131 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005132 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005133 }
5134 xmlFreeURI(uri);
5135 }
5136 }
William M. Brack76e95df2003-10-18 16:20:14 +00005137 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005138 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5139 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005140 }
5141 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005142 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005143 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005144 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005145 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5146 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005147 }
5148 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005149 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005150 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5151 (ctxt->sax->unparsedEntityDecl != NULL))
5152 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5153 literal, URI, ndata);
5154 } else {
5155 if ((ctxt->sax != NULL) &&
5156 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5157 ctxt->sax->entityDecl(ctxt->userData, name,
5158 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5159 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005160 /*
5161 * For expat compatibility in SAX mode.
5162 * assuming the entity repalcement was asked for
5163 */
5164 if ((ctxt->replaceEntities != 0) &&
5165 ((ctxt->myDoc == NULL) ||
5166 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5167 if (ctxt->myDoc == NULL) {
5168 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005169 if (ctxt->myDoc == NULL) {
5170 xmlErrMemory(ctxt, "New Doc failed");
5171 return;
5172 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005173 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005174 }
5175
5176 if (ctxt->myDoc->intSubset == NULL)
5177 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5178 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005179 xmlSAX2EntityDecl(ctxt, name,
5180 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5181 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005182 }
Owen Taylor3473f882001-02-23 17:55:21 +00005183 }
5184 }
5185 }
5186 SKIP_BLANKS;
5187 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005188 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005189 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005190 } else {
5191 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005192 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5193 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005194 }
5195 NEXT;
5196 }
5197 if (orig != NULL) {
5198 /*
5199 * Ugly mechanism to save the raw entity value.
5200 */
5201 xmlEntityPtr cur = NULL;
5202
5203 if (isParameter) {
5204 if ((ctxt->sax != NULL) &&
5205 (ctxt->sax->getParameterEntity != NULL))
5206 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5207 } else {
5208 if ((ctxt->sax != NULL) &&
5209 (ctxt->sax->getEntity != NULL))
5210 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005211 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005212 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005213 }
Owen Taylor3473f882001-02-23 17:55:21 +00005214 }
5215 if (cur != NULL) {
5216 if (cur->orig != NULL)
5217 xmlFree(orig);
5218 else
5219 cur->orig = orig;
5220 } else
5221 xmlFree(orig);
5222 }
Owen Taylor3473f882001-02-23 17:55:21 +00005223 if (value != NULL) xmlFree(value);
5224 if (URI != NULL) xmlFree(URI);
5225 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005226 }
5227}
5228
5229/**
5230 * xmlParseDefaultDecl:
5231 * @ctxt: an XML parser context
5232 * @value: Receive a possible fixed default value for the attribute
5233 *
5234 * Parse an attribute default declaration
5235 *
5236 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5237 *
5238 * [ VC: Required Attribute ]
5239 * if the default declaration is the keyword #REQUIRED, then the
5240 * attribute must be specified for all elements of the type in the
5241 * attribute-list declaration.
5242 *
5243 * [ VC: Attribute Default Legal ]
5244 * The declared default value must meet the lexical constraints of
5245 * the declared attribute type c.f. xmlValidateAttributeDecl()
5246 *
5247 * [ VC: Fixed Attribute Default ]
5248 * if an attribute has a default value declared with the #FIXED
5249 * keyword, instances of that attribute must match the default value.
5250 *
5251 * [ WFC: No < in Attribute Values ]
5252 * handled in xmlParseAttValue()
5253 *
5254 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5255 * or XML_ATTRIBUTE_FIXED.
5256 */
5257
5258int
5259xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5260 int val;
5261 xmlChar *ret;
5262
5263 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005264 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005265 SKIP(9);
5266 return(XML_ATTRIBUTE_REQUIRED);
5267 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005268 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005269 SKIP(8);
5270 return(XML_ATTRIBUTE_IMPLIED);
5271 }
5272 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005273 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005274 SKIP(6);
5275 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005276 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005277 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5278 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005279 }
5280 SKIP_BLANKS;
5281 }
5282 ret = xmlParseAttValue(ctxt);
5283 ctxt->instate = XML_PARSER_DTD;
5284 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005285 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005286 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005287 } else
5288 *value = ret;
5289 return(val);
5290}
5291
5292/**
5293 * xmlParseNotationType:
5294 * @ctxt: an XML parser context
5295 *
5296 * parse an Notation attribute type.
5297 *
5298 * Note: the leading 'NOTATION' S part has already being parsed...
5299 *
5300 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5301 *
5302 * [ VC: Notation Attributes ]
5303 * Values of this type must match one of the notation names included
5304 * in the declaration; all notation names in the declaration must be declared.
5305 *
5306 * Returns: the notation attribute tree built while parsing
5307 */
5308
5309xmlEnumerationPtr
5310xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005311 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005312 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005313
5314 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005315 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005316 return(NULL);
5317 }
5318 SHRINK;
5319 do {
5320 NEXT;
5321 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005322 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005323 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005324 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5325 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005326 return(ret);
5327 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005328 tmp = ret;
5329 while (tmp != NULL) {
5330 if (xmlStrEqual(name, tmp->name)) {
5331 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5332 "standalone: attribute notation value token %s duplicated\n",
5333 name, NULL);
5334 if (!xmlDictOwns(ctxt->dict, name))
5335 xmlFree((xmlChar *) name);
5336 break;
5337 }
5338 tmp = tmp->next;
5339 }
5340 if (tmp == NULL) {
5341 cur = xmlCreateEnumeration(name);
5342 if (cur == NULL) return(ret);
5343 if (last == NULL) ret = last = cur;
5344 else {
5345 last->next = cur;
5346 last = cur;
5347 }
Owen Taylor3473f882001-02-23 17:55:21 +00005348 }
5349 SKIP_BLANKS;
5350 } while (RAW == '|');
5351 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005352 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005353 if ((last != NULL) && (last != ret))
5354 xmlFreeEnumeration(last);
5355 return(ret);
5356 }
5357 NEXT;
5358 return(ret);
5359}
5360
5361/**
5362 * xmlParseEnumerationType:
5363 * @ctxt: an XML parser context
5364 *
5365 * parse an Enumeration attribute type.
5366 *
5367 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5368 *
5369 * [ VC: Enumeration ]
5370 * Values of this type must match one of the Nmtoken tokens in
5371 * the declaration
5372 *
5373 * Returns: the enumeration attribute tree built while parsing
5374 */
5375
5376xmlEnumerationPtr
5377xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5378 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005379 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005380
5381 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005382 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005383 return(NULL);
5384 }
5385 SHRINK;
5386 do {
5387 NEXT;
5388 SKIP_BLANKS;
5389 name = xmlParseNmtoken(ctxt);
5390 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005391 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005392 return(ret);
5393 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005394 tmp = ret;
5395 while (tmp != NULL) {
5396 if (xmlStrEqual(name, tmp->name)) {
5397 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5398 "standalone: attribute enumeration value token %s duplicated\n",
5399 name, NULL);
5400 if (!xmlDictOwns(ctxt->dict, name))
5401 xmlFree(name);
5402 break;
5403 }
5404 tmp = tmp->next;
5405 }
5406 if (tmp == NULL) {
5407 cur = xmlCreateEnumeration(name);
5408 if (!xmlDictOwns(ctxt->dict, name))
5409 xmlFree(name);
5410 if (cur == NULL) return(ret);
5411 if (last == NULL) ret = last = cur;
5412 else {
5413 last->next = cur;
5414 last = cur;
5415 }
Owen Taylor3473f882001-02-23 17:55:21 +00005416 }
5417 SKIP_BLANKS;
5418 } while (RAW == '|');
5419 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005420 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005421 return(ret);
5422 }
5423 NEXT;
5424 return(ret);
5425}
5426
5427/**
5428 * xmlParseEnumeratedType:
5429 * @ctxt: an XML parser context
5430 * @tree: the enumeration tree built while parsing
5431 *
5432 * parse an Enumerated attribute type.
5433 *
5434 * [57] EnumeratedType ::= NotationType | Enumeration
5435 *
5436 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5437 *
5438 *
5439 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5440 */
5441
5442int
5443xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005444 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005445 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005446 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005447 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5448 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005449 return(0);
5450 }
5451 SKIP_BLANKS;
5452 *tree = xmlParseNotationType(ctxt);
5453 if (*tree == NULL) return(0);
5454 return(XML_ATTRIBUTE_NOTATION);
5455 }
5456 *tree = xmlParseEnumerationType(ctxt);
5457 if (*tree == NULL) return(0);
5458 return(XML_ATTRIBUTE_ENUMERATION);
5459}
5460
5461/**
5462 * xmlParseAttributeType:
5463 * @ctxt: an XML parser context
5464 * @tree: the enumeration tree built while parsing
5465 *
5466 * parse the Attribute list def for an element
5467 *
5468 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5469 *
5470 * [55] StringType ::= 'CDATA'
5471 *
5472 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5473 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5474 *
5475 * Validity constraints for attribute values syntax are checked in
5476 * xmlValidateAttributeValue()
5477 *
5478 * [ VC: ID ]
5479 * Values of type ID must match the Name production. A name must not
5480 * appear more than once in an XML document as a value of this type;
5481 * i.e., ID values must uniquely identify the elements which bear them.
5482 *
5483 * [ VC: One ID per Element Type ]
5484 * No element type may have more than one ID attribute specified.
5485 *
5486 * [ VC: ID Attribute Default ]
5487 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5488 *
5489 * [ VC: IDREF ]
5490 * Values of type IDREF must match the Name production, and values
5491 * of type IDREFS must match Names; each IDREF Name must match the value
5492 * of an ID attribute on some element in the XML document; i.e. IDREF
5493 * values must match the value of some ID attribute.
5494 *
5495 * [ VC: Entity Name ]
5496 * Values of type ENTITY must match the Name production, values
5497 * of type ENTITIES must match Names; each Entity Name must match the
5498 * name of an unparsed entity declared in the DTD.
5499 *
5500 * [ VC: Name Token ]
5501 * Values of type NMTOKEN must match the Nmtoken production; values
5502 * of type NMTOKENS must match Nmtokens.
5503 *
5504 * Returns the attribute type
5505 */
5506int
5507xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5508 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005509 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005510 SKIP(5);
5511 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005512 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005513 SKIP(6);
5514 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005515 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005516 SKIP(5);
5517 return(XML_ATTRIBUTE_IDREF);
5518 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5519 SKIP(2);
5520 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005521 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005522 SKIP(6);
5523 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005524 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005525 SKIP(8);
5526 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005527 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005528 SKIP(8);
5529 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005530 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005531 SKIP(7);
5532 return(XML_ATTRIBUTE_NMTOKEN);
5533 }
5534 return(xmlParseEnumeratedType(ctxt, tree));
5535}
5536
5537/**
5538 * xmlParseAttributeListDecl:
5539 * @ctxt: an XML parser context
5540 *
5541 * : parse the Attribute list def for an element
5542 *
5543 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5544 *
5545 * [53] AttDef ::= S Name S AttType S DefaultDecl
5546 *
5547 */
5548void
5549xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005550 const xmlChar *elemName;
5551 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005552 xmlEnumerationPtr tree;
5553
Daniel Veillarda07050d2003-10-19 14:46:32 +00005554 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005555 xmlParserInputPtr input = ctxt->input;
5556
5557 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005558 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005559 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005560 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005561 }
5562 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005563 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005564 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005565 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5566 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005567 return;
5568 }
5569 SKIP_BLANKS;
5570 GROW;
5571 while (RAW != '>') {
5572 const xmlChar *check = CUR_PTR;
5573 int type;
5574 int def;
5575 xmlChar *defaultValue = NULL;
5576
5577 GROW;
5578 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005579 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005580 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005581 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5582 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005583 break;
5584 }
5585 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005586 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005587 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005588 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005589 break;
5590 }
5591 SKIP_BLANKS;
5592
5593 type = xmlParseAttributeType(ctxt, &tree);
5594 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005595 break;
5596 }
5597
5598 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005599 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005600 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5601 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005602 if (tree != NULL)
5603 xmlFreeEnumeration(tree);
5604 break;
5605 }
5606 SKIP_BLANKS;
5607
5608 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5609 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005610 if (defaultValue != NULL)
5611 xmlFree(defaultValue);
5612 if (tree != NULL)
5613 xmlFreeEnumeration(tree);
5614 break;
5615 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005616 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5617 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005618
5619 GROW;
5620 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005621 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005622 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005623 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005624 if (defaultValue != NULL)
5625 xmlFree(defaultValue);
5626 if (tree != NULL)
5627 xmlFreeEnumeration(tree);
5628 break;
5629 }
5630 SKIP_BLANKS;
5631 }
5632 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005633 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5634 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005635 if (defaultValue != NULL)
5636 xmlFree(defaultValue);
5637 if (tree != NULL)
5638 xmlFreeEnumeration(tree);
5639 break;
5640 }
5641 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5642 (ctxt->sax->attributeDecl != NULL))
5643 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5644 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005645 else if (tree != NULL)
5646 xmlFreeEnumeration(tree);
5647
5648 if ((ctxt->sax2) && (defaultValue != NULL) &&
5649 (def != XML_ATTRIBUTE_IMPLIED) &&
5650 (def != XML_ATTRIBUTE_REQUIRED)) {
5651 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5652 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005653 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005654 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5655 }
Owen Taylor3473f882001-02-23 17:55:21 +00005656 if (defaultValue != NULL)
5657 xmlFree(defaultValue);
5658 GROW;
5659 }
5660 if (RAW == '>') {
5661 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005662 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5663 "Attribute list declaration doesn't start and stop in the same entity\n",
5664 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005665 }
5666 NEXT;
5667 }
Owen Taylor3473f882001-02-23 17:55:21 +00005668 }
5669}
5670
5671/**
5672 * xmlParseElementMixedContentDecl:
5673 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005674 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005675 *
5676 * parse the declaration for a Mixed Element content
5677 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5678 *
5679 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5680 * '(' S? '#PCDATA' S? ')'
5681 *
5682 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5683 *
5684 * [ VC: No Duplicate Types ]
5685 * The same name must not appear more than once in a single
5686 * mixed-content declaration.
5687 *
5688 * returns: the list of the xmlElementContentPtr describing the element choices
5689 */
5690xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005691xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005692 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005693 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005694
5695 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005696 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005697 SKIP(7);
5698 SKIP_BLANKS;
5699 SHRINK;
5700 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005701 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005702 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5703"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005704 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005705 }
Owen Taylor3473f882001-02-23 17:55:21 +00005706 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005707 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005708 if (ret == NULL)
5709 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005710 if (RAW == '*') {
5711 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5712 NEXT;
5713 }
5714 return(ret);
5715 }
5716 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005717 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005718 if (ret == NULL) return(NULL);
5719 }
5720 while (RAW == '|') {
5721 NEXT;
5722 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005723 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005724 if (ret == NULL) return(NULL);
5725 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005726 if (cur != NULL)
5727 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005728 cur = ret;
5729 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005730 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005731 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005732 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005733 if (n->c1 != NULL)
5734 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005735 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005736 if (n != NULL)
5737 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005738 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005739 }
5740 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005741 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005742 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005743 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005744 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005745 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005746 return(NULL);
5747 }
5748 SKIP_BLANKS;
5749 GROW;
5750 }
5751 if ((RAW == ')') && (NXT(1) == '*')) {
5752 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005753 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005754 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005755 if (cur->c2 != NULL)
5756 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005757 }
5758 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005759 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005760 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5761"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005762 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005763 }
Owen Taylor3473f882001-02-23 17:55:21 +00005764 SKIP(2);
5765 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005766 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005767 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005768 return(NULL);
5769 }
5770
5771 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005772 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005773 }
5774 return(ret);
5775}
5776
5777/**
5778 * xmlParseElementChildrenContentDecl:
5779 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005780 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005781 *
5782 * parse the declaration for a Mixed Element content
5783 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5784 *
5785 *
5786 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5787 *
5788 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5789 *
5790 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5791 *
5792 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5793 *
5794 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5795 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005796 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005797 * opening or closing parentheses in a choice, seq, or Mixed
5798 * construct is contained in the replacement text for a parameter
5799 * entity, both must be contained in the same replacement text. For
5800 * interoperability, if a parameter-entity reference appears in a
5801 * choice, seq, or Mixed construct, its replacement text should not
5802 * be empty, and neither the first nor last non-blank character of
5803 * the replacement text should be a connector (| or ,).
5804 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005805 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005806 * hierarchy.
5807 */
5808xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005809xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005810 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005811 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005812 xmlChar type = 0;
5813
5814 SKIP_BLANKS;
5815 GROW;
5816 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005817 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005818
Owen Taylor3473f882001-02-23 17:55:21 +00005819 /* Recurse on first child */
5820 NEXT;
5821 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005822 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005823 SKIP_BLANKS;
5824 GROW;
5825 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005826 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005827 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005828 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005829 return(NULL);
5830 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005831 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005832 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005833 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005834 return(NULL);
5835 }
Owen Taylor3473f882001-02-23 17:55:21 +00005836 GROW;
5837 if (RAW == '?') {
5838 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5839 NEXT;
5840 } else if (RAW == '*') {
5841 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5842 NEXT;
5843 } else if (RAW == '+') {
5844 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5845 NEXT;
5846 } else {
5847 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5848 }
Owen Taylor3473f882001-02-23 17:55:21 +00005849 GROW;
5850 }
5851 SKIP_BLANKS;
5852 SHRINK;
5853 while (RAW != ')') {
5854 /*
5855 * Each loop we parse one separator and one element.
5856 */
5857 if (RAW == ',') {
5858 if (type == 0) type = CUR;
5859
5860 /*
5861 * Detect "Name | Name , Name" error
5862 */
5863 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005864 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005865 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005866 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005867 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005868 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005869 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005870 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005871 return(NULL);
5872 }
5873 NEXT;
5874
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005875 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005876 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005877 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005878 xmlFreeDocElementContent(ctxt->myDoc, last);
5879 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005880 return(NULL);
5881 }
5882 if (last == NULL) {
5883 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005884 if (ret != NULL)
5885 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005886 ret = cur = op;
5887 } else {
5888 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005889 if (op != NULL)
5890 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005891 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005892 if (last != NULL)
5893 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005894 cur =op;
5895 last = NULL;
5896 }
5897 } else if (RAW == '|') {
5898 if (type == 0) type = CUR;
5899
5900 /*
5901 * Detect "Name , Name | Name" error
5902 */
5903 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005904 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005905 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005906 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005907 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005908 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005909 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005910 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005911 return(NULL);
5912 }
5913 NEXT;
5914
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005915 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005916 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005917 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005918 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005919 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005920 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005921 return(NULL);
5922 }
5923 if (last == NULL) {
5924 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005925 if (ret != NULL)
5926 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005927 ret = cur = op;
5928 } else {
5929 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005930 if (op != NULL)
5931 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005932 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005933 if (last != NULL)
5934 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005935 cur =op;
5936 last = NULL;
5937 }
5938 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005939 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005940 if ((last != NULL) && (last != ret))
5941 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005942 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005943 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005944 return(NULL);
5945 }
5946 GROW;
5947 SKIP_BLANKS;
5948 GROW;
5949 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005950 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005951 /* Recurse on second child */
5952 NEXT;
5953 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005954 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005955 SKIP_BLANKS;
5956 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005957 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005958 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005959 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005960 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005961 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005962 return(NULL);
5963 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005964 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005965 if (last == NULL) {
5966 if (ret != NULL)
5967 xmlFreeDocElementContent(ctxt->myDoc, ret);
5968 return(NULL);
5969 }
Owen Taylor3473f882001-02-23 17:55:21 +00005970 if (RAW == '?') {
5971 last->ocur = XML_ELEMENT_CONTENT_OPT;
5972 NEXT;
5973 } else if (RAW == '*') {
5974 last->ocur = XML_ELEMENT_CONTENT_MULT;
5975 NEXT;
5976 } else if (RAW == '+') {
5977 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5978 NEXT;
5979 } else {
5980 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5981 }
5982 }
5983 SKIP_BLANKS;
5984 GROW;
5985 }
5986 if ((cur != NULL) && (last != NULL)) {
5987 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005988 if (last != NULL)
5989 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005990 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005991 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005992 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5993"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005994 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005995 }
Owen Taylor3473f882001-02-23 17:55:21 +00005996 NEXT;
5997 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005998 if (ret != NULL) {
5999 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6000 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6001 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6002 else
6003 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6004 }
Owen Taylor3473f882001-02-23 17:55:21 +00006005 NEXT;
6006 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006007 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006008 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006009 cur = ret;
6010 /*
6011 * Some normalization:
6012 * (a | b* | c?)* == (a | b | c)*
6013 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006014 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006015 if ((cur->c1 != NULL) &&
6016 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6017 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6018 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6019 if ((cur->c2 != NULL) &&
6020 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6021 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6022 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6023 cur = cur->c2;
6024 }
6025 }
Owen Taylor3473f882001-02-23 17:55:21 +00006026 NEXT;
6027 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006028 if (ret != NULL) {
6029 int found = 0;
6030
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006031 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6032 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6033 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006034 else
6035 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006036 /*
6037 * Some normalization:
6038 * (a | b*)+ == (a | b)*
6039 * (a | b?)+ == (a | b)*
6040 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006041 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006042 if ((cur->c1 != NULL) &&
6043 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6044 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6045 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6046 found = 1;
6047 }
6048 if ((cur->c2 != NULL) &&
6049 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6050 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6051 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6052 found = 1;
6053 }
6054 cur = cur->c2;
6055 }
6056 if (found)
6057 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6058 }
Owen Taylor3473f882001-02-23 17:55:21 +00006059 NEXT;
6060 }
6061 return(ret);
6062}
6063
6064/**
6065 * xmlParseElementContentDecl:
6066 * @ctxt: an XML parser context
6067 * @name: the name of the element being defined.
6068 * @result: the Element Content pointer will be stored here if any
6069 *
6070 * parse the declaration for an Element content either Mixed or Children,
6071 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6072 *
6073 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6074 *
6075 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6076 */
6077
6078int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006079xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006080 xmlElementContentPtr *result) {
6081
6082 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006083 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006084 int res;
6085
6086 *result = NULL;
6087
6088 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006089 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006090 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006091 return(-1);
6092 }
6093 NEXT;
6094 GROW;
6095 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006096 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006097 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006098 res = XML_ELEMENT_TYPE_MIXED;
6099 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006100 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006101 res = XML_ELEMENT_TYPE_ELEMENT;
6102 }
Owen Taylor3473f882001-02-23 17:55:21 +00006103 SKIP_BLANKS;
6104 *result = tree;
6105 return(res);
6106}
6107
6108/**
6109 * xmlParseElementDecl:
6110 * @ctxt: an XML parser context
6111 *
6112 * parse an Element declaration.
6113 *
6114 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6115 *
6116 * [ VC: Unique Element Type Declaration ]
6117 * No element type may be declared more than once
6118 *
6119 * Returns the type of the element, or -1 in case of error
6120 */
6121int
6122xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006123 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006124 int ret = -1;
6125 xmlElementContentPtr content = NULL;
6126
Daniel Veillard4c778d82005-01-23 17:37:44 +00006127 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006128 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006129 xmlParserInputPtr input = ctxt->input;
6130
6131 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006132 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006133 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6134 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006135 }
6136 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006137 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006138 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006139 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6140 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006141 return(-1);
6142 }
6143 while ((RAW == 0) && (ctxt->inputNr > 1))
6144 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006145 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006146 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6147 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006148 }
6149 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006150 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006151 SKIP(5);
6152 /*
6153 * Element must always be empty.
6154 */
6155 ret = XML_ELEMENT_TYPE_EMPTY;
6156 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6157 (NXT(2) == 'Y')) {
6158 SKIP(3);
6159 /*
6160 * Element is a generic container.
6161 */
6162 ret = XML_ELEMENT_TYPE_ANY;
6163 } else if (RAW == '(') {
6164 ret = xmlParseElementContentDecl(ctxt, name, &content);
6165 } else {
6166 /*
6167 * [ WFC: PEs in Internal Subset ] error handling.
6168 */
6169 if ((RAW == '%') && (ctxt->external == 0) &&
6170 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006171 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006172 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006173 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006174 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006175 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6176 }
Owen Taylor3473f882001-02-23 17:55:21 +00006177 return(-1);
6178 }
6179
6180 SKIP_BLANKS;
6181 /*
6182 * Pop-up of finished entities.
6183 */
6184 while ((RAW == 0) && (ctxt->inputNr > 1))
6185 xmlPopInput(ctxt);
6186 SKIP_BLANKS;
6187
6188 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006189 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006190 if (content != NULL) {
6191 xmlFreeDocElementContent(ctxt->myDoc, content);
6192 }
Owen Taylor3473f882001-02-23 17:55:21 +00006193 } else {
6194 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006195 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6196 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006197 }
6198
6199 NEXT;
6200 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006201 (ctxt->sax->elementDecl != NULL)) {
6202 if (content != NULL)
6203 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006204 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6205 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006206 if ((content != NULL) && (content->parent == NULL)) {
6207 /*
6208 * this is a trick: if xmlAddElementDecl is called,
6209 * instead of copying the full tree it is plugged directly
6210 * if called from the parser. Avoid duplicating the
6211 * interfaces or change the API/ABI
6212 */
6213 xmlFreeDocElementContent(ctxt->myDoc, content);
6214 }
6215 } else if (content != NULL) {
6216 xmlFreeDocElementContent(ctxt->myDoc, content);
6217 }
Owen Taylor3473f882001-02-23 17:55:21 +00006218 }
Owen Taylor3473f882001-02-23 17:55:21 +00006219 }
6220 return(ret);
6221}
6222
6223/**
Owen Taylor3473f882001-02-23 17:55:21 +00006224 * xmlParseConditionalSections
6225 * @ctxt: an XML parser context
6226 *
6227 * [61] conditionalSect ::= includeSect | ignoreSect
6228 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6229 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6230 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6231 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6232 */
6233
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006234static void
Owen Taylor3473f882001-02-23 17:55:21 +00006235xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006236 int id = ctxt->input->id;
6237
Owen Taylor3473f882001-02-23 17:55:21 +00006238 SKIP(3);
6239 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006240 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006241 SKIP(7);
6242 SKIP_BLANKS;
6243 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006244 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006245 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006246 if (ctxt->input->id != id) {
6247 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6248 "All markup of the conditional section is not in the same entity\n",
6249 NULL, NULL);
6250 }
Owen Taylor3473f882001-02-23 17:55:21 +00006251 NEXT;
6252 }
6253 if (xmlParserDebugEntities) {
6254 if ((ctxt->input != NULL) && (ctxt->input->filename))
6255 xmlGenericError(xmlGenericErrorContext,
6256 "%s(%d): ", ctxt->input->filename,
6257 ctxt->input->line);
6258 xmlGenericError(xmlGenericErrorContext,
6259 "Entering INCLUDE Conditional Section\n");
6260 }
6261
6262 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6263 (NXT(2) != '>'))) {
6264 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006265 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006266
6267 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6268 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006269 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006270 NEXT;
6271 } else if (RAW == '%') {
6272 xmlParsePEReference(ctxt);
6273 } else
6274 xmlParseMarkupDecl(ctxt);
6275
6276 /*
6277 * Pop-up of finished entities.
6278 */
6279 while ((RAW == 0) && (ctxt->inputNr > 1))
6280 xmlPopInput(ctxt);
6281
Daniel Veillardfdc91562002-07-01 21:52:03 +00006282 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006283 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006284 break;
6285 }
6286 }
6287 if (xmlParserDebugEntities) {
6288 if ((ctxt->input != NULL) && (ctxt->input->filename))
6289 xmlGenericError(xmlGenericErrorContext,
6290 "%s(%d): ", ctxt->input->filename,
6291 ctxt->input->line);
6292 xmlGenericError(xmlGenericErrorContext,
6293 "Leaving INCLUDE Conditional Section\n");
6294 }
6295
Daniel Veillarda07050d2003-10-19 14:46:32 +00006296 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006297 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006298 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006299 int depth = 0;
6300
6301 SKIP(6);
6302 SKIP_BLANKS;
6303 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006304 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006305 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006306 if (ctxt->input->id != id) {
6307 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6308 "All markup of the conditional section is not in the same entity\n",
6309 NULL, NULL);
6310 }
Owen Taylor3473f882001-02-23 17:55:21 +00006311 NEXT;
6312 }
6313 if (xmlParserDebugEntities) {
6314 if ((ctxt->input != NULL) && (ctxt->input->filename))
6315 xmlGenericError(xmlGenericErrorContext,
6316 "%s(%d): ", ctxt->input->filename,
6317 ctxt->input->line);
6318 xmlGenericError(xmlGenericErrorContext,
6319 "Entering IGNORE Conditional Section\n");
6320 }
6321
6322 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006323 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006324 * But disable SAX event generating DTD building in the meantime
6325 */
6326 state = ctxt->disableSAX;
6327 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006328 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006329 ctxt->instate = XML_PARSER_IGNORE;
6330
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006331 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006332 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6333 depth++;
6334 SKIP(3);
6335 continue;
6336 }
6337 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6338 if (--depth >= 0) SKIP(3);
6339 continue;
6340 }
6341 NEXT;
6342 continue;
6343 }
6344
6345 ctxt->disableSAX = state;
6346 ctxt->instate = instate;
6347
6348 if (xmlParserDebugEntities) {
6349 if ((ctxt->input != NULL) && (ctxt->input->filename))
6350 xmlGenericError(xmlGenericErrorContext,
6351 "%s(%d): ", ctxt->input->filename,
6352 ctxt->input->line);
6353 xmlGenericError(xmlGenericErrorContext,
6354 "Leaving IGNORE Conditional Section\n");
6355 }
6356
6357 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006358 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006359 }
6360
6361 if (RAW == 0)
6362 SHRINK;
6363
6364 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006365 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006366 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006367 if (ctxt->input->id != id) {
6368 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6369 "All markup of the conditional section is not in the same entity\n",
6370 NULL, NULL);
6371 }
Owen Taylor3473f882001-02-23 17:55:21 +00006372 SKIP(3);
6373 }
6374}
6375
6376/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006377 * xmlParseMarkupDecl:
6378 * @ctxt: an XML parser context
6379 *
6380 * parse Markup declarations
6381 *
6382 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6383 * NotationDecl | PI | Comment
6384 *
6385 * [ VC: Proper Declaration/PE Nesting ]
6386 * Parameter-entity replacement text must be properly nested with
6387 * markup declarations. That is to say, if either the first character
6388 * or the last character of a markup declaration (markupdecl above) is
6389 * contained in the replacement text for a parameter-entity reference,
6390 * both must be contained in the same replacement text.
6391 *
6392 * [ WFC: PEs in Internal Subset ]
6393 * In the internal DTD subset, parameter-entity references can occur
6394 * only where markup declarations can occur, not within markup declarations.
6395 * (This does not apply to references that occur in external parameter
6396 * entities or to the external subset.)
6397 */
6398void
6399xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6400 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006401 if (CUR == '<') {
6402 if (NXT(1) == '!') {
6403 switch (NXT(2)) {
6404 case 'E':
6405 if (NXT(3) == 'L')
6406 xmlParseElementDecl(ctxt);
6407 else if (NXT(3) == 'N')
6408 xmlParseEntityDecl(ctxt);
6409 break;
6410 case 'A':
6411 xmlParseAttributeListDecl(ctxt);
6412 break;
6413 case 'N':
6414 xmlParseNotationDecl(ctxt);
6415 break;
6416 case '-':
6417 xmlParseComment(ctxt);
6418 break;
6419 default:
6420 /* there is an error but it will be detected later */
6421 break;
6422 }
6423 } else if (NXT(1) == '?') {
6424 xmlParsePI(ctxt);
6425 }
6426 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006427 /*
6428 * This is only for internal subset. On external entities,
6429 * the replacement is done before parsing stage
6430 */
6431 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6432 xmlParsePEReference(ctxt);
6433
6434 /*
6435 * Conditional sections are allowed from entities included
6436 * by PE References in the internal subset.
6437 */
6438 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6439 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6440 xmlParseConditionalSections(ctxt);
6441 }
6442 }
6443
6444 ctxt->instate = XML_PARSER_DTD;
6445}
6446
6447/**
6448 * xmlParseTextDecl:
6449 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006450 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006451 * parse an XML declaration header for external entities
6452 *
6453 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006454 */
6455
6456void
6457xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6458 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006459 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006460
6461 /*
6462 * We know that '<?xml' is here.
6463 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006464 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006465 SKIP(5);
6466 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006467 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006468 return;
6469 }
6470
William M. Brack76e95df2003-10-18 16:20:14 +00006471 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006472 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6473 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006474 }
6475 SKIP_BLANKS;
6476
6477 /*
6478 * We may have the VersionInfo here.
6479 */
6480 version = xmlParseVersionInfo(ctxt);
6481 if (version == NULL)
6482 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006483 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006484 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006485 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6486 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006487 }
6488 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006489 ctxt->input->version = version;
6490
6491 /*
6492 * We must have the encoding declaration
6493 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006494 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006495 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6496 /*
6497 * The XML REC instructs us to stop parsing right here
6498 */
6499 return;
6500 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006501 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6502 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6503 "Missing encoding in text declaration\n");
6504 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006505
6506 SKIP_BLANKS;
6507 if ((RAW == '?') && (NXT(1) == '>')) {
6508 SKIP(2);
6509 } else if (RAW == '>') {
6510 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006511 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006512 NEXT;
6513 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006514 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006515 MOVETO_ENDTAG(CUR_PTR);
6516 NEXT;
6517 }
6518}
6519
6520/**
Owen Taylor3473f882001-02-23 17:55:21 +00006521 * xmlParseExternalSubset:
6522 * @ctxt: an XML parser context
6523 * @ExternalID: the external identifier
6524 * @SystemID: the system identifier (or URL)
6525 *
6526 * parse Markup declarations from an external subset
6527 *
6528 * [30] extSubset ::= textDecl? extSubsetDecl
6529 *
6530 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6531 */
6532void
6533xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6534 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006535 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006536 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006537
6538 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6539 (ctxt->input->end - ctxt->input->cur >= 4)) {
6540 xmlChar start[4];
6541 xmlCharEncoding enc;
6542
6543 start[0] = RAW;
6544 start[1] = NXT(1);
6545 start[2] = NXT(2);
6546 start[3] = NXT(3);
6547 enc = xmlDetectCharEncoding(start, 4);
6548 if (enc != XML_CHAR_ENCODING_NONE)
6549 xmlSwitchEncoding(ctxt, enc);
6550 }
6551
Daniel Veillarda07050d2003-10-19 14:46:32 +00006552 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006553 xmlParseTextDecl(ctxt);
6554 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6555 /*
6556 * The XML REC instructs us to stop parsing right here
6557 */
6558 ctxt->instate = XML_PARSER_EOF;
6559 return;
6560 }
6561 }
6562 if (ctxt->myDoc == NULL) {
6563 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006564 if (ctxt->myDoc == NULL) {
6565 xmlErrMemory(ctxt, "New Doc failed");
6566 return;
6567 }
6568 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006569 }
6570 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6571 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6572
6573 ctxt->instate = XML_PARSER_DTD;
6574 ctxt->external = 1;
6575 while (((RAW == '<') && (NXT(1) == '?')) ||
6576 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006577 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006578 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006579 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006580
6581 GROW;
6582 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6583 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006584 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006585 NEXT;
6586 } else if (RAW == '%') {
6587 xmlParsePEReference(ctxt);
6588 } else
6589 xmlParseMarkupDecl(ctxt);
6590
6591 /*
6592 * Pop-up of finished entities.
6593 */
6594 while ((RAW == 0) && (ctxt->inputNr > 1))
6595 xmlPopInput(ctxt);
6596
Daniel Veillardfdc91562002-07-01 21:52:03 +00006597 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006598 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006599 break;
6600 }
6601 }
6602
6603 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006604 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006605 }
6606
6607}
6608
6609/**
6610 * xmlParseReference:
6611 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006612 *
Owen Taylor3473f882001-02-23 17:55:21 +00006613 * parse and handle entity references in content, depending on the SAX
6614 * interface, this may end-up in a call to character() if this is a
6615 * CharRef, a predefined entity, if there is no reference() callback.
6616 * or if the parser was asked to switch to that mode.
6617 *
 * Outline of the processing:
 *  - nothing is done unless the current character is '&';
 *  - a CharRef is delivered through characters(), or through
 *    reference() as "#x.." / "#.." text when the context charset is
 *    not UTF-8 and the value does not fit in 8 bits;
 *  - for an EntityRef the entity is looked up with xmlParseEntityRef();
 *    the function returns if none is found or if the document is no
 *    longer well-formed;
 *  - the content of a predefined entity goes to characters();
 *  - on the first reference (ent->checked == 0) the entity content is
 *    parsed and the resulting node list may become ent->children;
 *  - finally the entity is reported via reference(), or its children
 *    are linked/copied under ctxt->node, depending on
 *    ctxt->replaceEntities and the parse mode.
 *
6618 * [67] Reference ::= EntityRef | CharRef
6619 */
6620void
6621xmlParseReference(xmlParserCtxtPtr ctxt) {
6622 xmlEntityPtr ent;
6623 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006624 int was_checked;
6625 xmlNodePtr list = NULL;
6626 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006627
Daniel Veillard0161e632008-08-28 15:36:32 +00006628
6629 if (RAW != '&')
6630 return;
6631
6632 /*
6633 * Simple case of a CharRef
6634 */
Owen Taylor3473f882001-02-23 17:55:21 +00006635 if (NXT(1) == '#') {
6636 int i = 0;
6637 xmlChar out[10];
/* Character after "&#", read before xmlParseCharRef() is called; used below to pick hex or decimal output. */
6638 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006639 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006640
/* NOTE(review): 0 presumably signals an invalid reference from xmlParseCharRef() - confirm there. */
Daniel Veillarddc171602008-03-26 17:41:38 +00006641 if (value == 0)
6642 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006643 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6644 /*
6645 * So we are using non-UTF-8 buffers
6646 * Check that the char fit on 8bits, if not
6647 * generate a CharRef.
6648 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006649 if (value <= 0xFF) {
6650 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006651 out[1] = 0;
6652 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6653 (!ctxt->disableSAX))
6654 ctxt->sax->characters(ctxt->userData, out, 1);
6655 } else {
6656 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006657 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006658 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006659 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006660 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6661 (!ctxt->disableSAX))
6662 ctxt->sax->reference(ctxt->userData, out);
6663 }
6664 } else {
6665 /*
6666 * Just encode the value in UTF-8
6667 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006668 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006669 out[i] = 0;
6670 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6671 (!ctxt->disableSAX))
6672 ctxt->sax->characters(ctxt->userData, out, i);
6673 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006674 return;
6675 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006676
Daniel Veillard0161e632008-08-28 15:36:32 +00006677 /*
6678 * We are seeing an entity reference
6679 */
6680 ent = xmlParseEntityRef(ctxt);
6681 if (ent == NULL) return;
6682 if (!ctxt->wellFormed)
6683 return;
/* Snapshot taken before the first-reference block below overwrites ent->checked. */
6684 was_checked = ent->checked;
6685
6686 /* special case of predefined entities */
6687 if ((ent->name == NULL) ||
6688 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6689 val = ent->content;
6690 if (val == NULL) return;
6691 /*
6692 * inline the entity.
6693 */
6694 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6695 (!ctxt->disableSAX))
6696 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6697 return;
6698 }
6699
6700 /*
6701 * The first reference to the entity trigger a parsing phase
6702 * where the ent->children is filled with the result from
6703 * the parsing.
6704 */
6705 if (ent->checked == 0) {
/* Baseline used below to compute how many entities this content referenced. */
6706 unsigned long oldnbent = ctxt->nbentities;
6707
6708 /*
6709 * This is a bit hackish but this seems the best
6710 * way to make sure both SAX and DOM entity support
6711 * behaves okay.
6712 */
6713 void *user_data;
6714 if (ctxt->userData == ctxt)
6715 user_data = NULL;
6716 else
6717 user_data = ctxt->userData;
6718
6719 /*
6720 * Check that this entity is well formed
6721 * 4.3.2: An internal general parsed entity is well-formed
6722 * if its replacement text matches the production labeled
6723 * content.
6724 */
6725 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6726 ctxt->depth++;
6727 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6728 user_data, &list);
6729 ctxt->depth--;
6730
6731 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6732 ctxt->depth++;
6733 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6734 user_data, ctxt->depth, ent->URI,
6735 ent->ExternalID, &list);
6736 ctxt->depth--;
6737 } else {
6738 ret = XML_ERR_ENTITY_PE_INTERNAL;
6739 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6740 "invalid entity type found\n", NULL);
6741 }
6742
6743 /*
6744 * Store the number of entities needing parsing for this entity
6745 * content and do checkings
6746 */
6747 ent->checked = ctxt->nbentities - oldnbent;
6748 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006749 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006750 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006751 return;
6752 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006753 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6754 xmlFreeNodeList(list);
6755 return;
6756 }
Owen Taylor3473f882001-02-23 17:55:21 +00006757
Daniel Veillard0161e632008-08-28 15:36:32 +00006758 if ((ret == XML_ERR_OK) && (list != NULL)) {
6759 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6760 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6761 (ent->children == NULL)) {
6762 ent->children = list;
6763 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006764 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006765 * Prune it directly in the generated document
6766 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006767 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006768 if (((list->type == XML_TEXT_NODE) &&
6769 (list->next == NULL)) ||
6770 (ctxt->parseMode == XML_PARSE_READER)) {
6771 list->parent = (xmlNodePtr) ent;
6772 list = NULL;
6773 ent->owner = 1;
6774 } else {
6775 ent->owner = 0;
6776 while (list != NULL) {
6777 list->parent = (xmlNodePtr) ctxt->node;
6778 list->doc = ctxt->myDoc;
6779 if (list->next == NULL)
6780 ent->last = list;
6781 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006782 }
/* The walk above left list NULL; point it back at the head so the linking code further down sees a non-NULL list. */
Daniel Veillard0161e632008-08-28 15:36:32 +00006783 list = ent->children;
6784#ifdef LIBXML_LEGACY_ENABLED
6785 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6786 xmlAddEntityReference(ent, list, NULL);
6787#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006788 }
6789 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006790 ent->owner = 1;
6791 while (list != NULL) {
6792 list->parent = (xmlNodePtr) ent;
6793 if (list->next == NULL)
6794 ent->last = list;
6795 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006796 }
6797 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006798 } else {
6799 xmlFreeNodeList(list);
6800 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006801 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006802 } else if ((ret != XML_ERR_OK) &&
6803 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6804 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6805 "Entity '%s' failed to parse\n", ent->name);
6806 } else if (list != NULL) {
6807 xmlFreeNodeList(list);
6808 list = NULL;
6809 }
/* Keep ent->checked non-zero so this first-reference block is not entered again for this entity. */
6810 if (ent->checked == 0)
6811 ent->checked = 1;
6812 } else if (ent->checked != 1) {
/* Already parsed on an earlier reference: add the count recorded at that time to the running total. */
6813 ctxt->nbentities += ent->checked;
6814 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006815
Daniel Veillard0161e632008-08-28 15:36:32 +00006816 /*
6817 * Now that the entity content has been gathered
6818 * provide it to the application, this can take different forms based
6819 * on the parsing modes.
6820 */
6821 if (ent->children == NULL) {
6822 /*
6823 * Probably running in SAX mode and the callbacks don't
6824 * build the entity content. So unless we already went
6825 * though parsing for first checking go though the entity
6826 * content to generate callbacks associated to the entity
6827 */
6828 if (was_checked != 0) {
6829 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00006830 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006831 * This is a bit hackish but this seems the best
6832 * way to make sure both SAX and DOM entity support
6833 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00006834 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006835 if (ctxt->userData == ctxt)
6836 user_data = NULL;
6837 else
6838 user_data = ctxt->userData;
6839
6840 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6841 ctxt->depth++;
6842 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6843 ent->content, user_data, NULL);
6844 ctxt->depth--;
6845 } else if (ent->etype ==
6846 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6847 ctxt->depth++;
6848 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6849 ctxt->sax, user_data, ctxt->depth,
6850 ent->URI, ent->ExternalID, NULL);
6851 ctxt->depth--;
6852 } else {
6853 ret = XML_ERR_ENTITY_PE_INTERNAL;
6854 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6855 "invalid entity type found\n", NULL);
6856 }
6857 if (ret == XML_ERR_ENTITY_LOOP) {
6858 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6859 return;
6860 }
6861 }
6862 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6863 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6864 /*
6865 * Entity reference callback comes second, it's somewhat
6866 * superfluous but a compatibility to historical behaviour
6867 */
6868 ctxt->sax->reference(ctxt->userData, ent->name);
6869 }
6870 return;
6871 }
6872
6873 /*
6874 * From here on the entity has children (the NULL case returned
 * above). When entities are not being substituted, only report
 * the reference to the application.
6875 */
6876 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6877 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6878 /*
6879 * Create a node.
6880 */
6881 ctxt->sax->reference(ctxt->userData, ent->name);
6882 return;
6883 }
6884
6885 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6886 /*
6887 * There is a problem on the handling of _private for entities
6888 * (bug 155816): Should we copy the content of the field from
6889 * the entity (possibly overwriting some value set by the user
6890 * when a copy is created), should we leave it alone, or should
6891 * we try to take care of different situations? The problem
6892 * is exacerbated by the usage of this field by the xmlReader.
6893 * To fix this bug, we look at _private on the created node
6894 * and, if it's NULL, we copy in whatever was in the entity.
6895 * If it's not NULL we leave it alone. This is somewhat of a
6896 * hack - maybe we should have further tests to determine
6897 * what to do.
6898 */
6899 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6900 /*
6901 * Seems we are generating the DOM content, do
6902 * a simple tree copy for all references except the first
6903 * In the first occurrence list contains the replacement.
6904 * parseMode == XML_PARSE_READER means we are operating on the Reader
6905 * and since nodes are discarded we must copy all the time.
6906 */
6907 if (((list == NULL) && (ent->owner == 0)) ||
6908 (ctxt->parseMode == XML_PARSE_READER)) {
6909 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6910
6911 /*
6912 * when operating on a reader, the entities definitions
6913 * are always owning the entities subtree.
6914 if (ctxt->parseMode == XML_PARSE_READER)
6915 ent->owner = 1;
6916 */
6917
/* Append a copy of each entity child to ctxt->node; the entity keeps its own nodes. */
6918 cur = ent->children;
6919 while (cur != NULL) {
6920 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6921 if (nw != NULL) {
6922 if (nw->_private == NULL)
6923 nw->_private = cur->_private;
6924 if (firstChild == NULL){
6925 firstChild = nw;
6926 }
6927 nw = xmlAddChild(ctxt->node, nw);
6928 }
6929 if (cur == ent->last) {
6930 /*
6931 * needed to detect some strange empty
6932 * node cases in the reader tests
6933 */
6934 if ((ctxt->parseMode == XML_PARSE_READER) &&
6935 (nw != NULL) &&
6936 (nw->type == XML_ELEMENT_NODE) &&
6937 (nw->children == NULL))
6938 nw->extra = 1;
6939
6940 break;
6941 }
6942 cur = cur->next;
6943 }
6944#ifdef LIBXML_LEGACY_ENABLED
6945 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6946 xmlAddEntityReference(ent, firstChild, nw);
6947#endif /* LIBXML_LEGACY_ENABLED */
6948 } else if (list == NULL) {
6949 xmlNodePtr nw = NULL, cur, next, last,
6950 firstChild = NULL;
6951 /*
6952 * Copy the entity child list and make it the new
6953 * entity child list. The goal is to make sure any
6954 * ID or REF referenced will be the one from the
6955 * document content and not the entity copy.
6956 */
6957 cur = ent->children;
6958 ent->children = NULL;
6959 last = ent->last;
6960 ent->last = NULL;
6961 while (cur != NULL) {
/* Detach the original node: it goes under ctxt->node while its copy goes back to the entity. */
6962 next = cur->next;
6963 cur->next = NULL;
6964 cur->parent = NULL;
6965 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6966 if (nw != NULL) {
6967 if (nw->_private == NULL)
6968 nw->_private = cur->_private;
6969 if (firstChild == NULL){
6970 firstChild = cur;
6971 }
6972 xmlAddChild((xmlNodePtr) ent, nw);
6973 xmlAddChild(ctxt->node, cur);
6974 }
6975 if (cur == last)
6976 break;
6977 cur = next;
6978 }
Daniel Veillardcba68392008-08-29 12:43:40 +00006979 if (ent->owner == 0)
6980 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00006981#ifdef LIBXML_LEGACY_ENABLED
6982 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6983 xmlAddEntityReference(ent, firstChild, nw);
6984#endif /* LIBXML_LEGACY_ENABLED */
6985 } else {
6986 const xmlChar *nbktext;
6987
6988 /*
6989 * the name change is to avoid coalescing of the
6990 * node with a possible previous text one which
6991 * would make ent->children a dangling pointer
6992 */
6993 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6994 -1);
6995 if (ent->children->type == XML_TEXT_NODE)
6996 ent->children->name = nbktext;
6997 if ((ent->last != ent->children) &&
6998 (ent->last->type == XML_TEXT_NODE))
6999 ent->last->name = nbktext;
7000 xmlAddChildList(ctxt->node, ent->children);
7001 }
7002
7003 /*
7004 * This is to avoid a nasty side effect, see
7005 * characters() in SAX.c
7006 */
7007 ctxt->nodemem = 0;
7008 ctxt->nodelen = 0;
7009 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007010 }
7011 }
7012}
7013
7014/**
7015 * xmlParseEntityRef:
7016 * @ctxt: an XML parser context
7017 *
7018 * parse ENTITY references declarations
7019 *
7020 * [68] EntityRef ::= '&' Name ';'
7021 *
7022 * [ WFC: Entity Declared ]
7023 * In a document without any DTD, a document with only an internal DTD
7024 * subset which contains no parameter entity references, or a document
7025 * with "standalone='yes'", the Name given in the entity reference
7026 * must match that in an entity declaration, except that well-formed
7027 * documents need not declare any of the following entities: amp, lt,
7028 * gt, apos, quot. The declaration of a parameter entity must precede
7029 * any reference to it. Similarly, the declaration of a general entity
7030 * must precede any reference to it which appears in a default value in an
7031 * attribute-list declaration. Note that if entities are declared in the
7032 * external subset or in external parameter entities, a non-validating
7033 * processor is not obligated to read and process their declarations;
7034 * for such documents, the rule that an entity must be declared is a
7035 * well-formedness constraint only if standalone='yes'.
7036 *
7037 * [ WFC: Parsed Entity ]
7038 * An entity reference must not contain the name of an unparsed entity
7039 *
7040 * Returns the xmlEntityPtr if found, or NULL otherwise.
7041 */
7042xmlEntityPtr
7043xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007044 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007045 xmlEntityPtr ent = NULL;
7046
7047 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007048
Daniel Veillard0161e632008-08-28 15:36:32 +00007049 if (RAW != '&')
7050 return(NULL);
7051 NEXT;
7052 name = xmlParseName(ctxt);
7053 if (name == NULL) {
7054 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7055 "xmlParseEntityRef: no name\n");
7056 return(NULL);
7057 }
7058 if (RAW != ';') {
7059 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7060 return(NULL);
7061 }
7062 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007063
Daniel Veillard0161e632008-08-28 15:36:32 +00007064 /*
7065 * Predefined entites override any extra definition
7066 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007067 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7068 ent = xmlGetPredefinedEntity(name);
7069 if (ent != NULL)
7070 return(ent);
7071 }
Owen Taylor3473f882001-02-23 17:55:21 +00007072
Daniel Veillard0161e632008-08-28 15:36:32 +00007073 /*
7074 * Increate the number of entity references parsed
7075 */
7076 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007077
Daniel Veillard0161e632008-08-28 15:36:32 +00007078 /*
7079 * Ask first SAX for entity resolution, otherwise try the
7080 * entities which may have stored in the parser context.
7081 */
7082 if (ctxt->sax != NULL) {
7083 if (ctxt->sax->getEntity != NULL)
7084 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007085 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7086 (ctxt->options & XML_PARSE_OLDSAX))
7087 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007088 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7089 (ctxt->userData==ctxt)) {
7090 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007091 }
7092 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007093 /*
7094 * [ WFC: Entity Declared ]
7095 * In a document without any DTD, a document with only an
7096 * internal DTD subset which contains no parameter entity
7097 * references, or a document with "standalone='yes'", the
7098 * Name given in the entity reference must match that in an
7099 * entity declaration, except that well-formed documents
7100 * need not declare any of the following entities: amp, lt,
7101 * gt, apos, quot.
7102 * The declaration of a parameter entity must precede any
7103 * reference to it.
7104 * Similarly, the declaration of a general entity must
7105 * precede any reference to it which appears in a default
7106 * value in an attribute-list declaration. Note that if
7107 * entities are declared in the external subset or in
7108 * external parameter entities, a non-validating processor
7109 * is not obligated to read and process their declarations;
7110 * for such documents, the rule that an entity must be
7111 * declared is a well-formedness constraint only if
7112 * standalone='yes'.
7113 */
7114 if (ent == NULL) {
7115 if ((ctxt->standalone == 1) ||
7116 ((ctxt->hasExternalSubset == 0) &&
7117 (ctxt->hasPErefs == 0))) {
7118 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7119 "Entity '%s' not defined\n", name);
7120 } else {
7121 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7122 "Entity '%s' not defined\n", name);
7123 if ((ctxt->inSubset == 0) &&
7124 (ctxt->sax != NULL) &&
7125 (ctxt->sax->reference != NULL)) {
7126 ctxt->sax->reference(ctxt->userData, name);
7127 }
7128 }
7129 ctxt->valid = 0;
7130 }
7131
7132 /*
7133 * [ WFC: Parsed Entity ]
7134 * An entity reference must not contain the name of an
7135 * unparsed entity
7136 */
7137 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7138 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7139 "Entity reference to unparsed entity %s\n", name);
7140 }
7141
7142 /*
7143 * [ WFC: No External Entity References ]
7144 * Attribute values cannot contain direct or indirect
7145 * entity references to external entities.
7146 */
7147 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7148 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7149 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7150 "Attribute references external entity '%s'\n", name);
7151 }
7152 /*
7153 * [ WFC: No < in Attribute Values ]
7154 * The replacement text of any entity referred to directly or
7155 * indirectly in an attribute value (other than "&lt;") must
7156 * not contain a <.
7157 */
7158 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7159 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007160 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007161 (xmlStrchr(ent->content, '<'))) {
7162 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7163 "'<' in entity '%s' is not allowed in attributes values\n", name);
7164 }
7165
7166 /*
7167 * Internal check, no parameter entities here ...
7168 */
7169 else {
7170 switch (ent->etype) {
7171 case XML_INTERNAL_PARAMETER_ENTITY:
7172 case XML_EXTERNAL_PARAMETER_ENTITY:
7173 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7174 "Attempt to reference the parameter entity '%s'\n",
7175 name);
7176 break;
7177 default:
7178 break;
7179 }
7180 }
7181
7182 /*
7183 * [ WFC: No Recursion ]
7184 * A parsed entity must not contain a recursive reference
7185 * to itself, either directly or indirectly.
7186 * Done somewhere else
7187 */
Owen Taylor3473f882001-02-23 17:55:21 +00007188 return(ent);
7189}
7190
7191/**
7192 * xmlParseStringEntityRef:
7193 * @ctxt: an XML parser context
7194 * @str: a pointer to an index in the string
7195 *
7196 * parse ENTITY references declarations, but this version parses it from
7197 * a string value.
7198 *
7199 * [68] EntityRef ::= '&' Name ';'
7200 *
7201 * [ WFC: Entity Declared ]
7202 * In a document without any DTD, a document with only an internal DTD
7203 * subset which contains no parameter entity references, or a document
7204 * with "standalone='yes'", the Name given in the entity reference
7205 * must match that in an entity declaration, except that well-formed
7206 * documents need not declare any of the following entities: amp, lt,
7207 * gt, apos, quot. The declaration of a parameter entity must precede
7208 * any reference to it. Similarly, the declaration of a general entity
7209 * must precede any reference to it which appears in a default value in an
7210 * attribute-list declaration. Note that if entities are declared in the
7211 * external subset or in external parameter entities, a non-validating
7212 * processor is not obligated to read and process their declarations;
7213 * for such documents, the rule that an entity must be declared is a
7214 * well-formedness constraint only if standalone='yes'.
7215 *
7216 * [ WFC: Parsed Entity ]
7217 * An entity reference must not contain the name of an unparsed entity
7218 *
7219 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7220 * is updated to the current location in the string.
7221 */
7222xmlEntityPtr
7223xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7224 xmlChar *name;
7225 const xmlChar *ptr;
7226 xmlChar cur;
7227 xmlEntityPtr ent = NULL;
7228
7229 if ((str == NULL) || (*str == NULL))
7230 return(NULL);
7231 ptr = *str;
7232 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007233 if (cur != '&')
7234 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007235
Daniel Veillard0161e632008-08-28 15:36:32 +00007236 ptr++;
7237 cur = *ptr;
7238 name = xmlParseStringName(ctxt, &ptr);
7239 if (name == NULL) {
7240 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7241 "xmlParseStringEntityRef: no name\n");
7242 *str = ptr;
7243 return(NULL);
7244 }
7245 if (*ptr != ';') {
7246 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007247 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007248 *str = ptr;
7249 return(NULL);
7250 }
7251 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007252
Owen Taylor3473f882001-02-23 17:55:21 +00007253
Daniel Veillard0161e632008-08-28 15:36:32 +00007254 /*
7255 * Predefined entites override any extra definition
7256 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007257 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7258 ent = xmlGetPredefinedEntity(name);
7259 if (ent != NULL) {
7260 xmlFree(name);
7261 *str = ptr;
7262 return(ent);
7263 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007264 }
Owen Taylor3473f882001-02-23 17:55:21 +00007265
Daniel Veillard0161e632008-08-28 15:36:32 +00007266 /*
7267 * Increate the number of entity references parsed
7268 */
7269 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007270
Daniel Veillard0161e632008-08-28 15:36:32 +00007271 /*
7272 * Ask first SAX for entity resolution, otherwise try the
7273 * entities which may have stored in the parser context.
7274 */
7275 if (ctxt->sax != NULL) {
7276 if (ctxt->sax->getEntity != NULL)
7277 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007278 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7279 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007280 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7281 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007282 }
7283 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007284
7285 /*
7286 * [ WFC: Entity Declared ]
7287 * In a document without any DTD, a document with only an
7288 * internal DTD subset which contains no parameter entity
7289 * references, or a document with "standalone='yes'", the
7290 * Name given in the entity reference must match that in an
7291 * entity declaration, except that well-formed documents
7292 * need not declare any of the following entities: amp, lt,
7293 * gt, apos, quot.
7294 * The declaration of a parameter entity must precede any
7295 * reference to it.
7296 * Similarly, the declaration of a general entity must
7297 * precede any reference to it which appears in a default
7298 * value in an attribute-list declaration. Note that if
7299 * entities are declared in the external subset or in
7300 * external parameter entities, a non-validating processor
7301 * is not obligated to read and process their declarations;
7302 * for such documents, the rule that an entity must be
7303 * declared is a well-formedness constraint only if
7304 * standalone='yes'.
7305 */
7306 if (ent == NULL) {
7307 if ((ctxt->standalone == 1) ||
7308 ((ctxt->hasExternalSubset == 0) &&
7309 (ctxt->hasPErefs == 0))) {
7310 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7311 "Entity '%s' not defined\n", name);
7312 } else {
7313 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7314 "Entity '%s' not defined\n",
7315 name);
7316 }
7317 /* TODO ? check regressions ctxt->valid = 0; */
7318 }
7319
7320 /*
7321 * [ WFC: Parsed Entity ]
7322 * An entity reference must not contain the name of an
7323 * unparsed entity
7324 */
7325 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7326 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7327 "Entity reference to unparsed entity %s\n", name);
7328 }
7329
7330 /*
7331 * [ WFC: No External Entity References ]
7332 * Attribute values cannot contain direct or indirect
7333 * entity references to external entities.
7334 */
7335 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7336 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7337 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7338 "Attribute references external entity '%s'\n", name);
7339 }
7340 /*
7341 * [ WFC: No < in Attribute Values ]
7342 * The replacement text of any entity referred to directly or
7343 * indirectly in an attribute value (other than "&lt;") must
7344 * not contain a <.
7345 */
7346 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7347 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007348 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007349 (xmlStrchr(ent->content, '<'))) {
7350 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7351 "'<' in entity '%s' is not allowed in attributes values\n",
7352 name);
7353 }
7354
7355 /*
7356 * Internal check, no parameter entities here ...
7357 */
7358 else {
7359 switch (ent->etype) {
7360 case XML_INTERNAL_PARAMETER_ENTITY:
7361 case XML_EXTERNAL_PARAMETER_ENTITY:
7362 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7363 "Attempt to reference the parameter entity '%s'\n",
7364 name);
7365 break;
7366 default:
7367 break;
7368 }
7369 }
7370
7371 /*
7372 * [ WFC: No Recursion ]
7373 * A parsed entity must not contain a recursive reference
7374 * to itself, either directly or indirectly.
7375 * Done somewhere else
7376 */
7377
7378 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007379 *str = ptr;
7380 return(ent);
7381}
7382
7383/**
7384 * xmlParsePEReference:
7385 * @ctxt: an XML parser context
7386 *
7387 * parse PEReference declarations
7388 * The entity content is handled directly by pushing it's content as
7389 * a new input stream.
7390 *
7391 * [69] PEReference ::= '%' Name ';'
7392 *
7393 * [ WFC: No Recursion ]
7394 * A parsed entity must not contain a recursive
7395 * reference to itself, either directly or indirectly.
7396 *
7397 * [ WFC: Entity Declared ]
7398 * In a document without any DTD, a document with only an internal DTD
7399 * subset which contains no parameter entity references, or a document
7400 * with "standalone='yes'", ... ... The declaration of a parameter
7401 * entity must precede any reference to it...
7402 *
7403 * [ VC: Entity Declared ]
7404 * In a document with an external subset or external parameter entities
7405 * with "standalone='no'", ... ... The declaration of a parameter entity
7406 * must precede any reference to it...
7407 *
7408 * [ WFC: In DTD ]
7409 * Parameter-entity references may only appear in the DTD.
7410 * NOTE: misleading but this is handled.
7411 */
7412void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007413xmlParsePEReference(xmlParserCtxtPtr ctxt)
7414{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007415 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007416 xmlEntityPtr entity = NULL;
7417 xmlParserInputPtr input;
7418
Daniel Veillard0161e632008-08-28 15:36:32 +00007419 if (RAW != '%')
7420 return;
7421 NEXT;
7422 name = xmlParseName(ctxt);
7423 if (name == NULL) {
7424 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7425 "xmlParsePEReference: no name\n");
7426 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007427 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007428 if (RAW != ';') {
7429 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7430 return;
7431 }
7432
7433 NEXT;
7434
7435 /*
7436 * Increate the number of entity references parsed
7437 */
7438 ctxt->nbentities++;
7439
7440 /*
7441 * Request the entity from SAX
7442 */
7443 if ((ctxt->sax != NULL) &&
7444 (ctxt->sax->getParameterEntity != NULL))
7445 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7446 name);
7447 if (entity == NULL) {
7448 /*
7449 * [ WFC: Entity Declared ]
7450 * In a document without any DTD, a document with only an
7451 * internal DTD subset which contains no parameter entity
7452 * references, or a document with "standalone='yes'", ...
7453 * ... The declaration of a parameter entity must precede
7454 * any reference to it...
7455 */
7456 if ((ctxt->standalone == 1) ||
7457 ((ctxt->hasExternalSubset == 0) &&
7458 (ctxt->hasPErefs == 0))) {
7459 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7460 "PEReference: %%%s; not found\n",
7461 name);
7462 } else {
7463 /*
7464 * [ VC: Entity Declared ]
7465 * In a document with an external subset or external
7466 * parameter entities with "standalone='no'", ...
7467 * ... The declaration of a parameter entity must
7468 * precede any reference to it...
7469 */
7470 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7471 "PEReference: %%%s; not found\n",
7472 name, NULL);
7473 ctxt->valid = 0;
7474 }
7475 } else {
7476 /*
7477 * Internal checking in case the entity quest barfed
7478 */
7479 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7480 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7481 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7482 "Internal: %%%s; is not a parameter entity\n",
7483 name, NULL);
7484 } else if (ctxt->input->free != deallocblankswrapper) {
7485 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7486 if (xmlPushInput(ctxt, input) < 0)
7487 return;
7488 } else {
7489 /*
7490 * TODO !!!
7491 * handle the extra spaces added before and after
7492 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7493 */
7494 input = xmlNewEntityInputStream(ctxt, entity);
7495 if (xmlPushInput(ctxt, input) < 0)
7496 return;
7497 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7498 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7499 (IS_BLANK_CH(NXT(5)))) {
7500 xmlParseTextDecl(ctxt);
7501 if (ctxt->errNo ==
7502 XML_ERR_UNSUPPORTED_ENCODING) {
7503 /*
7504 * The XML REC instructs us to stop parsing
7505 * right here
7506 */
7507 ctxt->instate = XML_PARSER_EOF;
7508 return;
7509 }
7510 }
7511 }
7512 }
7513 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007514}
7515
7516/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007517 * xmlLoadEntityContent:
7518 * @ctxt: an XML parser context
7519 * @entity: an unloaded system entity
7520 *
7521 * Load the original content of the given system entity from the
7522 * ExternalID/SystemID given. This is to be used for Included in Literal
7523 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7524 *
7525 * Returns 0 in case of success and -1 in case of failure
7526 */
7527static int
7528xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7529 xmlParserInputPtr input;
7530 xmlBufferPtr buf;
7531 int l, c;
7532 int count = 0;
7533
7534 if ((ctxt == NULL) || (entity == NULL) ||
7535 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7536 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7537 (entity->content != NULL)) {
7538 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7539 "xmlLoadEntityContent parameter error");
7540 return(-1);
7541 }
7542
7543 if (xmlParserDebugEntities)
7544 xmlGenericError(xmlGenericErrorContext,
7545 "Reading %s entity content input\n", entity->name);
7546
7547 buf = xmlBufferCreate();
7548 if (buf == NULL) {
7549 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7550 "xmlLoadEntityContent parameter error");
7551 return(-1);
7552 }
7553
7554 input = xmlNewEntityInputStream(ctxt, entity);
7555 if (input == NULL) {
7556 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7557 "xmlLoadEntityContent input error");
7558 xmlBufferFree(buf);
7559 return(-1);
7560 }
7561
7562 /*
7563 * Push the entity as the current input, read char by char
7564 * saving to the buffer until the end of the entity or an error
7565 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007566 if (xmlPushInput(ctxt, input) < 0) {
7567 xmlBufferFree(buf);
7568 return(-1);
7569 }
7570
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007571 GROW;
7572 c = CUR_CHAR(l);
7573 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7574 (IS_CHAR(c))) {
7575 xmlBufferAdd(buf, ctxt->input->cur, l);
7576 if (count++ > 100) {
7577 count = 0;
7578 GROW;
7579 }
7580 NEXTL(l);
7581 c = CUR_CHAR(l);
7582 }
7583
7584 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7585 xmlPopInput(ctxt);
7586 } else if (!IS_CHAR(c)) {
7587 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7588 "xmlLoadEntityContent: invalid char value %d\n",
7589 c);
7590 xmlBufferFree(buf);
7591 return(-1);
7592 }
7593 entity->content = buf->content;
7594 buf->content = NULL;
7595 xmlBufferFree(buf);
7596
7597 return(0);
7598}
7599
7600/**
Owen Taylor3473f882001-02-23 17:55:21 +00007601 * xmlParseStringPEReference:
7602 * @ctxt: an XML parser context
7603 * @str: a pointer to an index in the string
7604 *
7605 * parse PEReference declarations
7606 *
7607 * [69] PEReference ::= '%' Name ';'
7608 *
7609 * [ WFC: No Recursion ]
7610 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007611 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007612 *
7613 * [ WFC: Entity Declared ]
7614 * In a document without any DTD, a document with only an internal DTD
7615 * subset which contains no parameter entity references, or a document
7616 * with "standalone='yes'", ... ... The declaration of a parameter
7617 * entity must precede any reference to it...
7618 *
7619 * [ VC: Entity Declared ]
7620 * In a document with an external subset or external parameter entities
7621 * with "standalone='no'", ... ... The declaration of a parameter entity
7622 * must precede any reference to it...
7623 *
7624 * [ WFC: In DTD ]
7625 * Parameter-entity references may only appear in the DTD.
7626 * NOTE: misleading but this is handled.
7627 *
7628 * Returns the string of the entity content.
7629 * str is updated to the current value of the index
7630 */
7631xmlEntityPtr
7632xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7633 const xmlChar *ptr;
7634 xmlChar cur;
7635 xmlChar *name;
7636 xmlEntityPtr entity = NULL;
7637
7638 if ((str == NULL) || (*str == NULL)) return(NULL);
7639 ptr = *str;
7640 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007641 if (cur != '%')
7642 return(NULL);
7643 ptr++;
7644 cur = *ptr;
7645 name = xmlParseStringName(ctxt, &ptr);
7646 if (name == NULL) {
7647 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7648 "xmlParseStringPEReference: no name\n");
7649 *str = ptr;
7650 return(NULL);
7651 }
7652 cur = *ptr;
7653 if (cur != ';') {
7654 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7655 xmlFree(name);
7656 *str = ptr;
7657 return(NULL);
7658 }
7659 ptr++;
7660
7661 /*
7662 * Increate the number of entity references parsed
7663 */
7664 ctxt->nbentities++;
7665
7666 /*
7667 * Request the entity from SAX
7668 */
7669 if ((ctxt->sax != NULL) &&
7670 (ctxt->sax->getParameterEntity != NULL))
7671 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7672 name);
7673 if (entity == NULL) {
7674 /*
7675 * [ WFC: Entity Declared ]
7676 * In a document without any DTD, a document with only an
7677 * internal DTD subset which contains no parameter entity
7678 * references, or a document with "standalone='yes'", ...
7679 * ... The declaration of a parameter entity must precede
7680 * any reference to it...
7681 */
7682 if ((ctxt->standalone == 1) ||
7683 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7684 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7685 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007686 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007687 /*
7688 * [ VC: Entity Declared ]
7689 * In a document with an external subset or external
7690 * parameter entities with "standalone='no'", ...
7691 * ... The declaration of a parameter entity must
7692 * precede any reference to it...
7693 */
7694 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7695 "PEReference: %%%s; not found\n",
7696 name, NULL);
7697 ctxt->valid = 0;
7698 }
7699 } else {
7700 /*
7701 * Internal checking in case the entity quest barfed
7702 */
7703 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7704 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7705 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7706 "%%%s; is not a parameter entity\n",
7707 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007708 }
7709 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007710 ctxt->hasPErefs = 1;
7711 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007712 *str = ptr;
7713 return(entity);
7714}
7715
7716/**
7717 * xmlParseDocTypeDecl:
7718 * @ctxt: an XML parser context
7719 *
7720 * parse a DOCTYPE declaration
7721 *
7722 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7723 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7724 *
7725 * [ VC: Root Element Type ]
7726 * The Name in the document type declaration must match the element
7727 * type of the root element.
7728 */
7729
7730void
7731xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007732 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007733 xmlChar *ExternalID = NULL;
7734 xmlChar *URI = NULL;
7735
7736 /*
7737 * We know that '<!DOCTYPE' has been detected.
7738 */
7739 SKIP(9);
7740
7741 SKIP_BLANKS;
7742
7743 /*
7744 * Parse the DOCTYPE name.
7745 */
7746 name = xmlParseName(ctxt);
7747 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007748 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7749 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007750 }
7751 ctxt->intSubName = name;
7752
7753 SKIP_BLANKS;
7754
7755 /*
7756 * Check for SystemID and ExternalID
7757 */
7758 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7759
7760 if ((URI != NULL) || (ExternalID != NULL)) {
7761 ctxt->hasExternalSubset = 1;
7762 }
7763 ctxt->extSubURI = URI;
7764 ctxt->extSubSystem = ExternalID;
7765
7766 SKIP_BLANKS;
7767
7768 /*
7769 * Create and update the internal subset.
7770 */
7771 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7772 (!ctxt->disableSAX))
7773 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7774
7775 /*
7776 * Is there any internal subset declarations ?
7777 * they are handled separately in xmlParseInternalSubset()
7778 */
7779 if (RAW == '[')
7780 return;
7781
7782 /*
7783 * We should be at the end of the DOCTYPE declaration.
7784 */
7785 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007786 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007787 }
7788 NEXT;
7789}
7790
7791/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007792 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007793 * @ctxt: an XML parser context
7794 *
7795 * parse the internal subset declaration
7796 *
7797 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7798 */
7799
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007800static void
Owen Taylor3473f882001-02-23 17:55:21 +00007801xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7802 /*
7803 * Is there any DTD definition ?
7804 */
7805 if (RAW == '[') {
7806 ctxt->instate = XML_PARSER_DTD;
7807 NEXT;
7808 /*
7809 * Parse the succession of Markup declarations and
7810 * PEReferences.
7811 * Subsequence (markupdecl | PEReference | S)*
7812 */
7813 while (RAW != ']') {
7814 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007815 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007816
7817 SKIP_BLANKS;
7818 xmlParseMarkupDecl(ctxt);
7819 xmlParsePEReference(ctxt);
7820
7821 /*
7822 * Pop-up of finished entities.
7823 */
7824 while ((RAW == 0) && (ctxt->inputNr > 1))
7825 xmlPopInput(ctxt);
7826
7827 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007828 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007829 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007830 break;
7831 }
7832 }
7833 if (RAW == ']') {
7834 NEXT;
7835 SKIP_BLANKS;
7836 }
7837 }
7838
7839 /*
7840 * We should be at the end of the DOCTYPE declaration.
7841 */
7842 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007843 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007844 }
7845 NEXT;
7846}
7847
Daniel Veillard81273902003-09-30 00:43:48 +00007848#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007849/**
7850 * xmlParseAttribute:
7851 * @ctxt: an XML parser context
7852 * @value: a xmlChar ** used to store the value of the attribute
7853 *
7854 * parse an attribute
7855 *
7856 * [41] Attribute ::= Name Eq AttValue
7857 *
7858 * [ WFC: No External Entity References ]
7859 * Attribute values cannot contain direct or indirect entity references
7860 * to external entities.
7861 *
7862 * [ WFC: No < in Attribute Values ]
7863 * The replacement text of any entity referred to directly or indirectly in
7864 * an attribute value (other than "&lt;") must not contain a <.
7865 *
7866 * [ VC: Attribute Value Type ]
7867 * The attribute must have been declared; the value must be of the type
7868 * declared for it.
7869 *
7870 * [25] Eq ::= S? '=' S?
7871 *
7872 * With namespace:
7873 *
7874 * [NS 11] Attribute ::= QName Eq AttValue
7875 *
7876 * Also the case QName == xmlns:??? is handled independently as a namespace
7877 * definition.
7878 *
7879 * Returns the attribute name, and the value in *value.
7880 */
7881
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007882const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007883xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007884 const xmlChar *name;
7885 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007886
7887 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007888 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007889 name = xmlParseName(ctxt);
7890 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007891 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007892 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007893 return(NULL);
7894 }
7895
7896 /*
7897 * read the value
7898 */
7899 SKIP_BLANKS;
7900 if (RAW == '=') {
7901 NEXT;
7902 SKIP_BLANKS;
7903 val = xmlParseAttValue(ctxt);
7904 ctxt->instate = XML_PARSER_CONTENT;
7905 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007906 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007907 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007908 return(NULL);
7909 }
7910
7911 /*
7912 * Check that xml:lang conforms to the specification
7913 * No more registered as an error, just generate a warning now
7914 * since this was deprecated in XML second edition
7915 */
7916 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7917 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007918 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7919 "Malformed value for xml:lang : %s\n",
7920 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007921 }
7922 }
7923
7924 /*
7925 * Check that xml:space conforms to the specification
7926 */
7927 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7928 if (xmlStrEqual(val, BAD_CAST "default"))
7929 *(ctxt->space) = 0;
7930 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7931 *(ctxt->space) = 1;
7932 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007933 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007934"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007935 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007936 }
7937 }
7938
7939 *value = val;
7940 return(name);
7941}
7942
7943/**
7944 * xmlParseStartTag:
7945 * @ctxt: an XML parser context
7946 *
7947 * parse a start of tag either for rule element or
7948 * EmptyElement. In both case we don't parse the tag closing chars.
7949 *
7950 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7951 *
7952 * [ WFC: Unique Att Spec ]
7953 * No attribute name may appear more than once in the same start-tag or
7954 * empty-element tag.
7955 *
7956 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7957 *
7958 * [ WFC: Unique Att Spec ]
7959 * No attribute name may appear more than once in the same start-tag or
7960 * empty-element tag.
7961 *
7962 * With namespace:
7963 *
7964 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7965 *
7966 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7967 *
7968 * Returns the element name parsed
7969 */
7970
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007971const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007972xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007973 const xmlChar *name;
7974 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007975 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007976 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007977 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007978 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007979 int i;
7980
7981 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007982 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007983
7984 name = xmlParseName(ctxt);
7985 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007986 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007987 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007988 return(NULL);
7989 }
7990
7991 /*
7992 * Now parse the attributes, it ends up with the ending
7993 *
7994 * (S Attribute)* S?
7995 */
7996 SKIP_BLANKS;
7997 GROW;
7998
Daniel Veillard21a0f912001-02-25 19:54:14 +00007999 while ((RAW != '>') &&
8000 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008001 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008002 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008003 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008004
8005 attname = xmlParseAttribute(ctxt, &attvalue);
8006 if ((attname != NULL) && (attvalue != NULL)) {
8007 /*
8008 * [ WFC: Unique Att Spec ]
8009 * No attribute name may appear more than once in the same
8010 * start-tag or empty-element tag.
8011 */
8012 for (i = 0; i < nbatts;i += 2) {
8013 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008014 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008015 xmlFree(attvalue);
8016 goto failed;
8017 }
8018 }
Owen Taylor3473f882001-02-23 17:55:21 +00008019 /*
8020 * Add the pair to atts
8021 */
8022 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008023 maxatts = 22; /* allow for 10 attrs by default */
8024 atts = (const xmlChar **)
8025 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008026 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008027 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008028 if (attvalue != NULL)
8029 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008030 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008031 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008032 ctxt->atts = atts;
8033 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008034 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008035 const xmlChar **n;
8036
Owen Taylor3473f882001-02-23 17:55:21 +00008037 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008038 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008039 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008040 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008041 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008042 if (attvalue != NULL)
8043 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008044 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008045 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008046 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008047 ctxt->atts = atts;
8048 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008049 }
8050 atts[nbatts++] = attname;
8051 atts[nbatts++] = attvalue;
8052 atts[nbatts] = NULL;
8053 atts[nbatts + 1] = NULL;
8054 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008055 if (attvalue != NULL)
8056 xmlFree(attvalue);
8057 }
8058
8059failed:
8060
Daniel Veillard3772de32002-12-17 10:31:45 +00008061 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008062 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8063 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008064 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008065 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8066 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008067 }
8068 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008069 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8070 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008071 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8072 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008073 break;
8074 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008075 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008076 GROW;
8077 }
8078
8079 /*
8080 * SAX: Start of Element !
8081 */
8082 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008083 (!ctxt->disableSAX)) {
8084 if (nbatts > 0)
8085 ctxt->sax->startElement(ctxt->userData, name, atts);
8086 else
8087 ctxt->sax->startElement(ctxt->userData, name, NULL);
8088 }
Owen Taylor3473f882001-02-23 17:55:21 +00008089
8090 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008091 /* Free only the content strings */
8092 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008093 if (atts[i] != NULL)
8094 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008095 }
8096 return(name);
8097}
8098
8099/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008100 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008101 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008102 * @line: line of the start tag
8103 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008104 *
8105 * parse an end of tag
8106 *
8107 * [42] ETag ::= '</' Name S? '>'
8108 *
8109 * With namespace
8110 *
8111 * [NS 9] ETag ::= '</' QName S? '>'
8112 */
8113
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008114static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008115xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008116 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008117
8118 GROW;
8119 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008120 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008121 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008122 return;
8123 }
8124 SKIP(2);
8125
Daniel Veillard46de64e2002-05-29 08:21:33 +00008126 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008127
8128 /*
8129 * We should definitely be at the ending "S? '>'" part
8130 */
8131 GROW;
8132 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008133 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008134 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008135 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008136 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008137
8138 /*
8139 * [ WFC: Element Type Match ]
8140 * The Name in an element's end-tag must match the element type in the
8141 * start-tag.
8142 *
8143 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008144 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008145 if (name == NULL) name = BAD_CAST "unparseable";
8146 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008147 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008148 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008149 }
8150
8151 /*
8152 * SAX: End of Tag
8153 */
8154 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8155 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008156 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008157
Daniel Veillarde57ec792003-09-10 10:50:59 +00008158 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008159 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008160 return;
8161}
8162
8163/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008164 * xmlParseEndTag:
8165 * @ctxt: an XML parser context
8166 *
8167 * parse an end of tag
8168 *
8169 * [42] ETag ::= '</' Name S? '>'
8170 *
8171 * With namespace
8172 *
8173 * [NS 9] ETag ::= '</' QName S? '>'
8174 */
8175
8176void
8177xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008178 xmlParseEndTag1(ctxt, 0);
8179}
Daniel Veillard81273902003-09-30 00:43:48 +00008180#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008181
8182/************************************************************************
8183 * *
8184 * SAX 2 specific operations *
8185 * *
8186 ************************************************************************/
8187
Daniel Veillard0fb18932003-09-07 09:14:37 +00008188/*
8189 * xmlGetNamespace:
8190 * @ctxt: an XML parser context
8191 * @prefix: the prefix to lookup
8192 *
8193 * Lookup the namespace name for the @prefix (which ca be NULL)
8194 * The prefix must come from the @ctxt->dict dictionnary
8195 *
8196 * Returns the namespace name or NULL if not bound
8197 */
8198static const xmlChar *
8199xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8200 int i;
8201
Daniel Veillarde57ec792003-09-10 10:50:59 +00008202 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008203 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008204 if (ctxt->nsTab[i] == prefix) {
8205 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8206 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008207 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008208 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008209 return(NULL);
8210}
8211
8212/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008213 * xmlParseQName:
8214 * @ctxt: an XML parser context
8215 * @prefix: pointer to store the prefix part
8216 *
8217 * parse an XML Namespace QName
8218 *
8219 * [6] QName ::= (Prefix ':')? LocalPart
8220 * [7] Prefix ::= NCName
8221 * [8] LocalPart ::= NCName
8222 *
8223 * Returns the Name parsed or NULL
8224 */
8225
8226static const xmlChar *
8227xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8228 const xmlChar *l, *p;
8229
8230 GROW;
8231
8232 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008233 if (l == NULL) {
8234 if (CUR == ':') {
8235 l = xmlParseName(ctxt);
8236 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008237 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8238 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008239 *prefix = NULL;
8240 return(l);
8241 }
8242 }
8243 return(NULL);
8244 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008245 if (CUR == ':') {
8246 NEXT;
8247 p = l;
8248 l = xmlParseNCName(ctxt);
8249 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008250 xmlChar *tmp;
8251
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008252 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8253 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008254 l = xmlParseNmtoken(ctxt);
8255 if (l == NULL)
8256 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8257 else {
8258 tmp = xmlBuildQName(l, p, NULL, 0);
8259 xmlFree((char *)l);
8260 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008261 p = xmlDictLookup(ctxt->dict, tmp, -1);
8262 if (tmp != NULL) xmlFree(tmp);
8263 *prefix = NULL;
8264 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008265 }
8266 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008267 xmlChar *tmp;
8268
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008269 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8270 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008271 NEXT;
8272 tmp = (xmlChar *) xmlParseName(ctxt);
8273 if (tmp != NULL) {
8274 tmp = xmlBuildQName(tmp, l, NULL, 0);
8275 l = xmlDictLookup(ctxt->dict, tmp, -1);
8276 if (tmp != NULL) xmlFree(tmp);
8277 *prefix = p;
8278 return(l);
8279 }
8280 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8281 l = xmlDictLookup(ctxt->dict, tmp, -1);
8282 if (tmp != NULL) xmlFree(tmp);
8283 *prefix = p;
8284 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008285 }
8286 *prefix = p;
8287 } else
8288 *prefix = NULL;
8289 return(l);
8290}
8291
8292/**
8293 * xmlParseQNameAndCompare:
8294 * @ctxt: an XML parser context
8295 * @name: the localname
8296 * @prefix: the prefix, if any.
8297 *
8298 * parse an XML name and compares for match
8299 * (specialized for endtag parsing)
8300 *
8301 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8302 * and the name for mismatch
8303 */
8304
8305static const xmlChar *
8306xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8307 xmlChar const *prefix) {
8308 const xmlChar *cmp = name;
8309 const xmlChar *in;
8310 const xmlChar *ret;
8311 const xmlChar *prefix2;
8312
8313 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8314
8315 GROW;
8316 in = ctxt->input->cur;
8317
8318 cmp = prefix;
8319 while (*in != 0 && *in == *cmp) {
8320 ++in;
8321 ++cmp;
8322 }
8323 if ((*cmp == 0) && (*in == ':')) {
8324 in++;
8325 cmp = name;
8326 while (*in != 0 && *in == *cmp) {
8327 ++in;
8328 ++cmp;
8329 }
William M. Brack76e95df2003-10-18 16:20:14 +00008330 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008331 /* success */
8332 ctxt->input->cur = in;
8333 return((const xmlChar*) 1);
8334 }
8335 }
8336 /*
8337 * all strings coms from the dictionary, equality can be done directly
8338 */
8339 ret = xmlParseQName (ctxt, &prefix2);
8340 if ((ret == name) && (prefix == prefix2))
8341 return((const xmlChar*) 1);
8342 return ret;
8343}
8344
8345/**
8346 * xmlParseAttValueInternal:
8347 * @ctxt: an XML parser context
8348 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008349 * @alloc: whether the attribute was reallocated as a new string
8350 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008351 *
8352 * parse a value for an attribute.
8353 * NOTE: if no normalization is needed, the routine will return pointers
8354 * directly from the data buffer.
8355 *
8356 * 3.3.3 Attribute-Value Normalization:
8357 * Before the value of an attribute is passed to the application or
8358 * checked for validity, the XML processor must normalize it as follows:
8359 * - a character reference is processed by appending the referenced
8360 * character to the attribute value
8361 * - an entity reference is processed by recursively processing the
8362 * replacement text of the entity
8363 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8364 * appending #x20 to the normalized value, except that only a single
8365 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8366 * parsed entity or the literal entity value of an internal parsed entity
8367 * - other characters are processed by appending them to the normalized value
8368 * If the declared value is not CDATA, then the XML processor must further
8369 * process the normalized attribute value by discarding any leading and
8370 * trailing space (#x20) characters, and by replacing sequences of space
8371 * (#x20) characters by a single space (#x20) character.
8372 * All attributes for which no declaration has been read should be treated
8373 * by a non-validating parser as if declared CDATA.
8374 *
8375 * Returns the AttValue parsed or NULL. The value has to be freed by the
8376 * caller if it was copied, this can be detected by val[*len] == 0.
8377 */
8378
8379static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008380xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8381 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008382{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008383 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008384 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008385 xmlChar *ret = NULL;
8386
8387 GROW;
8388 in = (xmlChar *) CUR_PTR;
8389 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008390 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008391 return (NULL);
8392 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008393 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008394
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008395 /*
8396 * try to handle in this routine the most common case where no
8397 * allocation of a new string is required and where content is
8398 * pure ASCII.
8399 */
8400 limit = *in++;
8401 end = ctxt->input->end;
8402 start = in;
8403 if (in >= end) {
8404 const xmlChar *oldbase = ctxt->input->base;
8405 GROW;
8406 if (oldbase != ctxt->input->base) {
8407 long delta = ctxt->input->base - oldbase;
8408 start = start + delta;
8409 in = in + delta;
8410 }
8411 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008412 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008413 if (normalize) {
8414 /*
8415 * Skip any leading spaces
8416 */
8417 while ((in < end) && (*in != limit) &&
8418 ((*in == 0x20) || (*in == 0x9) ||
8419 (*in == 0xA) || (*in == 0xD))) {
8420 in++;
8421 start = in;
8422 if (in >= end) {
8423 const xmlChar *oldbase = ctxt->input->base;
8424 GROW;
8425 if (oldbase != ctxt->input->base) {
8426 long delta = ctxt->input->base - oldbase;
8427 start = start + delta;
8428 in = in + delta;
8429 }
8430 end = ctxt->input->end;
8431 }
8432 }
8433 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8434 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8435 if ((*in++ == 0x20) && (*in == 0x20)) break;
8436 if (in >= end) {
8437 const xmlChar *oldbase = ctxt->input->base;
8438 GROW;
8439 if (oldbase != ctxt->input->base) {
8440 long delta = ctxt->input->base - oldbase;
8441 start = start + delta;
8442 in = in + delta;
8443 }
8444 end = ctxt->input->end;
8445 }
8446 }
8447 last = in;
8448 /*
8449 * skip the trailing blanks
8450 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008451 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008452 while ((in < end) && (*in != limit) &&
8453 ((*in == 0x20) || (*in == 0x9) ||
8454 (*in == 0xA) || (*in == 0xD))) {
8455 in++;
8456 if (in >= end) {
8457 const xmlChar *oldbase = ctxt->input->base;
8458 GROW;
8459 if (oldbase != ctxt->input->base) {
8460 long delta = ctxt->input->base - oldbase;
8461 start = start + delta;
8462 in = in + delta;
8463 last = last + delta;
8464 }
8465 end = ctxt->input->end;
8466 }
8467 }
8468 if (*in != limit) goto need_complex;
8469 } else {
8470 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8471 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8472 in++;
8473 if (in >= end) {
8474 const xmlChar *oldbase = ctxt->input->base;
8475 GROW;
8476 if (oldbase != ctxt->input->base) {
8477 long delta = ctxt->input->base - oldbase;
8478 start = start + delta;
8479 in = in + delta;
8480 }
8481 end = ctxt->input->end;
8482 }
8483 }
8484 last = in;
8485 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008486 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008487 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008488 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008489 *len = last - start;
8490 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008491 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008492 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008493 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008494 }
8495 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008496 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008497 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008498need_complex:
8499 if (alloc) *alloc = 1;
8500 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008501}
8502
8503/**
8504 * xmlParseAttribute2:
8505 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008506 * @pref: the element prefix
8507 * @elem: the element name
8508 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008509 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008510 * @len: an int * to save the length of the attribute
8511 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008512 *
8513 * parse an attribute in the new SAX2 framework.
8514 *
8515 * Returns the attribute name, and the value in *value, .
8516 */
8517
8518static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008519xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008520 const xmlChar * pref, const xmlChar * elem,
8521 const xmlChar ** prefix, xmlChar ** value,
8522 int *len, int *alloc)
8523{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008524 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008525 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008526 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008527
8528 *value = NULL;
8529 GROW;
8530 name = xmlParseQName(ctxt, prefix);
8531 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008532 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8533 "error parsing attribute name\n");
8534 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008535 }
8536
8537 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008538 * get the type if needed
8539 */
8540 if (ctxt->attsSpecial != NULL) {
8541 int type;
8542
8543 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008544 pref, elem, *prefix, name);
8545 if (type != 0)
8546 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008547 }
8548
8549 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008550 * read the value
8551 */
8552 SKIP_BLANKS;
8553 if (RAW == '=') {
8554 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008555 SKIP_BLANKS;
8556 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8557 if (normalize) {
8558 /*
8559 * Sometimes a second normalisation pass for spaces is needed
8560 * but that only happens if charrefs or entities refernces
8561 * have been used in the attribute value, i.e. the attribute
8562 * value have been extracted in an allocated string already.
8563 */
8564 if (*alloc) {
8565 const xmlChar *val2;
8566
8567 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008568 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008569 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008570 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008571 }
8572 }
8573 }
8574 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008575 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008576 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8577 "Specification mandate value for attribute %s\n",
8578 name);
8579 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008580 }
8581
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008582 if (*prefix == ctxt->str_xml) {
8583 /*
8584 * Check that xml:lang conforms to the specification
8585 * No more registered as an error, just generate a warning now
8586 * since this was deprecated in XML second edition
8587 */
8588 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8589 internal_val = xmlStrndup(val, *len);
8590 if (!xmlCheckLanguageID(internal_val)) {
8591 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8592 "Malformed value for xml:lang : %s\n",
8593 internal_val, NULL);
8594 }
8595 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008596
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008597 /*
8598 * Check that xml:space conforms to the specification
8599 */
8600 if (xmlStrEqual(name, BAD_CAST "space")) {
8601 internal_val = xmlStrndup(val, *len);
8602 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8603 *(ctxt->space) = 0;
8604 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8605 *(ctxt->space) = 1;
8606 else {
8607 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8608 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8609 internal_val, NULL);
8610 }
8611 }
8612 if (internal_val) {
8613 xmlFree(internal_val);
8614 }
8615 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008616
8617 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008618 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008619}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008620/**
8621 * xmlParseStartTag2:
8622 * @ctxt: an XML parser context
8623 *
8624 * parse a start of tag either for rule element or
8625 * EmptyElement. In both case we don't parse the tag closing chars.
8626 * This routine is called when running SAX2 parsing
8627 *
8628 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8629 *
8630 * [ WFC: Unique Att Spec ]
8631 * No attribute name may appear more than once in the same start-tag or
8632 * empty-element tag.
8633 *
8634 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8635 *
8636 * [ WFC: Unique Att Spec ]
8637 * No attribute name may appear more than once in the same start-tag or
8638 * empty-element tag.
8639 *
8640 * With namespace:
8641 *
8642 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8643 *
8644 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8645 *
8646 * Returns the element name parsed
8647 */
8648
8649static const xmlChar *
8650xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008651 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008652 const xmlChar *localname;
8653 const xmlChar *prefix;
8654 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008655 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008656 const xmlChar *nsname;
8657 xmlChar *attvalue;
8658 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008659 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008660 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008661 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008662 const xmlChar *base;
8663 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008664 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008665
8666 if (RAW != '<') return(NULL);
8667 NEXT1;
8668
8669 /*
8670 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8671 * point since the attribute values may be stored as pointers to
8672 * the buffer and calling SHRINK would destroy them !
8673 * The Shrinking is only possible once the full set of attribute
8674 * callbacks have been done.
8675 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008676reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008677 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008678 base = ctxt->input->base;
8679 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008680 oldline = ctxt->input->line;
8681 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008682 nbatts = 0;
8683 nratts = 0;
8684 nbdef = 0;
8685 nbNs = 0;
8686 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008687 /* Forget any namespaces added during an earlier parse of this element. */
8688 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008689
8690 localname = xmlParseQName(ctxt, &prefix);
8691 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008692 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8693 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008694 return(NULL);
8695 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008696 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008697
8698 /*
8699 * Now parse the attributes, it ends up with the ending
8700 *
8701 * (S Attribute)* S?
8702 */
8703 SKIP_BLANKS;
8704 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008705 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008706
8707 while ((RAW != '>') &&
8708 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008709 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008710 const xmlChar *q = CUR_PTR;
8711 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008712 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008713
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008714 attname = xmlParseAttribute2(ctxt, prefix, localname,
8715 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008716 if (ctxt->input->base != base) {
8717 if ((attvalue != NULL) && (alloc != 0))
8718 xmlFree(attvalue);
8719 attvalue = NULL;
8720 goto base_changed;
8721 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008722 if ((attname != NULL) && (attvalue != NULL)) {
8723 if (len < 0) len = xmlStrlen(attvalue);
8724 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008725 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8726 xmlURIPtr uri;
8727
8728 if (*URL != 0) {
8729 uri = xmlParseURI((const char *) URL);
8730 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008731 xmlNsErr(ctxt, XML_WAR_NS_URI,
8732 "xmlns: '%s' is not a valid URI\n",
8733 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008734 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008735 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008736 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8737 "xmlns: URI %s is not absolute\n",
8738 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008739 }
8740 xmlFreeURI(uri);
8741 }
Daniel Veillard37334572008-07-31 08:20:02 +00008742 if (URL == ctxt->str_xml_ns) {
8743 if (attname != ctxt->str_xml) {
8744 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8745 "xml namespace URI cannot be the default namespace\n",
8746 NULL, NULL, NULL);
8747 }
8748 goto skip_default_ns;
8749 }
8750 if ((len == 29) &&
8751 (xmlStrEqual(URL,
8752 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8753 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8754 "reuse of the xmlns namespace name is forbidden\n",
8755 NULL, NULL, NULL);
8756 goto skip_default_ns;
8757 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008758 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008759 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008760 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008761 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008762 for (j = 1;j <= nbNs;j++)
8763 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8764 break;
8765 if (j <= nbNs)
8766 xmlErrAttributeDup(ctxt, NULL, attname);
8767 else
8768 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008769skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008770 if (alloc != 0) xmlFree(attvalue);
8771 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008772 continue;
8773 }
8774 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008775 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8776 xmlURIPtr uri;
8777
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008778 if (attname == ctxt->str_xml) {
8779 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008780 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8781 "xml namespace prefix mapped to wrong URI\n",
8782 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008783 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008784 /*
8785 * Do not keep a namespace definition node
8786 */
Daniel Veillard37334572008-07-31 08:20:02 +00008787 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008788 }
Daniel Veillard37334572008-07-31 08:20:02 +00008789 if (URL == ctxt->str_xml_ns) {
8790 if (attname != ctxt->str_xml) {
8791 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8792 "xml namespace URI mapped to wrong prefix\n",
8793 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008794 }
Daniel Veillard37334572008-07-31 08:20:02 +00008795 goto skip_ns;
8796 }
8797 if (attname == ctxt->str_xmlns) {
8798 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8799 "redefinition of the xmlns prefix is forbidden\n",
8800 NULL, NULL, NULL);
8801 goto skip_ns;
8802 }
8803 if ((len == 29) &&
8804 (xmlStrEqual(URL,
8805 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8806 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8807 "reuse of the xmlns namespace name is forbidden\n",
8808 NULL, NULL, NULL);
8809 goto skip_ns;
8810 }
8811 if ((URL == NULL) || (URL[0] == 0)) {
8812 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8813 "xmlns:%s: Empty XML namespace is not allowed\n",
8814 attname, NULL, NULL);
8815 goto skip_ns;
8816 } else {
8817 uri = xmlParseURI((const char *) URL);
8818 if (uri == NULL) {
8819 xmlNsErr(ctxt, XML_WAR_NS_URI,
8820 "xmlns:%s: '%s' is not a valid URI\n",
8821 attname, URL, NULL);
8822 } else {
8823 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8824 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8825 "xmlns:%s: URI %s is not absolute\n",
8826 attname, URL, NULL);
8827 }
8828 xmlFreeURI(uri);
8829 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008830 }
8831
Daniel Veillard0fb18932003-09-07 09:14:37 +00008832 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008833 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008834 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008835 for (j = 1;j <= nbNs;j++)
8836 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8837 break;
8838 if (j <= nbNs)
8839 xmlErrAttributeDup(ctxt, aprefix, attname);
8840 else
8841 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008842skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008843 if (alloc != 0) xmlFree(attvalue);
8844 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008845 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008846 continue;
8847 }
8848
8849 /*
8850 * Add the pair to atts
8851 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008852 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8853 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008854 if (attvalue[len] == 0)
8855 xmlFree(attvalue);
8856 goto failed;
8857 }
8858 maxatts = ctxt->maxatts;
8859 atts = ctxt->atts;
8860 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008861 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008862 atts[nbatts++] = attname;
8863 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008864 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008865 atts[nbatts++] = attvalue;
8866 attvalue += len;
8867 atts[nbatts++] = attvalue;
8868 /*
8869 * tag if some deallocation is needed
8870 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008871 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008872 } else {
8873 if ((attvalue != NULL) && (attvalue[len] == 0))
8874 xmlFree(attvalue);
8875 }
8876
Daniel Veillard37334572008-07-31 08:20:02 +00008877failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008878
8879 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008880 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008881 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8882 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008883 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008884 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8885 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008886 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008887 }
8888 SKIP_BLANKS;
8889 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8890 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008891 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008892 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008893 break;
8894 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008895 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008896 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008897 }
8898
Daniel Veillard0fb18932003-09-07 09:14:37 +00008899 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008900 * The attributes defaulting
8901 */
8902 if (ctxt->attsDefault != NULL) {
8903 xmlDefAttrsPtr defaults;
8904
8905 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8906 if (defaults != NULL) {
8907 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008908 attname = defaults->values[5 * i];
8909 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008910
8911 /*
8912 * special work for namespaces defaulted defs
8913 */
8914 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8915 /*
8916 * check that it's not a defined namespace
8917 */
8918 for (j = 1;j <= nbNs;j++)
8919 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8920 break;
8921 if (j <= nbNs) continue;
8922
8923 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008924 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008925 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008926 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008927 nbNs++;
8928 }
8929 } else if (aprefix == ctxt->str_xmlns) {
8930 /*
8931 * check that it's not a defined namespace
8932 */
8933 for (j = 1;j <= nbNs;j++)
8934 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8935 break;
8936 if (j <= nbNs) continue;
8937
8938 nsname = xmlGetNamespace(ctxt, attname);
8939 if (nsname != defaults->values[2]) {
8940 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008941 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008942 nbNs++;
8943 }
8944 } else {
8945 /*
8946 * check that it's not a defined attribute
8947 */
8948 for (j = 0;j < nbatts;j+=5) {
8949 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8950 break;
8951 }
8952 if (j < nbatts) continue;
8953
8954 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8955 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008956 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008957 }
8958 maxatts = ctxt->maxatts;
8959 atts = ctxt->atts;
8960 }
8961 atts[nbatts++] = attname;
8962 atts[nbatts++] = aprefix;
8963 if (aprefix == NULL)
8964 atts[nbatts++] = NULL;
8965 else
8966 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008967 atts[nbatts++] = defaults->values[5 * i + 2];
8968 atts[nbatts++] = defaults->values[5 * i + 3];
8969 if ((ctxt->standalone == 1) &&
8970 (defaults->values[5 * i + 4] != NULL)) {
8971 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
8972 "standalone: attribute %s on %s defaulted from external subset\n",
8973 attname, localname);
8974 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008975 nbdef++;
8976 }
8977 }
8978 }
8979 }
8980
Daniel Veillarde70c8772003-11-25 07:21:18 +00008981 /*
8982 * The attributes checkings
8983 */
8984 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008985 /*
8986 * The default namespace does not apply to attribute names.
8987 */
8988 if (atts[i + 1] != NULL) {
8989 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8990 if (nsname == NULL) {
8991 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8992 "Namespace prefix %s for %s on %s is not defined\n",
8993 atts[i + 1], atts[i], localname);
8994 }
8995 atts[i + 2] = nsname;
8996 } else
8997 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008998 /*
8999 * [ WFC: Unique Att Spec ]
9000 * No attribute name may appear more than once in the same
9001 * start-tag or empty-element tag.
9002 * As extended by the Namespace in XML REC.
9003 */
9004 for (j = 0; j < i;j += 5) {
9005 if (atts[i] == atts[j]) {
9006 if (atts[i+1] == atts[j+1]) {
9007 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9008 break;
9009 }
9010 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9011 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9012 "Namespaced Attribute %s in '%s' redefined\n",
9013 atts[i], nsname, NULL);
9014 break;
9015 }
9016 }
9017 }
9018 }
9019
Daniel Veillarde57ec792003-09-10 10:50:59 +00009020 nsname = xmlGetNamespace(ctxt, prefix);
9021 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009022 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9023 "Namespace prefix %s on %s is not defined\n",
9024 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009025 }
9026 *pref = prefix;
9027 *URI = nsname;
9028
9029 /*
9030 * SAX: Start of Element !
9031 */
9032 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9033 (!ctxt->disableSAX)) {
9034 if (nbNs > 0)
9035 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9036 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9037 nbatts / 5, nbdef, atts);
9038 else
9039 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9040 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9041 }
9042
9043 /*
9044 * Free up attribute allocated strings if needed
9045 */
9046 if (attval != 0) {
9047 for (i = 3,j = 0; j < nratts;i += 5,j++)
9048 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9049 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009050 }
9051
9052 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009053
9054base_changed:
9055 /*
9056 * the attribute strings are valid iif the base didn't changed
9057 */
9058 if (attval != 0) {
9059 for (i = 3,j = 0; j < nratts;i += 5,j++)
9060 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9061 xmlFree((xmlChar *) atts[i]);
9062 }
9063 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009064 ctxt->input->line = oldline;
9065 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009066 if (ctxt->wellFormed == 1) {
9067 goto reparse;
9068 }
9069 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009070}
9071
9072/**
9073 * xmlParseEndTag2:
9074 * @ctxt: an XML parser context
9075 * @line: line of the start tag
9076 * @nsNr: number of namespaces on the start tag
9077 *
9078 * parse an end of tag
9079 *
9080 * [42] ETag ::= '</' Name S? '>'
9081 *
9082 * With namespace
9083 *
9084 * [NS 9] ETag ::= '</' QName S? '>'
9085 */
9086
9087static void
9088xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009089 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009090 const xmlChar *name;
9091
9092 GROW;
9093 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009094 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009095 return;
9096 }
9097 SKIP(2);
9098
William M. Brack13dfa872004-09-18 04:52:08 +00009099 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009100 if (ctxt->input->cur[tlen] == '>') {
9101 ctxt->input->cur += tlen + 1;
9102 goto done;
9103 }
9104 ctxt->input->cur += tlen;
9105 name = (xmlChar*)1;
9106 } else {
9107 if (prefix == NULL)
9108 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9109 else
9110 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9111 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009112
9113 /*
9114 * We should definitely be at the ending "S? '>'" part
9115 */
9116 GROW;
9117 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009118 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009119 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009120 } else
9121 NEXT1;
9122
9123 /*
9124 * [ WFC: Element Type Match ]
9125 * The Name in an element's end-tag must match the element type in the
9126 * start-tag.
9127 *
9128 */
9129 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009130 if (name == NULL) name = BAD_CAST "unparseable";
9131 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009132 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009133 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009134 }
9135
9136 /*
9137 * SAX: End of Tag
9138 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009139done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009140 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9141 (!ctxt->disableSAX))
9142 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9143
Daniel Veillard0fb18932003-09-07 09:14:37 +00009144 spacePop(ctxt);
9145 if (nsNr != 0)
9146 nsPop(ctxt, nsNr);
9147 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009148}
9149
9150/**
Owen Taylor3473f882001-02-23 17:55:21 +00009151 * xmlParseCDSect:
9152 * @ctxt: an XML parser context
9153 *
9154 * Parse escaped pure raw content.
9155 *
9156 * [18] CDSect ::= CDStart CData CDEnd
9157 *
9158 * [19] CDStart ::= '<![CDATA['
9159 *
9160 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9161 *
9162 * [21] CDEnd ::= ']]>'
9163 */
9164void
9165xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9166 xmlChar *buf = NULL;
9167 int len = 0;
9168 int size = XML_PARSER_BUFFER_SIZE;
9169 int r, rl;
9170 int s, sl;
9171 int cur, l;
9172 int count = 0;
9173
Daniel Veillard8f597c32003-10-06 08:19:27 +00009174 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009175 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009176 SKIP(9);
9177 } else
9178 return;
9179
9180 ctxt->instate = XML_PARSER_CDATA_SECTION;
9181 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009182 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009183 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009184 ctxt->instate = XML_PARSER_CONTENT;
9185 return;
9186 }
9187 NEXTL(rl);
9188 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009189 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009190 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009191 ctxt->instate = XML_PARSER_CONTENT;
9192 return;
9193 }
9194 NEXTL(sl);
9195 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009196 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009197 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009198 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009199 return;
9200 }
William M. Brack871611b2003-10-18 04:53:14 +00009201 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009202 ((r != ']') || (s != ']') || (cur != '>'))) {
9203 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009204 xmlChar *tmp;
9205
Owen Taylor3473f882001-02-23 17:55:21 +00009206 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009207 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9208 if (tmp == NULL) {
9209 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009210 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009211 return;
9212 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009213 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009214 }
9215 COPY_BUF(rl,buf,len,r);
9216 r = s;
9217 rl = sl;
9218 s = cur;
9219 sl = l;
9220 count++;
9221 if (count > 50) {
9222 GROW;
9223 count = 0;
9224 }
9225 NEXTL(l);
9226 cur = CUR_CHAR(l);
9227 }
9228 buf[len] = 0;
9229 ctxt->instate = XML_PARSER_CONTENT;
9230 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009231 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009232 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009233 xmlFree(buf);
9234 return;
9235 }
9236 NEXTL(l);
9237
9238 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009239 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009240 */
9241 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9242 if (ctxt->sax->cdataBlock != NULL)
9243 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009244 else if (ctxt->sax->characters != NULL)
9245 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009246 }
9247 xmlFree(buf);
9248}
9249
9250/**
9251 * xmlParseContent:
9252 * @ctxt: an XML parser context
9253 *
9254 * Parse a content:
9255 *
9256 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9257 */
9258
9259void
9260xmlParseContent(xmlParserCtxtPtr ctxt) {
9261 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009262 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009263 ((RAW != '<') || (NXT(1) != '/')) &&
9264 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009265 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009266 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009267 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009268
9269 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009270 * First case : a Processing Instruction.
9271 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009272 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009273 xmlParsePI(ctxt);
9274 }
9275
9276 /*
9277 * Second case : a CDSection
9278 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009279 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009280 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009281 xmlParseCDSect(ctxt);
9282 }
9283
9284 /*
9285 * Third case : a comment
9286 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009287 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009288 (NXT(2) == '-') && (NXT(3) == '-')) {
9289 xmlParseComment(ctxt);
9290 ctxt->instate = XML_PARSER_CONTENT;
9291 }
9292
9293 /*
9294 * Fourth case : a sub-element.
9295 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009296 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009297 xmlParseElement(ctxt);
9298 }
9299
9300 /*
9301 * Fifth case : a reference. If if has not been resolved,
9302 * parsing returns it's Name, create the node
9303 */
9304
Daniel Veillard21a0f912001-02-25 19:54:14 +00009305 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009306 xmlParseReference(ctxt);
9307 }
9308
9309 /*
9310 * Last case, text. Note that References are handled directly.
9311 */
9312 else {
9313 xmlParseCharData(ctxt, 0);
9314 }
9315
9316 GROW;
9317 /*
9318 * Pop-up of finished entities.
9319 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009320 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009321 xmlPopInput(ctxt);
9322 SHRINK;
9323
Daniel Veillardfdc91562002-07-01 21:52:03 +00009324 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009325 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9326 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009327 ctxt->instate = XML_PARSER_EOF;
9328 break;
9329 }
9330 }
9331}
9332
9333/**
9334 * xmlParseElement:
9335 * @ctxt: an XML parser context
9336 *
9337 * parse an XML element, this is highly recursive
9338 *
9339 * [39] element ::= EmptyElemTag | STag content ETag
9340 *
9341 * [ WFC: Element Type Match ]
9342 * The Name in an element's end-tag must match the element type in the
9343 * start-tag.
9344 *
Owen Taylor3473f882001-02-23 17:55:21 +00009345 */
9346
9347void
9348xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009349 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009350 const xmlChar *prefix;
9351 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00009352 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009353 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009354 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009355 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009356
Daniel Veillard8915c152008-08-26 13:05:34 +00009357 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9358 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9359 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9360 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9361 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009362 ctxt->instate = XML_PARSER_EOF;
9363 return;
9364 }
9365
Owen Taylor3473f882001-02-23 17:55:21 +00009366 /* Capture start position */
9367 if (ctxt->record_info) {
9368 node_info.begin_pos = ctxt->input->consumed +
9369 (CUR_PTR - ctxt->input->base);
9370 node_info.begin_line = ctxt->input->line;
9371 }
9372
9373 if (ctxt->spaceNr == 0)
9374 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009375 else if (*ctxt->space == -2)
9376 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009377 else
9378 spacePush(ctxt, *ctxt->space);
9379
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009380 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009381#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009382 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009383#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009384 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009385#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009386 else
9387 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009388#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009389 if (name == NULL) {
9390 spacePop(ctxt);
9391 return;
9392 }
9393 namePush(ctxt, name);
9394 ret = ctxt->node;
9395
Daniel Veillard4432df22003-09-28 18:58:27 +00009396#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009397 /*
9398 * [ VC: Root Element Type ]
9399 * The Name in the document type declaration must match the element
9400 * type of the root element.
9401 */
9402 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9403 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9404 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009405#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009406
9407 /*
9408 * Check for an Empty Element.
9409 */
9410 if ((RAW == '/') && (NXT(1) == '>')) {
9411 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009412 if (ctxt->sax2) {
9413 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9414 (!ctxt->disableSAX))
9415 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009416#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009417 } else {
9418 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9419 (!ctxt->disableSAX))
9420 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009421#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009422 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009423 namePop(ctxt);
9424 spacePop(ctxt);
9425 if (nsNr != ctxt->nsNr)
9426 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009427 if ( ret != NULL && ctxt->record_info ) {
9428 node_info.end_pos = ctxt->input->consumed +
9429 (CUR_PTR - ctxt->input->base);
9430 node_info.end_line = ctxt->input->line;
9431 node_info.node = ret;
9432 xmlParserAddNodeInfo(ctxt, &node_info);
9433 }
9434 return;
9435 }
9436 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009437 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009438 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009439 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9440 "Couldn't find end of Start Tag %s line %d\n",
9441 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009442
9443 /*
9444 * end of parsing of this node.
9445 */
9446 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009447 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009448 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009449 if (nsNr != ctxt->nsNr)
9450 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009451
9452 /*
9453 * Capture end position and add node
9454 */
9455 if ( ret != NULL && ctxt->record_info ) {
9456 node_info.end_pos = ctxt->input->consumed +
9457 (CUR_PTR - ctxt->input->base);
9458 node_info.end_line = ctxt->input->line;
9459 node_info.node = ret;
9460 xmlParserAddNodeInfo(ctxt, &node_info);
9461 }
9462 return;
9463 }
9464
9465 /*
9466 * Parse the content of the element:
9467 */
9468 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009469 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009470 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009471 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009472 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009473
9474 /*
9475 * end of parsing of this node.
9476 */
9477 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009478 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009479 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009480 if (nsNr != ctxt->nsNr)
9481 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009482 return;
9483 }
9484
9485 /*
9486 * parse the end of tag: '</' should be here.
9487 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009488 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009489 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009490 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009491 }
9492#ifdef LIBXML_SAX1_ENABLED
9493 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009494 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009495#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009496
9497 /*
9498 * Capture end position and add node
9499 */
9500 if ( ret != NULL && ctxt->record_info ) {
9501 node_info.end_pos = ctxt->input->consumed +
9502 (CUR_PTR - ctxt->input->base);
9503 node_info.end_line = ctxt->input->line;
9504 node_info.node = ret;
9505 xmlParserAddNodeInfo(ctxt, &node_info);
9506 }
9507}
9508
9509/**
9510 * xmlParseVersionNum:
9511 * @ctxt: an XML parser context
9512 *
9513 * parse the XML version value.
9514 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009515 * [26] VersionNum ::= '1.' [0-9]+
9516 *
9517 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009518 *
9519 * Returns the string giving the XML version number, or NULL
9520 */
9521xmlChar *
9522xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9523 xmlChar *buf = NULL;
9524 int len = 0;
9525 int size = 10;
9526 xmlChar cur;
9527
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009528 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009529 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009530 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009531 return(NULL);
9532 }
9533 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009534 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009535 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009536 return(NULL);
9537 }
9538 buf[len++] = cur;
9539 NEXT;
9540 cur=CUR;
9541 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009542 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009543 return(NULL);
9544 }
9545 buf[len++] = cur;
9546 NEXT;
9547 cur=CUR;
9548 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009549 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009550 xmlChar *tmp;
9551
Owen Taylor3473f882001-02-23 17:55:21 +00009552 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009553 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9554 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009555 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009556 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009557 return(NULL);
9558 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009559 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009560 }
9561 buf[len++] = cur;
9562 NEXT;
9563 cur=CUR;
9564 }
9565 buf[len] = 0;
9566 return(buf);
9567}
9568
9569/**
9570 * xmlParseVersionInfo:
9571 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009572 *
Owen Taylor3473f882001-02-23 17:55:21 +00009573 * parse the XML version.
9574 *
9575 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009576 *
Owen Taylor3473f882001-02-23 17:55:21 +00009577 * [25] Eq ::= S? '=' S?
9578 *
9579 * Returns the version string, e.g. "1.0"
9580 */
9581
9582xmlChar *
9583xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9584 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009585
Daniel Veillarda07050d2003-10-19 14:46:32 +00009586 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009587 SKIP(7);
9588 SKIP_BLANKS;
9589 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009590 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009591 return(NULL);
9592 }
9593 NEXT;
9594 SKIP_BLANKS;
9595 if (RAW == '"') {
9596 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009597 version = xmlParseVersionNum(ctxt);
9598 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009599 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009600 } else
9601 NEXT;
9602 } else if (RAW == '\''){
9603 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009604 version = xmlParseVersionNum(ctxt);
9605 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009606 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009607 } else
9608 NEXT;
9609 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009610 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009611 }
9612 }
9613 return(version);
9614}
9615
9616/**
9617 * xmlParseEncName:
9618 * @ctxt: an XML parser context
9619 *
9620 * parse the XML encoding name
9621 *
9622 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9623 *
9624 * Returns the encoding name value or NULL
9625 */
9626xmlChar *
9627xmlParseEncName(xmlParserCtxtPtr ctxt) {
9628 xmlChar *buf = NULL;
9629 int len = 0;
9630 int size = 10;
9631 xmlChar cur;
9632
9633 cur = CUR;
9634 if (((cur >= 'a') && (cur <= 'z')) ||
9635 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009636 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009637 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009638 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009639 return(NULL);
9640 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009641
Owen Taylor3473f882001-02-23 17:55:21 +00009642 buf[len++] = cur;
9643 NEXT;
9644 cur = CUR;
9645 while (((cur >= 'a') && (cur <= 'z')) ||
9646 ((cur >= 'A') && (cur <= 'Z')) ||
9647 ((cur >= '0') && (cur <= '9')) ||
9648 (cur == '.') || (cur == '_') ||
9649 (cur == '-')) {
9650 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009651 xmlChar *tmp;
9652
Owen Taylor3473f882001-02-23 17:55:21 +00009653 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009654 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9655 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009656 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009657 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009658 return(NULL);
9659 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009660 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009661 }
9662 buf[len++] = cur;
9663 NEXT;
9664 cur = CUR;
9665 if (cur == 0) {
9666 SHRINK;
9667 GROW;
9668 cur = CUR;
9669 }
9670 }
9671 buf[len] = 0;
9672 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009673 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009674 }
9675 return(buf);
9676}
9677
9678/**
9679 * xmlParseEncodingDecl:
9680 * @ctxt: an XML parser context
9681 *
9682 * parse the XML encoding declaration
9683 *
9684 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9685 *
9686 * this setups the conversion filters.
9687 *
9688 * Returns the encoding value or NULL
9689 */
9690
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009691const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009692xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9693 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009694
9695 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009696 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009697 SKIP(8);
9698 SKIP_BLANKS;
9699 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009700 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009701 return(NULL);
9702 }
9703 NEXT;
9704 SKIP_BLANKS;
9705 if (RAW == '"') {
9706 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009707 encoding = xmlParseEncName(ctxt);
9708 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009709 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009710 } else
9711 NEXT;
9712 } else if (RAW == '\''){
9713 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009714 encoding = xmlParseEncName(ctxt);
9715 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009716 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009717 } else
9718 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009719 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009720 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009721 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009722 /*
9723 * UTF-16 encoding stwich has already taken place at this stage,
9724 * more over the little-endian/big-endian selection is already done
9725 */
9726 if ((encoding != NULL) &&
9727 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9728 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009729 /*
9730 * If no encoding was passed to the parser, that we are
9731 * using UTF-16 and no decoder is present i.e. the
9732 * document is apparently UTF-8 compatible, then raise an
9733 * encoding mismatch fatal error
9734 */
9735 if ((ctxt->encoding == NULL) &&
9736 (ctxt->input->buf != NULL) &&
9737 (ctxt->input->buf->encoder == NULL)) {
9738 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9739 "Document labelled UTF-16 but has UTF-8 content\n");
9740 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009741 if (ctxt->encoding != NULL)
9742 xmlFree((xmlChar *) ctxt->encoding);
9743 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009744 }
9745 /*
9746 * UTF-8 encoding is handled natively
9747 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009748 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009749 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9750 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009751 if (ctxt->encoding != NULL)
9752 xmlFree((xmlChar *) ctxt->encoding);
9753 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009754 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009755 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009756 xmlCharEncodingHandlerPtr handler;
9757
9758 if (ctxt->input->encoding != NULL)
9759 xmlFree((xmlChar *) ctxt->input->encoding);
9760 ctxt->input->encoding = encoding;
9761
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009762 handler = xmlFindCharEncodingHandler((const char *) encoding);
9763 if (handler != NULL) {
9764 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009765 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009766 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009767 "Unsupported encoding %s\n", encoding);
9768 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009769 }
9770 }
9771 }
9772 return(encoding);
9773}
9774
9775/**
9776 * xmlParseSDDecl:
9777 * @ctxt: an XML parser context
9778 *
9779 * parse the XML standalone declaration
9780 *
9781 * [32] SDDecl ::= S 'standalone' Eq
9782 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9783 *
9784 * [ VC: Standalone Document Declaration ]
9785 * TODO The standalone document declaration must have the value "no"
9786 * if any external markup declarations contain declarations of:
9787 * - attributes with default values, if elements to which these
9788 * attributes apply appear in the document without specifications
9789 * of values for these attributes, or
9790 * - entities (other than amp, lt, gt, apos, quot), if references
9791 * to those entities appear in the document, or
9792 * - attributes with values subject to normalization, where the
9793 * attribute appears in the document with a value which will change
9794 * as a result of normalization, or
9795 * - element types with element content, if white space occurs directly
9796 * within any instance of those types.
9797 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009798 * Returns:
9799 * 1 if standalone="yes"
9800 * 0 if standalone="no"
9801 * -2 if standalone attribute is missing or invalid
9802 * (A standalone value of -2 means that the XML declaration was found,
9803 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009804 */
9805
9806int
9807xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009808 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009809
9810 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009811 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009812 SKIP(10);
9813 SKIP_BLANKS;
9814 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009815 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009816 return(standalone);
9817 }
9818 NEXT;
9819 SKIP_BLANKS;
9820 if (RAW == '\''){
9821 NEXT;
9822 if ((RAW == 'n') && (NXT(1) == 'o')) {
9823 standalone = 0;
9824 SKIP(2);
9825 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9826 (NXT(2) == 's')) {
9827 standalone = 1;
9828 SKIP(3);
9829 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009830 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009831 }
9832 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009833 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009834 } else
9835 NEXT;
9836 } else if (RAW == '"'){
9837 NEXT;
9838 if ((RAW == 'n') && (NXT(1) == 'o')) {
9839 standalone = 0;
9840 SKIP(2);
9841 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9842 (NXT(2) == 's')) {
9843 standalone = 1;
9844 SKIP(3);
9845 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009846 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009847 }
9848 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009849 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009850 } else
9851 NEXT;
9852 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009853 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009854 }
9855 }
9856 return(standalone);
9857}
9858
9859/**
9860 * xmlParseXMLDecl:
9861 * @ctxt: an XML parser context
9862 *
9863 * parse an XML declaration header
9864 *
9865 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9866 */
9867
9868void
9869xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9870 xmlChar *version;
9871
9872 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009873 * This value for standalone indicates that the document has an
9874 * XML declaration but it does not have a standalone attribute.
9875 * It will be overwritten later if a standalone attribute is found.
9876 */
9877 ctxt->input->standalone = -2;
9878
9879 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009880 * We know that '<?xml' is here.
9881 */
9882 SKIP(5);
9883
William M. Brack76e95df2003-10-18 16:20:14 +00009884 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009885 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9886 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009887 }
9888 SKIP_BLANKS;
9889
9890 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009891 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009892 */
9893 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009894 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009895 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009896 } else {
9897 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9898 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009899 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009900 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009901 if (ctxt->options & XML_PARSE_OLD10) {
9902 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9903 "Unsupported version '%s'\n",
9904 version);
9905 } else {
9906 if ((version[0] == '1') && ((version[1] == '.'))) {
9907 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9908 "Unsupported version '%s'\n",
9909 version, NULL);
9910 } else {
9911 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9912 "Unsupported version '%s'\n",
9913 version);
9914 }
9915 }
Daniel Veillard19840942001-11-29 16:11:38 +00009916 }
9917 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009918 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009919 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009920 }
Owen Taylor3473f882001-02-23 17:55:21 +00009921
9922 /*
9923 * We may have the encoding declaration
9924 */
William M. Brack76e95df2003-10-18 16:20:14 +00009925 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009926 if ((RAW == '?') && (NXT(1) == '>')) {
9927 SKIP(2);
9928 return;
9929 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009930 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009931 }
9932 xmlParseEncodingDecl(ctxt);
9933 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9934 /*
9935 * The XML REC instructs us to stop parsing right here
9936 */
9937 return;
9938 }
9939
9940 /*
9941 * We may have the standalone status.
9942 */
William M. Brack76e95df2003-10-18 16:20:14 +00009943 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009944 if ((RAW == '?') && (NXT(1) == '>')) {
9945 SKIP(2);
9946 return;
9947 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009948 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009949 }
9950 SKIP_BLANKS;
9951 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9952
9953 SKIP_BLANKS;
9954 if ((RAW == '?') && (NXT(1) == '>')) {
9955 SKIP(2);
9956 } else if (RAW == '>') {
9957 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009958 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009959 NEXT;
9960 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009961 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009962 MOVETO_ENDTAG(CUR_PTR);
9963 NEXT;
9964 }
9965}
9966
9967/**
9968 * xmlParseMisc:
9969 * @ctxt: an XML parser context
9970 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009971 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009972 *
9973 * [27] Misc ::= Comment | PI | S
9974 */
9975
9976void
9977xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009978 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009979 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009980 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009981 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009982 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009983 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009984 NEXT;
9985 } else
9986 xmlParseComment(ctxt);
9987 }
9988}
9989
9990/**
9991 * xmlParseDocument:
9992 * @ctxt: an XML parser context
9993 *
9994 * parse an XML document (and build a tree if using the standard SAX
9995 * interface).
9996 *
9997 * [1] document ::= prolog element Misc*
9998 *
9999 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10000 *
10001 * Returns 0, -1 in case of error. the parser context is augmented
10002 * as a result of the parsing.
10003 */
10004
10005int
10006xmlParseDocument(xmlParserCtxtPtr ctxt) {
10007 xmlChar start[4];
10008 xmlCharEncoding enc;
10009
10010 xmlInitParser();
10011
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010012 if ((ctxt == NULL) || (ctxt->input == NULL))
10013 return(-1);
10014
Owen Taylor3473f882001-02-23 17:55:21 +000010015 GROW;
10016
10017 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010018 * SAX: detecting the level.
10019 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010020 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010021
10022 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010023 * SAX: beginning of the document processing.
10024 */
10025 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10026 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10027
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010028 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10029 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010030 /*
10031 * Get the 4 first bytes and decode the charset
10032 * if enc != XML_CHAR_ENCODING_NONE
10033 * plug some encoding conversion routines.
10034 */
10035 start[0] = RAW;
10036 start[1] = NXT(1);
10037 start[2] = NXT(2);
10038 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010039 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010040 if (enc != XML_CHAR_ENCODING_NONE) {
10041 xmlSwitchEncoding(ctxt, enc);
10042 }
Owen Taylor3473f882001-02-23 17:55:21 +000010043 }
10044
10045
10046 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010047 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010048 }
10049
10050 /*
10051 * Check for the XMLDecl in the Prolog.
10052 */
10053 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010054 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010055
10056 /*
10057 * Note that we will switch encoding on the fly.
10058 */
10059 xmlParseXMLDecl(ctxt);
10060 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10061 /*
10062 * The XML REC instructs us to stop parsing right here
10063 */
10064 return(-1);
10065 }
10066 ctxt->standalone = ctxt->input->standalone;
10067 SKIP_BLANKS;
10068 } else {
10069 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10070 }
10071 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10072 ctxt->sax->startDocument(ctxt->userData);
10073
10074 /*
10075 * The Misc part of the Prolog
10076 */
10077 GROW;
10078 xmlParseMisc(ctxt);
10079
10080 /*
10081 * Then possibly doc type declaration(s) and more Misc
10082 * (doctypedecl Misc*)?
10083 */
10084 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010085 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010086
10087 ctxt->inSubset = 1;
10088 xmlParseDocTypeDecl(ctxt);
10089 if (RAW == '[') {
10090 ctxt->instate = XML_PARSER_DTD;
10091 xmlParseInternalSubset(ctxt);
10092 }
10093
10094 /*
10095 * Create and update the external subset.
10096 */
10097 ctxt->inSubset = 2;
10098 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10099 (!ctxt->disableSAX))
10100 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10101 ctxt->extSubSystem, ctxt->extSubURI);
10102 ctxt->inSubset = 0;
10103
Daniel Veillardac4118d2008-01-11 05:27:32 +000010104 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010105
10106 ctxt->instate = XML_PARSER_PROLOG;
10107 xmlParseMisc(ctxt);
10108 }
10109
10110 /*
10111 * Time to start parsing the tree itself
10112 */
10113 GROW;
10114 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010115 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10116 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010117 } else {
10118 ctxt->instate = XML_PARSER_CONTENT;
10119 xmlParseElement(ctxt);
10120 ctxt->instate = XML_PARSER_EPILOG;
10121
10122
10123 /*
10124 * The Misc part at the end
10125 */
10126 xmlParseMisc(ctxt);
10127
Daniel Veillard561b7f82002-03-20 21:55:57 +000010128 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010129 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010130 }
10131 ctxt->instate = XML_PARSER_EOF;
10132 }
10133
10134 /*
10135 * SAX: end of the document processing.
10136 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010137 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010138 ctxt->sax->endDocument(ctxt->userData);
10139
Daniel Veillard5997aca2002-03-18 18:36:20 +000010140 /*
10141 * Remove locally kept entity definitions if the tree was not built
10142 */
10143 if ((ctxt->myDoc != NULL) &&
10144 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10145 xmlFreeDoc(ctxt->myDoc);
10146 ctxt->myDoc = NULL;
10147 }
10148
Daniel Veillardae0765b2008-07-31 19:54:59 +000010149 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10150 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10151 if (ctxt->valid)
10152 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10153 if (ctxt->nsWellFormed)
10154 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10155 if (ctxt->options & XML_PARSE_OLD10)
10156 ctxt->myDoc->properties |= XML_DOC_OLD10;
10157 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010158 if (! ctxt->wellFormed) {
10159 ctxt->valid = 0;
10160 return(-1);
10161 }
Owen Taylor3473f882001-02-23 17:55:21 +000010162 return(0);
10163}
10164
10165/**
10166 * xmlParseExtParsedEnt:
10167 * @ctxt: an XML parser context
10168 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010169 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010170 * An external general parsed entity is well-formed if it matches the
10171 * production labeled extParsedEnt.
10172 *
10173 * [78] extParsedEnt ::= TextDecl? content
10174 *
10175 * Returns 0, -1 in case of error. the parser context is augmented
10176 * as a result of the parsing.
10177 */
10178
10179int
10180xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10181 xmlChar start[4];
10182 xmlCharEncoding enc;
10183
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010184 if ((ctxt == NULL) || (ctxt->input == NULL))
10185 return(-1);
10186
Owen Taylor3473f882001-02-23 17:55:21 +000010187 xmlDefaultSAXHandlerInit();
10188
Daniel Veillard309f81d2003-09-23 09:02:53 +000010189 xmlDetectSAX2(ctxt);
10190
Owen Taylor3473f882001-02-23 17:55:21 +000010191 GROW;
10192
10193 /*
10194 * SAX: beginning of the document processing.
10195 */
10196 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10197 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10198
10199 /*
10200 * Get the 4 first bytes and decode the charset
10201 * if enc != XML_CHAR_ENCODING_NONE
10202 * plug some encoding conversion routines.
10203 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010204 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10205 start[0] = RAW;
10206 start[1] = NXT(1);
10207 start[2] = NXT(2);
10208 start[3] = NXT(3);
10209 enc = xmlDetectCharEncoding(start, 4);
10210 if (enc != XML_CHAR_ENCODING_NONE) {
10211 xmlSwitchEncoding(ctxt, enc);
10212 }
Owen Taylor3473f882001-02-23 17:55:21 +000010213 }
10214
10215
10216 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010217 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010218 }
10219
10220 /*
10221 * Check for the XMLDecl in the Prolog.
10222 */
10223 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010224 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010225
10226 /*
10227 * Note that we will switch encoding on the fly.
10228 */
10229 xmlParseXMLDecl(ctxt);
10230 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10231 /*
10232 * The XML REC instructs us to stop parsing right here
10233 */
10234 return(-1);
10235 }
10236 SKIP_BLANKS;
10237 } else {
10238 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10239 }
10240 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10241 ctxt->sax->startDocument(ctxt->userData);
10242
10243 /*
10244 * Doing validity checking on chunk doesn't make sense
10245 */
10246 ctxt->instate = XML_PARSER_CONTENT;
10247 ctxt->validate = 0;
10248 ctxt->loadsubset = 0;
10249 ctxt->depth = 0;
10250
10251 xmlParseContent(ctxt);
10252
10253 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010254 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010255 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010256 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010257 }
10258
10259 /*
10260 * SAX: end of the document processing.
10261 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010262 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010263 ctxt->sax->endDocument(ctxt->userData);
10264
10265 if (! ctxt->wellFormed) return(-1);
10266 return(0);
10267}
10268
Daniel Veillard73b013f2003-09-30 12:36:01 +000010269#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010270/************************************************************************
10271 * *
10272 * Progressive parsing interfaces *
10273 * *
10274 ************************************************************************/
10275
10276/**
10277 * xmlParseLookupSequence:
10278 * @ctxt: an XML parser context
10279 * @first: the first char to lookup
10280 * @next: the next char to lookup or zero
10281 * @third: the next char to lookup or zero
10282 *
10283 * Try to find if a sequence (first, next, third) or just (first next) or
10284 * (first) is available in the input stream.
10285 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10286 * to avoid rescanning sequences of bytes, it DOES change the state of the
10287 * parser, do not use liberally.
10288 *
10289 * Returns the index to the current parsing point if the full sequence
10290 * is available, -1 otherwise.
10291 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010292static int
Owen Taylor3473f882001-02-23 17:55:21 +000010293xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10294 xmlChar next, xmlChar third) {
10295 int base, len;
10296 xmlParserInputPtr in;
10297 const xmlChar *buf;
10298
10299 in = ctxt->input;
10300 if (in == NULL) return(-1);
10301 base = in->cur - in->base;
10302 if (base < 0) return(-1);
10303 if (ctxt->checkIndex > base)
10304 base = ctxt->checkIndex;
10305 if (in->buf == NULL) {
10306 buf = in->base;
10307 len = in->length;
10308 } else {
10309 buf = in->buf->buffer->content;
10310 len = in->buf->buffer->use;
10311 }
10312 /* take into account the sequence length */
10313 if (third) len -= 2;
10314 else if (next) len --;
10315 for (;base < len;base++) {
10316 if (buf[base] == first) {
10317 if (third != 0) {
10318 if ((buf[base + 1] != next) ||
10319 (buf[base + 2] != third)) continue;
10320 } else if (next != 0) {
10321 if (buf[base + 1] != next) continue;
10322 }
10323 ctxt->checkIndex = 0;
10324#ifdef DEBUG_PUSH
10325 if (next == 0)
10326 xmlGenericError(xmlGenericErrorContext,
10327 "PP: lookup '%c' found at %d\n",
10328 first, base);
10329 else if (third == 0)
10330 xmlGenericError(xmlGenericErrorContext,
10331 "PP: lookup '%c%c' found at %d\n",
10332 first, next, base);
10333 else
10334 xmlGenericError(xmlGenericErrorContext,
10335 "PP: lookup '%c%c%c' found at %d\n",
10336 first, next, third, base);
10337#endif
10338 return(base - (in->cur - in->base));
10339 }
10340 }
10341 ctxt->checkIndex = base;
10342#ifdef DEBUG_PUSH
10343 if (next == 0)
10344 xmlGenericError(xmlGenericErrorContext,
10345 "PP: lookup '%c' failed\n", first);
10346 else if (third == 0)
10347 xmlGenericError(xmlGenericErrorContext,
10348 "PP: lookup '%c%c' failed\n", first, next);
10349 else
10350 xmlGenericError(xmlGenericErrorContext,
10351 "PP: lookup '%c%c%c' failed\n", first, next, third);
10352#endif
10353 return(-1);
10354}
10355
10356/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010357 * xmlParseGetLasts:
10358 * @ctxt: an XML parser context
10359 * @lastlt: pointer to store the last '<' from the input
10360 * @lastgt: pointer to store the last '>' from the input
10361 *
10362 * Lookup the last < and > in the current chunk
10363 */
10364static void
10365xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10366 const xmlChar **lastgt) {
10367 const xmlChar *tmp;
10368
10369 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10370 xmlGenericError(xmlGenericErrorContext,
10371 "Internal error: xmlParseGetLasts\n");
10372 return;
10373 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010374 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010375 tmp = ctxt->input->end;
10376 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010377 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010378 if (tmp < ctxt->input->base) {
10379 *lastlt = NULL;
10380 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010381 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010382 *lastlt = tmp;
10383 tmp++;
10384 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10385 if (*tmp == '\'') {
10386 tmp++;
10387 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10388 if (tmp < ctxt->input->end) tmp++;
10389 } else if (*tmp == '"') {
10390 tmp++;
10391 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10392 if (tmp < ctxt->input->end) tmp++;
10393 } else
10394 tmp++;
10395 }
10396 if (tmp < ctxt->input->end)
10397 *lastgt = tmp;
10398 else {
10399 tmp = *lastlt;
10400 tmp--;
10401 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10402 if (tmp >= ctxt->input->base)
10403 *lastgt = tmp;
10404 else
10405 *lastgt = NULL;
10406 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010407 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010408 } else {
10409 *lastlt = NULL;
10410 *lastgt = NULL;
10411 }
10412}
10413/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010414 * xmlCheckCdataPush:
10415 * @cur: pointer to the bock of characters
10416 * @len: length of the block in bytes
10417 *
10418 * Check that the block of characters is okay as SCdata content [20]
10419 *
10420 * Returns the number of bytes to pass if okay, a negative index where an
10421 * UTF-8 error occured otherwise
10422 */
10423static int
10424xmlCheckCdataPush(const xmlChar *utf, int len) {
10425 int ix;
10426 unsigned char c;
10427 int codepoint;
10428
10429 if ((utf == NULL) || (len <= 0))
10430 return(0);
10431
10432 for (ix = 0; ix < len;) { /* string is 0-terminated */
10433 c = utf[ix];
10434 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10435 if (c >= 0x20)
10436 ix++;
10437 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10438 ix++;
10439 else
10440 return(-ix);
10441 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10442 if (ix + 2 > len) return(ix);
10443 if ((utf[ix+1] & 0xc0 ) != 0x80)
10444 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010445 codepoint = (utf[ix] & 0x1f) << 6;
10446 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010447 if (!xmlIsCharQ(codepoint))
10448 return(-ix);
10449 ix += 2;
10450 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10451 if (ix + 3 > len) return(ix);
10452 if (((utf[ix+1] & 0xc0) != 0x80) ||
10453 ((utf[ix+2] & 0xc0) != 0x80))
10454 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010455 codepoint = (utf[ix] & 0xf) << 12;
10456 codepoint |= (utf[ix+1] & 0x3f) << 6;
10457 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010458 if (!xmlIsCharQ(codepoint))
10459 return(-ix);
10460 ix += 3;
10461 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10462 if (ix + 4 > len) return(ix);
10463 if (((utf[ix+1] & 0xc0) != 0x80) ||
10464 ((utf[ix+2] & 0xc0) != 0x80) ||
10465 ((utf[ix+3] & 0xc0) != 0x80))
10466 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010467 codepoint = (utf[ix] & 0x7) << 18;
10468 codepoint |= (utf[ix+1] & 0x3f) << 12;
10469 codepoint |= (utf[ix+2] & 0x3f) << 6;
10470 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010471 if (!xmlIsCharQ(codepoint))
10472 return(-ix);
10473 ix += 4;
10474 } else /* unknown encoding */
10475 return(-ix);
10476 }
10477 return(ix);
10478}
10479
10480/**
Owen Taylor3473f882001-02-23 17:55:21 +000010481 * xmlParseTryOrFinish:
10482 * @ctxt: an XML parser context
10483 * @terminate: last chunk indicator
10484 *
10485 * Try to progress on parsing
10486 *
10487 * Returns zero if no parsing was possible
10488 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010489static int
Owen Taylor3473f882001-02-23 17:55:21 +000010490xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10491 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010492 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010493 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010494 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010495
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010496 if (ctxt->input == NULL)
10497 return(0);
10498
Owen Taylor3473f882001-02-23 17:55:21 +000010499#ifdef DEBUG_PUSH
10500 switch (ctxt->instate) {
10501 case XML_PARSER_EOF:
10502 xmlGenericError(xmlGenericErrorContext,
10503 "PP: try EOF\n"); break;
10504 case XML_PARSER_START:
10505 xmlGenericError(xmlGenericErrorContext,
10506 "PP: try START\n"); break;
10507 case XML_PARSER_MISC:
10508 xmlGenericError(xmlGenericErrorContext,
10509 "PP: try MISC\n");break;
10510 case XML_PARSER_COMMENT:
10511 xmlGenericError(xmlGenericErrorContext,
10512 "PP: try COMMENT\n");break;
10513 case XML_PARSER_PROLOG:
10514 xmlGenericError(xmlGenericErrorContext,
10515 "PP: try PROLOG\n");break;
10516 case XML_PARSER_START_TAG:
10517 xmlGenericError(xmlGenericErrorContext,
10518 "PP: try START_TAG\n");break;
10519 case XML_PARSER_CONTENT:
10520 xmlGenericError(xmlGenericErrorContext,
10521 "PP: try CONTENT\n");break;
10522 case XML_PARSER_CDATA_SECTION:
10523 xmlGenericError(xmlGenericErrorContext,
10524 "PP: try CDATA_SECTION\n");break;
10525 case XML_PARSER_END_TAG:
10526 xmlGenericError(xmlGenericErrorContext,
10527 "PP: try END_TAG\n");break;
10528 case XML_PARSER_ENTITY_DECL:
10529 xmlGenericError(xmlGenericErrorContext,
10530 "PP: try ENTITY_DECL\n");break;
10531 case XML_PARSER_ENTITY_VALUE:
10532 xmlGenericError(xmlGenericErrorContext,
10533 "PP: try ENTITY_VALUE\n");break;
10534 case XML_PARSER_ATTRIBUTE_VALUE:
10535 xmlGenericError(xmlGenericErrorContext,
10536 "PP: try ATTRIBUTE_VALUE\n");break;
10537 case XML_PARSER_DTD:
10538 xmlGenericError(xmlGenericErrorContext,
10539 "PP: try DTD\n");break;
10540 case XML_PARSER_EPILOG:
10541 xmlGenericError(xmlGenericErrorContext,
10542 "PP: try EPILOG\n");break;
10543 case XML_PARSER_PI:
10544 xmlGenericError(xmlGenericErrorContext,
10545 "PP: try PI\n");break;
10546 case XML_PARSER_IGNORE:
10547 xmlGenericError(xmlGenericErrorContext,
10548 "PP: try IGNORE\n");break;
10549 }
10550#endif
10551
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010552 if ((ctxt->input != NULL) &&
10553 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010554 xmlSHRINK(ctxt);
10555 ctxt->checkIndex = 0;
10556 }
10557 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010558
Daniel Veillarda880b122003-04-21 21:36:41 +000010559 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010560 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010561 return(0);
10562
10563
Owen Taylor3473f882001-02-23 17:55:21 +000010564 /*
10565 * Pop-up of finished entities.
10566 */
10567 while ((RAW == 0) && (ctxt->inputNr > 1))
10568 xmlPopInput(ctxt);
10569
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010570 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010571 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010572 avail = ctxt->input->length -
10573 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010574 else {
10575 /*
10576 * If we are operating on converted input, try to flush
10577 * remainng chars to avoid them stalling in the non-converted
10578 * buffer.
10579 */
10580 if ((ctxt->input->buf->raw != NULL) &&
10581 (ctxt->input->buf->raw->use > 0)) {
10582 int base = ctxt->input->base -
10583 ctxt->input->buf->buffer->content;
10584 int current = ctxt->input->cur - ctxt->input->base;
10585
10586 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10587 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10588 ctxt->input->cur = ctxt->input->base + current;
10589 ctxt->input->end =
10590 &ctxt->input->buf->buffer->content[
10591 ctxt->input->buf->buffer->use];
10592 }
10593 avail = ctxt->input->buf->buffer->use -
10594 (ctxt->input->cur - ctxt->input->base);
10595 }
Owen Taylor3473f882001-02-23 17:55:21 +000010596 if (avail < 1)
10597 goto done;
10598 switch (ctxt->instate) {
10599 case XML_PARSER_EOF:
10600 /*
10601 * Document parsing is done !
10602 */
10603 goto done;
10604 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010605 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10606 xmlChar start[4];
10607 xmlCharEncoding enc;
10608
10609 /*
10610 * Very first chars read from the document flow.
10611 */
10612 if (avail < 4)
10613 goto done;
10614
10615 /*
10616 * Get the 4 first bytes and decode the charset
10617 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010618 * plug some encoding conversion routines,
10619 * else xmlSwitchEncoding will set to (default)
10620 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010621 */
10622 start[0] = RAW;
10623 start[1] = NXT(1);
10624 start[2] = NXT(2);
10625 start[3] = NXT(3);
10626 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010627 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010628 break;
10629 }
Owen Taylor3473f882001-02-23 17:55:21 +000010630
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010631 if (avail < 2)
10632 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010633 cur = ctxt->input->cur[0];
10634 next = ctxt->input->cur[1];
10635 if (cur == 0) {
10636 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10637 ctxt->sax->setDocumentLocator(ctxt->userData,
10638 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010639 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010640 ctxt->instate = XML_PARSER_EOF;
10641#ifdef DEBUG_PUSH
10642 xmlGenericError(xmlGenericErrorContext,
10643 "PP: entering EOF\n");
10644#endif
10645 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10646 ctxt->sax->endDocument(ctxt->userData);
10647 goto done;
10648 }
10649 if ((cur == '<') && (next == '?')) {
10650 /* PI or XML decl */
10651 if (avail < 5) return(ret);
10652 if ((!terminate) &&
10653 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10654 return(ret);
10655 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10656 ctxt->sax->setDocumentLocator(ctxt->userData,
10657 &xmlDefaultSAXLocator);
10658 if ((ctxt->input->cur[2] == 'x') &&
10659 (ctxt->input->cur[3] == 'm') &&
10660 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010661 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010662 ret += 5;
10663#ifdef DEBUG_PUSH
10664 xmlGenericError(xmlGenericErrorContext,
10665 "PP: Parsing XML Decl\n");
10666#endif
10667 xmlParseXMLDecl(ctxt);
10668 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10669 /*
10670 * The XML REC instructs us to stop parsing right
10671 * here
10672 */
10673 ctxt->instate = XML_PARSER_EOF;
10674 return(0);
10675 }
10676 ctxt->standalone = ctxt->input->standalone;
10677 if ((ctxt->encoding == NULL) &&
10678 (ctxt->input->encoding != NULL))
10679 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10680 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10681 (!ctxt->disableSAX))
10682 ctxt->sax->startDocument(ctxt->userData);
10683 ctxt->instate = XML_PARSER_MISC;
10684#ifdef DEBUG_PUSH
10685 xmlGenericError(xmlGenericErrorContext,
10686 "PP: entering MISC\n");
10687#endif
10688 } else {
10689 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10690 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10691 (!ctxt->disableSAX))
10692 ctxt->sax->startDocument(ctxt->userData);
10693 ctxt->instate = XML_PARSER_MISC;
10694#ifdef DEBUG_PUSH
10695 xmlGenericError(xmlGenericErrorContext,
10696 "PP: entering MISC\n");
10697#endif
10698 }
10699 } else {
10700 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10701 ctxt->sax->setDocumentLocator(ctxt->userData,
10702 &xmlDefaultSAXLocator);
10703 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010704 if (ctxt->version == NULL) {
10705 xmlErrMemory(ctxt, NULL);
10706 break;
10707 }
Owen Taylor3473f882001-02-23 17:55:21 +000010708 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10709 (!ctxt->disableSAX))
10710 ctxt->sax->startDocument(ctxt->userData);
10711 ctxt->instate = XML_PARSER_MISC;
10712#ifdef DEBUG_PUSH
10713 xmlGenericError(xmlGenericErrorContext,
10714 "PP: entering MISC\n");
10715#endif
10716 }
10717 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010718 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010719 const xmlChar *name;
10720 const xmlChar *prefix;
10721 const xmlChar *URI;
10722 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010723
10724 if ((avail < 2) && (ctxt->inputNr == 1))
10725 goto done;
10726 cur = ctxt->input->cur[0];
10727 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010728 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010729 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010730 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10731 ctxt->sax->endDocument(ctxt->userData);
10732 goto done;
10733 }
10734 if (!terminate) {
10735 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010736 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010737 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010738 goto done;
10739 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10740 goto done;
10741 }
10742 }
10743 if (ctxt->spaceNr == 0)
10744 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010745 else if (*ctxt->space == -2)
10746 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010747 else
10748 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010749#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010750 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010751#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010752 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010753#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010754 else
10755 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010756#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010757 if (name == NULL) {
10758 spacePop(ctxt);
10759 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010760 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10761 ctxt->sax->endDocument(ctxt->userData);
10762 goto done;
10763 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010764#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010765 /*
10766 * [ VC: Root Element Type ]
10767 * The Name in the document type declaration must match
10768 * the element type of the root element.
10769 */
10770 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10771 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10772 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010773#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010774
10775 /*
10776 * Check for an Empty Element.
10777 */
10778 if ((RAW == '/') && (NXT(1) == '>')) {
10779 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010780
10781 if (ctxt->sax2) {
10782 if ((ctxt->sax != NULL) &&
10783 (ctxt->sax->endElementNs != NULL) &&
10784 (!ctxt->disableSAX))
10785 ctxt->sax->endElementNs(ctxt->userData, name,
10786 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010787 if (ctxt->nsNr - nsNr > 0)
10788 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010789#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010790 } else {
10791 if ((ctxt->sax != NULL) &&
10792 (ctxt->sax->endElement != NULL) &&
10793 (!ctxt->disableSAX))
10794 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010795#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010796 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010797 spacePop(ctxt);
10798 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010799 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010800 } else {
10801 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010802 }
10803 break;
10804 }
10805 if (RAW == '>') {
10806 NEXT;
10807 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010808 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010809 "Couldn't find end of Start Tag %s\n",
10810 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010811 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010812 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010813 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010814 if (ctxt->sax2)
10815 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010816#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010817 else
10818 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010819#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010820
Daniel Veillarda880b122003-04-21 21:36:41 +000010821 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010822 break;
10823 }
10824 case XML_PARSER_CONTENT: {
10825 const xmlChar *test;
10826 unsigned int cons;
10827 if ((avail < 2) && (ctxt->inputNr == 1))
10828 goto done;
10829 cur = ctxt->input->cur[0];
10830 next = ctxt->input->cur[1];
10831
10832 test = CUR_PTR;
10833 cons = ctxt->input->consumed;
10834 if ((cur == '<') && (next == '/')) {
10835 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010836 break;
10837 } else if ((cur == '<') && (next == '?')) {
10838 if ((!terminate) &&
10839 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10840 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010841 xmlParsePI(ctxt);
10842 } else if ((cur == '<') && (next != '!')) {
10843 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010844 break;
10845 } else if ((cur == '<') && (next == '!') &&
10846 (ctxt->input->cur[2] == '-') &&
10847 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010848 int term;
10849
10850 if (avail < 4)
10851 goto done;
10852 ctxt->input->cur += 4;
10853 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10854 ctxt->input->cur -= 4;
10855 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010856 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010857 xmlParseComment(ctxt);
10858 ctxt->instate = XML_PARSER_CONTENT;
10859 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10860 (ctxt->input->cur[2] == '[') &&
10861 (ctxt->input->cur[3] == 'C') &&
10862 (ctxt->input->cur[4] == 'D') &&
10863 (ctxt->input->cur[5] == 'A') &&
10864 (ctxt->input->cur[6] == 'T') &&
10865 (ctxt->input->cur[7] == 'A') &&
10866 (ctxt->input->cur[8] == '[')) {
10867 SKIP(9);
10868 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010869 break;
10870 } else if ((cur == '<') && (next == '!') &&
10871 (avail < 9)) {
10872 goto done;
10873 } else if (cur == '&') {
10874 if ((!terminate) &&
10875 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10876 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010877 xmlParseReference(ctxt);
10878 } else {
10879 /* TODO Avoid the extra copy, handle directly !!! */
10880 /*
10881 * Goal of the following test is:
10882 * - minimize calls to the SAX 'character' callback
10883 * when they are mergeable
10884 * - handle an problem for isBlank when we only parse
10885 * a sequence of blank chars and the next one is
10886 * not available to check against '<' presence.
10887 * - tries to homogenize the differences in SAX
10888 * callbacks between the push and pull versions
10889 * of the parser.
10890 */
10891 if ((ctxt->inputNr == 1) &&
10892 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10893 if (!terminate) {
10894 if (ctxt->progressive) {
10895 if ((lastlt == NULL) ||
10896 (ctxt->input->cur > lastlt))
10897 goto done;
10898 } else if (xmlParseLookupSequence(ctxt,
10899 '<', 0, 0) < 0) {
10900 goto done;
10901 }
10902 }
10903 }
10904 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010905 xmlParseCharData(ctxt, 0);
10906 }
10907 /*
10908 * Pop-up of finished entities.
10909 */
10910 while ((RAW == 0) && (ctxt->inputNr > 1))
10911 xmlPopInput(ctxt);
10912 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010913 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10914 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010915 ctxt->instate = XML_PARSER_EOF;
10916 break;
10917 }
10918 break;
10919 }
10920 case XML_PARSER_END_TAG:
10921 if (avail < 2)
10922 goto done;
10923 if (!terminate) {
10924 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010925 /* > can be found unescaped in attribute values */
10926 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010927 goto done;
10928 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10929 goto done;
10930 }
10931 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010932 if (ctxt->sax2) {
10933 xmlParseEndTag2(ctxt,
10934 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10935 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010936 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010937 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010938 }
10939#ifdef LIBXML_SAX1_ENABLED
10940 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010941 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010942#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010943 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010944 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010945 } else {
10946 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010947 }
10948 break;
10949 case XML_PARSER_CDATA_SECTION: {
10950 /*
10951 * The Push mode need to have the SAX callback for
10952 * cdataBlock merge back contiguous callbacks.
10953 */
10954 int base;
10955
10956 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10957 if (base < 0) {
10958 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010959 int tmp;
10960
10961 tmp = xmlCheckCdataPush(ctxt->input->cur,
10962 XML_PARSER_BIG_BUFFER_SIZE);
10963 if (tmp < 0) {
10964 tmp = -tmp;
10965 ctxt->input->cur += tmp;
10966 goto encoding_error;
10967 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010968 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10969 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010970 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010971 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010972 else if (ctxt->sax->characters != NULL)
10973 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010974 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010975 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010976 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010977 ctxt->checkIndex = 0;
10978 }
10979 goto done;
10980 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010981 int tmp;
10982
10983 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10984 if ((tmp < 0) || (tmp != base)) {
10985 tmp = -tmp;
10986 ctxt->input->cur += tmp;
10987 goto encoding_error;
10988 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010989 if ((ctxt->sax != NULL) && (base == 0) &&
10990 (ctxt->sax->cdataBlock != NULL) &&
10991 (!ctxt->disableSAX)) {
10992 /*
10993 * Special case to provide identical behaviour
10994 * between pull and push parsers on enpty CDATA
10995 * sections
10996 */
10997 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10998 (!strncmp((const char *)&ctxt->input->cur[-9],
10999 "<![CDATA[", 9)))
11000 ctxt->sax->cdataBlock(ctxt->userData,
11001 BAD_CAST "", 0);
11002 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011003 (!ctxt->disableSAX)) {
11004 if (ctxt->sax->cdataBlock != NULL)
11005 ctxt->sax->cdataBlock(ctxt->userData,
11006 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011007 else if (ctxt->sax->characters != NULL)
11008 ctxt->sax->characters(ctxt->userData,
11009 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011010 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011011 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011012 ctxt->checkIndex = 0;
11013 ctxt->instate = XML_PARSER_CONTENT;
11014#ifdef DEBUG_PUSH
11015 xmlGenericError(xmlGenericErrorContext,
11016 "PP: entering CONTENT\n");
11017#endif
11018 }
11019 break;
11020 }
Owen Taylor3473f882001-02-23 17:55:21 +000011021 case XML_PARSER_MISC:
11022 SKIP_BLANKS;
11023 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011024 avail = ctxt->input->length -
11025 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011026 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011027 avail = ctxt->input->buf->buffer->use -
11028 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011029 if (avail < 2)
11030 goto done;
11031 cur = ctxt->input->cur[0];
11032 next = ctxt->input->cur[1];
11033 if ((cur == '<') && (next == '?')) {
11034 if ((!terminate) &&
11035 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11036 goto done;
11037#ifdef DEBUG_PUSH
11038 xmlGenericError(xmlGenericErrorContext,
11039 "PP: Parsing PI\n");
11040#endif
11041 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011042 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011043 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011044 (ctxt->input->cur[2] == '-') &&
11045 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011046 if ((!terminate) &&
11047 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11048 goto done;
11049#ifdef DEBUG_PUSH
11050 xmlGenericError(xmlGenericErrorContext,
11051 "PP: Parsing Comment\n");
11052#endif
11053 xmlParseComment(ctxt);
11054 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011055 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011056 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011057 (ctxt->input->cur[2] == 'D') &&
11058 (ctxt->input->cur[3] == 'O') &&
11059 (ctxt->input->cur[4] == 'C') &&
11060 (ctxt->input->cur[5] == 'T') &&
11061 (ctxt->input->cur[6] == 'Y') &&
11062 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011063 (ctxt->input->cur[8] == 'E')) {
11064 if ((!terminate) &&
11065 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11066 goto done;
11067#ifdef DEBUG_PUSH
11068 xmlGenericError(xmlGenericErrorContext,
11069 "PP: Parsing internal subset\n");
11070#endif
11071 ctxt->inSubset = 1;
11072 xmlParseDocTypeDecl(ctxt);
11073 if (RAW == '[') {
11074 ctxt->instate = XML_PARSER_DTD;
11075#ifdef DEBUG_PUSH
11076 xmlGenericError(xmlGenericErrorContext,
11077 "PP: entering DTD\n");
11078#endif
11079 } else {
11080 /*
11081 * Create and update the external subset.
11082 */
11083 ctxt->inSubset = 2;
11084 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11085 (ctxt->sax->externalSubset != NULL))
11086 ctxt->sax->externalSubset(ctxt->userData,
11087 ctxt->intSubName, ctxt->extSubSystem,
11088 ctxt->extSubURI);
11089 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011090 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011091 ctxt->instate = XML_PARSER_PROLOG;
11092#ifdef DEBUG_PUSH
11093 xmlGenericError(xmlGenericErrorContext,
11094 "PP: entering PROLOG\n");
11095#endif
11096 }
11097 } else if ((cur == '<') && (next == '!') &&
11098 (avail < 9)) {
11099 goto done;
11100 } else {
11101 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011102 ctxt->progressive = 1;
11103 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011104#ifdef DEBUG_PUSH
11105 xmlGenericError(xmlGenericErrorContext,
11106 "PP: entering START_TAG\n");
11107#endif
11108 }
11109 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011110 case XML_PARSER_PROLOG:
11111 SKIP_BLANKS;
11112 if (ctxt->input->buf == NULL)
11113 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11114 else
11115 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11116 if (avail < 2)
11117 goto done;
11118 cur = ctxt->input->cur[0];
11119 next = ctxt->input->cur[1];
11120 if ((cur == '<') && (next == '?')) {
11121 if ((!terminate) &&
11122 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11123 goto done;
11124#ifdef DEBUG_PUSH
11125 xmlGenericError(xmlGenericErrorContext,
11126 "PP: Parsing PI\n");
11127#endif
11128 xmlParsePI(ctxt);
11129 } else if ((cur == '<') && (next == '!') &&
11130 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11131 if ((!terminate) &&
11132 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11133 goto done;
11134#ifdef DEBUG_PUSH
11135 xmlGenericError(xmlGenericErrorContext,
11136 "PP: Parsing Comment\n");
11137#endif
11138 xmlParseComment(ctxt);
11139 ctxt->instate = XML_PARSER_PROLOG;
11140 } else if ((cur == '<') && (next == '!') &&
11141 (avail < 4)) {
11142 goto done;
11143 } else {
11144 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011145 if (ctxt->progressive == 0)
11146 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011147 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011148#ifdef DEBUG_PUSH
11149 xmlGenericError(xmlGenericErrorContext,
11150 "PP: entering START_TAG\n");
11151#endif
11152 }
11153 break;
11154 case XML_PARSER_EPILOG:
11155 SKIP_BLANKS;
11156 if (ctxt->input->buf == NULL)
11157 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11158 else
11159 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11160 if (avail < 2)
11161 goto done;
11162 cur = ctxt->input->cur[0];
11163 next = ctxt->input->cur[1];
11164 if ((cur == '<') && (next == '?')) {
11165 if ((!terminate) &&
11166 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11167 goto done;
11168#ifdef DEBUG_PUSH
11169 xmlGenericError(xmlGenericErrorContext,
11170 "PP: Parsing PI\n");
11171#endif
11172 xmlParsePI(ctxt);
11173 ctxt->instate = XML_PARSER_EPILOG;
11174 } else if ((cur == '<') && (next == '!') &&
11175 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11176 if ((!terminate) &&
11177 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11178 goto done;
11179#ifdef DEBUG_PUSH
11180 xmlGenericError(xmlGenericErrorContext,
11181 "PP: Parsing Comment\n");
11182#endif
11183 xmlParseComment(ctxt);
11184 ctxt->instate = XML_PARSER_EPILOG;
11185 } else if ((cur == '<') && (next == '!') &&
11186 (avail < 4)) {
11187 goto done;
11188 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011189 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011190 ctxt->instate = XML_PARSER_EOF;
11191#ifdef DEBUG_PUSH
11192 xmlGenericError(xmlGenericErrorContext,
11193 "PP: entering EOF\n");
11194#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011195 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011196 ctxt->sax->endDocument(ctxt->userData);
11197 goto done;
11198 }
11199 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011200 case XML_PARSER_DTD: {
11201 /*
11202 * Sorry but progressive parsing of the internal subset
11203 * is not expected to be supported. We first check that
11204 * the full content of the internal subset is available and
11205 * the parsing is launched only at that point.
11206 * Internal subset ends up with "']' S? '>'" in an unescaped
11207 * section and not in a ']]>' sequence which are conditional
11208 * sections (whoever argued to keep that crap in XML deserve
11209 * a place in hell !).
11210 */
11211 int base, i;
11212 xmlChar *buf;
11213 xmlChar quote = 0;
11214
11215 base = ctxt->input->cur - ctxt->input->base;
11216 if (base < 0) return(0);
11217 if (ctxt->checkIndex > base)
11218 base = ctxt->checkIndex;
11219 buf = ctxt->input->buf->buffer->content;
11220 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11221 base++) {
11222 if (quote != 0) {
11223 if (buf[base] == quote)
11224 quote = 0;
11225 continue;
11226 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011227 if ((quote == 0) && (buf[base] == '<')) {
11228 int found = 0;
11229 /* special handling of comments */
11230 if (((unsigned int) base + 4 <
11231 ctxt->input->buf->buffer->use) &&
11232 (buf[base + 1] == '!') &&
11233 (buf[base + 2] == '-') &&
11234 (buf[base + 3] == '-')) {
11235 for (;(unsigned int) base + 3 <
11236 ctxt->input->buf->buffer->use; base++) {
11237 if ((buf[base] == '-') &&
11238 (buf[base + 1] == '-') &&
11239 (buf[base + 2] == '>')) {
11240 found = 1;
11241 base += 2;
11242 break;
11243 }
11244 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011245 if (!found) {
11246#if 0
11247 fprintf(stderr, "unfinished comment\n");
11248#endif
11249 break; /* for */
11250 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011251 continue;
11252 }
11253 }
Owen Taylor3473f882001-02-23 17:55:21 +000011254 if (buf[base] == '"') {
11255 quote = '"';
11256 continue;
11257 }
11258 if (buf[base] == '\'') {
11259 quote = '\'';
11260 continue;
11261 }
11262 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011263#if 0
11264 fprintf(stderr, "%c%c%c%c: ", buf[base],
11265 buf[base + 1], buf[base + 2], buf[base + 3]);
11266#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011267 if ((unsigned int) base +1 >=
11268 ctxt->input->buf->buffer->use)
11269 break;
11270 if (buf[base + 1] == ']') {
11271 /* conditional crap, skip both ']' ! */
11272 base++;
11273 continue;
11274 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011275 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011276 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11277 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011278 if (buf[base + i] == '>') {
11279#if 0
11280 fprintf(stderr, "found\n");
11281#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011282 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011283 }
11284 if (!IS_BLANK_CH(buf[base + i])) {
11285#if 0
11286 fprintf(stderr, "not found\n");
11287#endif
11288 goto not_end_of_int_subset;
11289 }
Owen Taylor3473f882001-02-23 17:55:21 +000011290 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011291#if 0
11292 fprintf(stderr, "end of stream\n");
11293#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011294 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011295
Owen Taylor3473f882001-02-23 17:55:21 +000011296 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011297not_end_of_int_subset:
11298 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011299 }
11300 /*
11301 * We didn't found the end of the Internal subset
11302 */
Owen Taylor3473f882001-02-23 17:55:21 +000011303#ifdef DEBUG_PUSH
11304 if (next == 0)
11305 xmlGenericError(xmlGenericErrorContext,
11306 "PP: lookup of int subset end filed\n");
11307#endif
11308 goto done;
11309
11310found_end_int_subset:
11311 xmlParseInternalSubset(ctxt);
11312 ctxt->inSubset = 2;
11313 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11314 (ctxt->sax->externalSubset != NULL))
11315 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11316 ctxt->extSubSystem, ctxt->extSubURI);
11317 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011318 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011319 ctxt->instate = XML_PARSER_PROLOG;
11320 ctxt->checkIndex = 0;
11321#ifdef DEBUG_PUSH
11322 xmlGenericError(xmlGenericErrorContext,
11323 "PP: entering PROLOG\n");
11324#endif
11325 break;
11326 }
11327 case XML_PARSER_COMMENT:
11328 xmlGenericError(xmlGenericErrorContext,
11329 "PP: internal error, state == COMMENT\n");
11330 ctxt->instate = XML_PARSER_CONTENT;
11331#ifdef DEBUG_PUSH
11332 xmlGenericError(xmlGenericErrorContext,
11333 "PP: entering CONTENT\n");
11334#endif
11335 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011336 case XML_PARSER_IGNORE:
11337 xmlGenericError(xmlGenericErrorContext,
11338 "PP: internal error, state == IGNORE");
11339 ctxt->instate = XML_PARSER_DTD;
11340#ifdef DEBUG_PUSH
11341 xmlGenericError(xmlGenericErrorContext,
11342 "PP: entering DTD\n");
11343#endif
11344 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011345 case XML_PARSER_PI:
11346 xmlGenericError(xmlGenericErrorContext,
11347 "PP: internal error, state == PI\n");
11348 ctxt->instate = XML_PARSER_CONTENT;
11349#ifdef DEBUG_PUSH
11350 xmlGenericError(xmlGenericErrorContext,
11351 "PP: entering CONTENT\n");
11352#endif
11353 break;
11354 case XML_PARSER_ENTITY_DECL:
11355 xmlGenericError(xmlGenericErrorContext,
11356 "PP: internal error, state == ENTITY_DECL\n");
11357 ctxt->instate = XML_PARSER_DTD;
11358#ifdef DEBUG_PUSH
11359 xmlGenericError(xmlGenericErrorContext,
11360 "PP: entering DTD\n");
11361#endif
11362 break;
11363 case XML_PARSER_ENTITY_VALUE:
11364 xmlGenericError(xmlGenericErrorContext,
11365 "PP: internal error, state == ENTITY_VALUE\n");
11366 ctxt->instate = XML_PARSER_CONTENT;
11367#ifdef DEBUG_PUSH
11368 xmlGenericError(xmlGenericErrorContext,
11369 "PP: entering DTD\n");
11370#endif
11371 break;
11372 case XML_PARSER_ATTRIBUTE_VALUE:
11373 xmlGenericError(xmlGenericErrorContext,
11374 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11375 ctxt->instate = XML_PARSER_START_TAG;
11376#ifdef DEBUG_PUSH
11377 xmlGenericError(xmlGenericErrorContext,
11378 "PP: entering START_TAG\n");
11379#endif
11380 break;
11381 case XML_PARSER_SYSTEM_LITERAL:
11382 xmlGenericError(xmlGenericErrorContext,
11383 "PP: internal error, state == SYSTEM_LITERAL\n");
11384 ctxt->instate = XML_PARSER_START_TAG;
11385#ifdef DEBUG_PUSH
11386 xmlGenericError(xmlGenericErrorContext,
11387 "PP: entering START_TAG\n");
11388#endif
11389 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011390 case XML_PARSER_PUBLIC_LITERAL:
11391 xmlGenericError(xmlGenericErrorContext,
11392 "PP: internal error, state == PUBLIC_LITERAL\n");
11393 ctxt->instate = XML_PARSER_START_TAG;
11394#ifdef DEBUG_PUSH
11395 xmlGenericError(xmlGenericErrorContext,
11396 "PP: entering START_TAG\n");
11397#endif
11398 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011399 }
11400 }
11401done:
11402#ifdef DEBUG_PUSH
11403 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11404#endif
11405 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011406encoding_error:
11407 {
11408 char buffer[150];
11409
11410 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11411 ctxt->input->cur[0], ctxt->input->cur[1],
11412 ctxt->input->cur[2], ctxt->input->cur[3]);
11413 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11414 "Input is not proper UTF-8, indicate encoding !\n%s",
11415 BAD_CAST buffer, NULL);
11416 }
11417 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011418}
11419
11420/**
Owen Taylor3473f882001-02-23 17:55:21 +000011421 * xmlParseChunk:
11422 * @ctxt: an XML parser context
11423 * @chunk: an char array
11424 * @size: the size in byte of the chunk
11425 * @terminate: last chunk indicator
11426 *
11427 * Parse a Chunk of memory
11428 *
11429 * Returns zero if no error, the xmlParserErrors otherwise.
11430 */
11431int
11432xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11433 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011434 int end_in_lf = 0;
11435
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011436 if (ctxt == NULL)
11437 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011438 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011439 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011440 if (ctxt->instate == XML_PARSER_START)
11441 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011442 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11443 (chunk[size - 1] == '\r')) {
11444 end_in_lf = 1;
11445 size--;
11446 }
Owen Taylor3473f882001-02-23 17:55:21 +000011447 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11448 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11449 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11450 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011451 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000011452
William M. Bracka3215c72004-07-31 16:24:01 +000011453 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11454 if (res < 0) {
11455 ctxt->errNo = XML_PARSER_EOF;
11456 ctxt->disableSAX = 1;
11457 return (XML_PARSER_EOF);
11458 }
Owen Taylor3473f882001-02-23 17:55:21 +000011459 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11460 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011461 ctxt->input->end =
11462 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011463#ifdef DEBUG_PUSH
11464 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11465#endif
11466
Owen Taylor3473f882001-02-23 17:55:21 +000011467 } else if (ctxt->instate != XML_PARSER_EOF) {
11468 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11469 xmlParserInputBufferPtr in = ctxt->input->buf;
11470 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11471 (in->raw != NULL)) {
11472 int nbchars;
11473
11474 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11475 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011476 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011477 xmlGenericError(xmlGenericErrorContext,
11478 "xmlParseChunk: encoder error\n");
11479 return(XML_ERR_INVALID_ENCODING);
11480 }
11481 }
11482 }
11483 }
11484 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000011485 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11486 (ctxt->input->buf != NULL)) {
11487 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11488 }
Daniel Veillard14412512005-01-21 23:53:26 +000011489 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011490 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011491 if (terminate) {
11492 /*
11493 * Check for termination
11494 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011495 int avail = 0;
11496
11497 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011498 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011499 avail = ctxt->input->length -
11500 (ctxt->input->cur - ctxt->input->base);
11501 else
11502 avail = ctxt->input->buf->buffer->use -
11503 (ctxt->input->cur - ctxt->input->base);
11504 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011505
Owen Taylor3473f882001-02-23 17:55:21 +000011506 if ((ctxt->instate != XML_PARSER_EOF) &&
11507 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011508 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011509 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011510 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011511 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011512 }
Owen Taylor3473f882001-02-23 17:55:21 +000011513 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011514 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011515 ctxt->sax->endDocument(ctxt->userData);
11516 }
11517 ctxt->instate = XML_PARSER_EOF;
11518 }
11519 return((xmlParserErrors) ctxt->errNo);
11520}
11521
11522/************************************************************************
11523 * *
11524 * I/O front end functions to the parser *
11525 * *
11526 ************************************************************************/
11527
11528/**
Owen Taylor3473f882001-02-23 17:55:21 +000011529 * xmlCreatePushParserCtxt:
11530 * @sax: a SAX handler
11531 * @user_data: The user data returned on SAX callbacks
11532 * @chunk: a pointer to an array of chars
11533 * @size: number of chars in the array
11534 * @filename: an optional file name or URI
11535 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011536 * Create a parser context for using the XML parser in push mode.
11537 * If @buffer and @size are non-NULL, the data is used to detect
11538 * the encoding. The remaining characters will be parsed so they
11539 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011540 * To allow content encoding detection, @size should be >= 4
11541 * The value of @filename is used for fetching external entities
11542 * and error/warning reports.
11543 *
11544 * Returns the new parser context or NULL
11545 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011546
Owen Taylor3473f882001-02-23 17:55:21 +000011547xmlParserCtxtPtr
11548xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11549 const char *chunk, int size, const char *filename) {
11550 xmlParserCtxtPtr ctxt;
11551 xmlParserInputPtr inputStream;
11552 xmlParserInputBufferPtr buf;
11553 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11554
11555 /*
11556 * plug some encoding conversion routines
11557 */
11558 if ((chunk != NULL) && (size >= 4))
11559 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11560
11561 buf = xmlAllocParserInputBuffer(enc);
11562 if (buf == NULL) return(NULL);
11563
11564 ctxt = xmlNewParserCtxt();
11565 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011566 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011567 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011568 return(NULL);
11569 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011570 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011571 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11572 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011573 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011574 xmlFreeParserInputBuffer(buf);
11575 xmlFreeParserCtxt(ctxt);
11576 return(NULL);
11577 }
Owen Taylor3473f882001-02-23 17:55:21 +000011578 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011579#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011580 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011581#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011582 xmlFree(ctxt->sax);
11583 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11584 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011585 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011586 xmlFreeParserInputBuffer(buf);
11587 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011588 return(NULL);
11589 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011590 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11591 if (sax->initialized == XML_SAX2_MAGIC)
11592 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11593 else
11594 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011595 if (user_data != NULL)
11596 ctxt->userData = user_data;
11597 }
11598 if (filename == NULL) {
11599 ctxt->directory = NULL;
11600 } else {
11601 ctxt->directory = xmlParserGetDirectory(filename);
11602 }
11603
11604 inputStream = xmlNewInputStream(ctxt);
11605 if (inputStream == NULL) {
11606 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011607 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011608 return(NULL);
11609 }
11610
11611 if (filename == NULL)
11612 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011613 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011614 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011615 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011616 if (inputStream->filename == NULL) {
11617 xmlFreeParserCtxt(ctxt);
11618 xmlFreeParserInputBuffer(buf);
11619 return(NULL);
11620 }
11621 }
Owen Taylor3473f882001-02-23 17:55:21 +000011622 inputStream->buf = buf;
11623 inputStream->base = inputStream->buf->buffer->content;
11624 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011625 inputStream->end =
11626 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011627
11628 inputPush(ctxt, inputStream);
11629
William M. Brack3a1cd212005-02-11 14:35:54 +000011630 /*
11631 * If the caller didn't provide an initial 'chunk' for determining
11632 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11633 * that it can be automatically determined later
11634 */
11635 if ((size == 0) || (chunk == NULL)) {
11636 ctxt->charset = XML_CHAR_ENCODING_NONE;
11637 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011638 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11639 int cur = ctxt->input->cur - ctxt->input->base;
11640
Owen Taylor3473f882001-02-23 17:55:21 +000011641 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011642
11643 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11644 ctxt->input->cur = ctxt->input->base + cur;
11645 ctxt->input->end =
11646 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011647#ifdef DEBUG_PUSH
11648 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11649#endif
11650 }
11651
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011652 if (enc != XML_CHAR_ENCODING_NONE) {
11653 xmlSwitchEncoding(ctxt, enc);
11654 }
11655
Owen Taylor3473f882001-02-23 17:55:21 +000011656 return(ctxt);
11657}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011658#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011659
11660/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011661 * xmlStopParser:
11662 * @ctxt: an XML parser context
11663 *
11664 * Blocks further parser processing
11665 */
11666void
11667xmlStopParser(xmlParserCtxtPtr ctxt) {
11668 if (ctxt == NULL)
11669 return;
11670 ctxt->instate = XML_PARSER_EOF;
11671 ctxt->disableSAX = 1;
11672 if (ctxt->input != NULL) {
11673 ctxt->input->cur = BAD_CAST"";
11674 ctxt->input->base = ctxt->input->cur;
11675 }
11676}
11677
11678/**
Owen Taylor3473f882001-02-23 17:55:21 +000011679 * xmlCreateIOParserCtxt:
11680 * @sax: a SAX handler
11681 * @user_data: The user data returned on SAX callbacks
11682 * @ioread: an I/O read function
11683 * @ioclose: an I/O close function
11684 * @ioctx: an I/O handler
11685 * @enc: the charset encoding if known
11686 *
11687 * Create a parser context for using the XML parser with an existing
11688 * I/O stream
11689 *
11690 * Returns the new parser context or NULL
11691 */
11692xmlParserCtxtPtr
11693xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11694 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11695 void *ioctx, xmlCharEncoding enc) {
11696 xmlParserCtxtPtr ctxt;
11697 xmlParserInputPtr inputStream;
11698 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011699
11700 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011701
11702 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11703 if (buf == NULL) return(NULL);
11704
11705 ctxt = xmlNewParserCtxt();
11706 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011707 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011708 return(NULL);
11709 }
11710 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011711#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011712 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011713#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011714 xmlFree(ctxt->sax);
11715 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11716 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011717 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011718 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011719 return(NULL);
11720 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011721 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11722 if (sax->initialized == XML_SAX2_MAGIC)
11723 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11724 else
11725 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011726 if (user_data != NULL)
11727 ctxt->userData = user_data;
11728 }
11729
11730 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11731 if (inputStream == NULL) {
11732 xmlFreeParserCtxt(ctxt);
11733 return(NULL);
11734 }
11735 inputPush(ctxt, inputStream);
11736
11737 return(ctxt);
11738}
11739
Daniel Veillard4432df22003-09-28 18:58:27 +000011740#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011741/************************************************************************
11742 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011743 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011744 * *
11745 ************************************************************************/
11746
11747/**
11748 * xmlIOParseDTD:
11749 * @sax: the SAX handler block or NULL
11750 * @input: an Input Buffer
11751 * @enc: the charset encoding if known
11752 *
11753 * Load and parse a DTD
11754 *
11755 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011756 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011757 */
11758
11759xmlDtdPtr
11760xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11761 xmlCharEncoding enc) {
11762 xmlDtdPtr ret = NULL;
11763 xmlParserCtxtPtr ctxt;
11764 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011765 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011766
11767 if (input == NULL)
11768 return(NULL);
11769
11770 ctxt = xmlNewParserCtxt();
11771 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011772 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011773 return(NULL);
11774 }
11775
11776 /*
11777 * Set-up the SAX context
11778 */
11779 if (sax != NULL) {
11780 if (ctxt->sax != NULL)
11781 xmlFree(ctxt->sax);
11782 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011783 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011784 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011785 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011786
11787 /*
11788 * generate a parser input from the I/O handler
11789 */
11790
Daniel Veillard43caefb2003-12-07 19:32:22 +000011791 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011792 if (pinput == NULL) {
11793 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011794 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011795 xmlFreeParserCtxt(ctxt);
11796 return(NULL);
11797 }
11798
11799 /*
11800 * plug some encoding conversion routines here.
11801 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011802 if (xmlPushInput(ctxt, pinput) < 0) {
11803 if (sax != NULL) ctxt->sax = NULL;
11804 xmlFreeParserCtxt(ctxt);
11805 return(NULL);
11806 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000011807 if (enc != XML_CHAR_ENCODING_NONE) {
11808 xmlSwitchEncoding(ctxt, enc);
11809 }
Owen Taylor3473f882001-02-23 17:55:21 +000011810
11811 pinput->filename = NULL;
11812 pinput->line = 1;
11813 pinput->col = 1;
11814 pinput->base = ctxt->input->cur;
11815 pinput->cur = ctxt->input->cur;
11816 pinput->free = NULL;
11817
11818 /*
11819 * let's parse that entity knowing it's an external subset.
11820 */
11821 ctxt->inSubset = 2;
11822 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011823 if (ctxt->myDoc == NULL) {
11824 xmlErrMemory(ctxt, "New Doc failed");
11825 return(NULL);
11826 }
11827 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011828 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11829 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011830
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011831 if ((enc == XML_CHAR_ENCODING_NONE) &&
11832 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011833 /*
11834 * Get the 4 first bytes and decode the charset
11835 * if enc != XML_CHAR_ENCODING_NONE
11836 * plug some encoding conversion routines.
11837 */
11838 start[0] = RAW;
11839 start[1] = NXT(1);
11840 start[2] = NXT(2);
11841 start[3] = NXT(3);
11842 enc = xmlDetectCharEncoding(start, 4);
11843 if (enc != XML_CHAR_ENCODING_NONE) {
11844 xmlSwitchEncoding(ctxt, enc);
11845 }
11846 }
11847
Owen Taylor3473f882001-02-23 17:55:21 +000011848 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11849
11850 if (ctxt->myDoc != NULL) {
11851 if (ctxt->wellFormed) {
11852 ret = ctxt->myDoc->extSubset;
11853 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011854 if (ret != NULL) {
11855 xmlNodePtr tmp;
11856
11857 ret->doc = NULL;
11858 tmp = ret->children;
11859 while (tmp != NULL) {
11860 tmp->doc = NULL;
11861 tmp = tmp->next;
11862 }
11863 }
Owen Taylor3473f882001-02-23 17:55:21 +000011864 } else {
11865 ret = NULL;
11866 }
11867 xmlFreeDoc(ctxt->myDoc);
11868 ctxt->myDoc = NULL;
11869 }
11870 if (sax != NULL) ctxt->sax = NULL;
11871 xmlFreeParserCtxt(ctxt);
11872
11873 return(ret);
11874}
11875
11876/**
11877 * xmlSAXParseDTD:
11878 * @sax: the SAX handler block
11879 * @ExternalID: a NAME* containing the External ID of the DTD
11880 * @SystemID: a NAME* containing the URL to the DTD
11881 *
11882 * Load and parse an external subset.
11883 *
11884 * Returns the resulting xmlDtdPtr or NULL in case of error.
11885 */
11886
11887xmlDtdPtr
11888xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11889 const xmlChar *SystemID) {
11890 xmlDtdPtr ret = NULL;
11891 xmlParserCtxtPtr ctxt;
11892 xmlParserInputPtr input = NULL;
11893 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011894 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011895
11896 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11897
11898 ctxt = xmlNewParserCtxt();
11899 if (ctxt == NULL) {
11900 return(NULL);
11901 }
11902
11903 /*
11904 * Set-up the SAX context
11905 */
11906 if (sax != NULL) {
11907 if (ctxt->sax != NULL)
11908 xmlFree(ctxt->sax);
11909 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011910 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011911 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011912
11913 /*
11914 * Canonicalise the system ID
11915 */
11916 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011917 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011918 xmlFreeParserCtxt(ctxt);
11919 return(NULL);
11920 }
Owen Taylor3473f882001-02-23 17:55:21 +000011921
11922 /*
11923 * Ask the Entity resolver to load the damn thing
11924 */
11925
11926 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011927 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11928 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011929 if (input == NULL) {
11930 if (sax != NULL) ctxt->sax = NULL;
11931 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011932 if (systemIdCanonic != NULL)
11933 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011934 return(NULL);
11935 }
11936
11937 /*
11938 * plug some encoding conversion routines here.
11939 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011940 if (xmlPushInput(ctxt, input) < 0) {
11941 if (sax != NULL) ctxt->sax = NULL;
11942 xmlFreeParserCtxt(ctxt);
11943 if (systemIdCanonic != NULL)
11944 xmlFree(systemIdCanonic);
11945 return(NULL);
11946 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011947 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11948 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11949 xmlSwitchEncoding(ctxt, enc);
11950 }
Owen Taylor3473f882001-02-23 17:55:21 +000011951
11952 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011953 input->filename = (char *) systemIdCanonic;
11954 else
11955 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011956 input->line = 1;
11957 input->col = 1;
11958 input->base = ctxt->input->cur;
11959 input->cur = ctxt->input->cur;
11960 input->free = NULL;
11961
11962 /*
11963 * let's parse that entity knowing it's an external subset.
11964 */
11965 ctxt->inSubset = 2;
11966 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011967 if (ctxt->myDoc == NULL) {
11968 xmlErrMemory(ctxt, "New Doc failed");
11969 if (sax != NULL) ctxt->sax = NULL;
11970 xmlFreeParserCtxt(ctxt);
11971 return(NULL);
11972 }
11973 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011974 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11975 ExternalID, SystemID);
11976 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11977
11978 if (ctxt->myDoc != NULL) {
11979 if (ctxt->wellFormed) {
11980 ret = ctxt->myDoc->extSubset;
11981 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011982 if (ret != NULL) {
11983 xmlNodePtr tmp;
11984
11985 ret->doc = NULL;
11986 tmp = ret->children;
11987 while (tmp != NULL) {
11988 tmp->doc = NULL;
11989 tmp = tmp->next;
11990 }
11991 }
Owen Taylor3473f882001-02-23 17:55:21 +000011992 } else {
11993 ret = NULL;
11994 }
11995 xmlFreeDoc(ctxt->myDoc);
11996 ctxt->myDoc = NULL;
11997 }
11998 if (sax != NULL) ctxt->sax = NULL;
11999 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012000
Owen Taylor3473f882001-02-23 17:55:21 +000012001 return(ret);
12002}
12003
Daniel Veillard4432df22003-09-28 18:58:27 +000012004
Owen Taylor3473f882001-02-23 17:55:21 +000012005/**
12006 * xmlParseDTD:
12007 * @ExternalID: a NAME* containing the External ID of the DTD
12008 * @SystemID: a NAME* containing the URL to the DTD
12009 *
12010 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012011 *
Owen Taylor3473f882001-02-23 17:55:21 +000012012 * Returns the resulting xmlDtdPtr or NULL in case of error.
12013 */
12014
12015xmlDtdPtr
12016xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12017 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12018}
Daniel Veillard4432df22003-09-28 18:58:27 +000012019#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012020
/************************************************************************
 *                                                                      *
 *              Front ends when parsing an Entity                       *
 *                                                                      *
 ************************************************************************/
12026
12027/**
Owen Taylor3473f882001-02-23 17:55:21 +000012028 * xmlParseCtxtExternalEntity:
12029 * @ctx: the existing parsing context
12030 * @URL: the URL for the entity to load
12031 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012032 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012033 *
12034 * Parse an external general entity within an existing parsing context
12035 * An external general parsed entity is well-formed if it matches the
12036 * production labeled extParsedEnt.
12037 *
12038 * [78] extParsedEnt ::= TextDecl? content
12039 *
12040 * Returns 0 if the entity is well formed, -1 in case of args problem and
12041 * the parser error code otherwise
12042 */
12043
12044int
12045xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012046 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012047 xmlParserCtxtPtr ctxt;
12048 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012049 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012050 xmlSAXHandlerPtr oldsax = NULL;
12051 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012052 xmlChar start[4];
12053 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012054 xmlParserInputPtr inputStream;
12055 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012056
Daniel Veillardce682bc2004-11-05 17:22:25 +000012057 if (ctx == NULL) return(-1);
12058
Daniel Veillard0161e632008-08-28 15:36:32 +000012059 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12060 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012061 return(XML_ERR_ENTITY_LOOP);
12062 }
12063
Daniel Veillardcda96922001-08-21 10:56:31 +000012064 if (lst != NULL)
12065 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012066 if ((URL == NULL) && (ID == NULL))
12067 return(-1);
12068 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12069 return(-1);
12070
Rob Richards798743a2009-06-19 13:54:25 -040012071 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012072 if (ctxt == NULL) {
12073 return(-1);
12074 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012075
Owen Taylor3473f882001-02-23 17:55:21 +000012076 oldsax = ctxt->sax;
12077 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012078 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012079 newDoc = xmlNewDoc(BAD_CAST "1.0");
12080 if (newDoc == NULL) {
12081 xmlFreeParserCtxt(ctxt);
12082 return(-1);
12083 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012084 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012085 if (ctx->myDoc->dict) {
12086 newDoc->dict = ctx->myDoc->dict;
12087 xmlDictReference(newDoc->dict);
12088 }
Owen Taylor3473f882001-02-23 17:55:21 +000012089 if (ctx->myDoc != NULL) {
12090 newDoc->intSubset = ctx->myDoc->intSubset;
12091 newDoc->extSubset = ctx->myDoc->extSubset;
12092 }
12093 if (ctx->myDoc->URL != NULL) {
12094 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12095 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012096 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12097 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012098 ctxt->sax = oldsax;
12099 xmlFreeParserCtxt(ctxt);
12100 newDoc->intSubset = NULL;
12101 newDoc->extSubset = NULL;
12102 xmlFreeDoc(newDoc);
12103 return(-1);
12104 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012105 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012106 nodePush(ctxt, newDoc->children);
12107 if (ctx->myDoc == NULL) {
12108 ctxt->myDoc = newDoc;
12109 } else {
12110 ctxt->myDoc = ctx->myDoc;
12111 newDoc->children->doc = ctx->myDoc;
12112 }
12113
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012114 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012115 * Get the 4 first bytes and decode the charset
12116 * if enc != XML_CHAR_ENCODING_NONE
12117 * plug some encoding conversion routines.
12118 */
12119 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012120 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12121 start[0] = RAW;
12122 start[1] = NXT(1);
12123 start[2] = NXT(2);
12124 start[3] = NXT(3);
12125 enc = xmlDetectCharEncoding(start, 4);
12126 if (enc != XML_CHAR_ENCODING_NONE) {
12127 xmlSwitchEncoding(ctxt, enc);
12128 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012129 }
12130
Owen Taylor3473f882001-02-23 17:55:21 +000012131 /*
12132 * Parse a possible text declaration first
12133 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012134 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012135 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012136 /*
12137 * An XML-1.0 document can't reference an entity not XML-1.0
12138 */
12139 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12140 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12141 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12142 "Version mismatch between document and entity\n");
12143 }
Owen Taylor3473f882001-02-23 17:55:21 +000012144 }
12145
12146 /*
12147 * Doing validity checking on chunk doesn't make sense
12148 */
12149 ctxt->instate = XML_PARSER_CONTENT;
12150 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012151 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012152 ctxt->loadsubset = ctx->loadsubset;
12153 ctxt->depth = ctx->depth + 1;
12154 ctxt->replaceEntities = ctx->replaceEntities;
12155 if (ctxt->validate) {
12156 ctxt->vctxt.error = ctx->vctxt.error;
12157 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012158 } else {
12159 ctxt->vctxt.error = NULL;
12160 ctxt->vctxt.warning = NULL;
12161 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012162 ctxt->vctxt.nodeTab = NULL;
12163 ctxt->vctxt.nodeNr = 0;
12164 ctxt->vctxt.nodeMax = 0;
12165 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012166 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12167 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012168 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12169 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12170 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012171 ctxt->dictNames = ctx->dictNames;
12172 ctxt->attsDefault = ctx->attsDefault;
12173 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012174 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012175
12176 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012177
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012178 ctx->validate = ctxt->validate;
12179 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012180 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012181 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012182 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012183 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012184 }
12185 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012186 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012187 }
12188
12189 if (!ctxt->wellFormed) {
12190 if (ctxt->errNo == 0)
12191 ret = 1;
12192 else
12193 ret = ctxt->errNo;
12194 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012195 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012196 xmlNodePtr cur;
12197
12198 /*
12199 * Return the newly created nodeset after unlinking it from
12200 * they pseudo parent.
12201 */
12202 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012203 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012204 while (cur != NULL) {
12205 cur->parent = NULL;
12206 cur = cur->next;
12207 }
12208 newDoc->children->children = NULL;
12209 }
12210 ret = 0;
12211 }
12212 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012213 ctxt->dict = NULL;
12214 ctxt->attsDefault = NULL;
12215 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012216 xmlFreeParserCtxt(ctxt);
12217 newDoc->intSubset = NULL;
12218 newDoc->extSubset = NULL;
12219 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012220
Owen Taylor3473f882001-02-23 17:55:21 +000012221 return(ret);
12222}
12223
12224/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012225 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012226 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012227 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012228 * @sax: the SAX handler bloc (possibly NULL)
12229 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12230 * @depth: Used for loop detection, use 0
12231 * @URL: the URL for the entity to load
12232 * @ID: the System ID for the entity to load
12233 * @list: the return value for the set of parsed nodes
12234 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012235 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012236 *
12237 * Returns 0 if the entity is well formed, -1 in case of args problem and
12238 * the parser error code otherwise
12239 */
12240
Daniel Veillard7d515752003-09-26 19:12:37 +000012241static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012242xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12243 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012244 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012245 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012246 xmlParserCtxtPtr ctxt;
12247 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012248 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012249 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012250 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012251 xmlChar start[4];
12252 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012253
Daniel Veillard0161e632008-08-28 15:36:32 +000012254 if (((depth > 40) &&
12255 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12256 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012257 return(XML_ERR_ENTITY_LOOP);
12258 }
12259
Owen Taylor3473f882001-02-23 17:55:21 +000012260 if (list != NULL)
12261 *list = NULL;
12262 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012263 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012264 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012265 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012266
12267
Rob Richards9c0aa472009-03-26 18:10:19 +000012268 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012269 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012270 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012271 if (oldctxt != NULL) {
12272 ctxt->_private = oldctxt->_private;
12273 ctxt->loadsubset = oldctxt->loadsubset;
12274 ctxt->validate = oldctxt->validate;
12275 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012276 ctxt->record_info = oldctxt->record_info;
12277 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12278 ctxt->node_seq.length = oldctxt->node_seq.length;
12279 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012280 } else {
12281 /*
12282 * Doing validity checking on chunk without context
12283 * doesn't make sense
12284 */
12285 ctxt->_private = NULL;
12286 ctxt->validate = 0;
12287 ctxt->external = 2;
12288 ctxt->loadsubset = 0;
12289 }
Owen Taylor3473f882001-02-23 17:55:21 +000012290 if (sax != NULL) {
12291 oldsax = ctxt->sax;
12292 ctxt->sax = sax;
12293 if (user_data != NULL)
12294 ctxt->userData = user_data;
12295 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012296 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012297 newDoc = xmlNewDoc(BAD_CAST "1.0");
12298 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012299 ctxt->node_seq.maximum = 0;
12300 ctxt->node_seq.length = 0;
12301 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012302 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012303 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012304 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012305 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012306 newDoc->intSubset = doc->intSubset;
12307 newDoc->extSubset = doc->extSubset;
12308 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012309 xmlDictReference(newDoc->dict);
12310
Owen Taylor3473f882001-02-23 17:55:21 +000012311 if (doc->URL != NULL) {
12312 newDoc->URL = xmlStrdup(doc->URL);
12313 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012314 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12315 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012316 if (sax != NULL)
12317 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012318 ctxt->node_seq.maximum = 0;
12319 ctxt->node_seq.length = 0;
12320 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012321 xmlFreeParserCtxt(ctxt);
12322 newDoc->intSubset = NULL;
12323 newDoc->extSubset = NULL;
12324 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012325 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012326 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012327 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012328 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012329 ctxt->myDoc = doc;
12330 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012331
Daniel Veillard0161e632008-08-28 15:36:32 +000012332 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012333 * Get the 4 first bytes and decode the charset
12334 * if enc != XML_CHAR_ENCODING_NONE
12335 * plug some encoding conversion routines.
12336 */
12337 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012338 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12339 start[0] = RAW;
12340 start[1] = NXT(1);
12341 start[2] = NXT(2);
12342 start[3] = NXT(3);
12343 enc = xmlDetectCharEncoding(start, 4);
12344 if (enc != XML_CHAR_ENCODING_NONE) {
12345 xmlSwitchEncoding(ctxt, enc);
12346 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012347 }
12348
Owen Taylor3473f882001-02-23 17:55:21 +000012349 /*
12350 * Parse a possible text declaration first
12351 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012352 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012353 xmlParseTextDecl(ctxt);
12354 }
12355
Owen Taylor3473f882001-02-23 17:55:21 +000012356 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012357 ctxt->depth = depth;
12358
12359 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012360
Daniel Veillard561b7f82002-03-20 21:55:57 +000012361 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012362 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012363 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012364 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012365 }
12366 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012367 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012368 }
12369
12370 if (!ctxt->wellFormed) {
12371 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012372 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012373 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012374 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012375 } else {
12376 if (list != NULL) {
12377 xmlNodePtr cur;
12378
12379 /*
12380 * Return the newly created nodeset after unlinking it from
12381 * they pseudo parent.
12382 */
12383 cur = newDoc->children->children;
12384 *list = cur;
12385 while (cur != NULL) {
12386 cur->parent = NULL;
12387 cur = cur->next;
12388 }
12389 newDoc->children->children = NULL;
12390 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012391 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012392 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012393
12394 /*
12395 * Record in the parent context the number of entities replacement
12396 * done when parsing that reference.
12397 */
12398 oldctxt->nbentities += ctxt->nbentities;
12399 /*
12400 * Also record the size of the entity parsed
12401 */
12402 if (ctxt->input != NULL) {
12403 oldctxt->sizeentities += ctxt->input->consumed;
12404 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12405 }
12406 /*
12407 * And record the last error if any
12408 */
12409 if (ctxt->lastError.code != XML_ERR_OK)
12410 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12411
Owen Taylor3473f882001-02-23 17:55:21 +000012412 if (sax != NULL)
12413 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012414 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12415 oldctxt->node_seq.length = ctxt->node_seq.length;
12416 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012417 ctxt->node_seq.maximum = 0;
12418 ctxt->node_seq.length = 0;
12419 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012420 xmlFreeParserCtxt(ctxt);
12421 newDoc->intSubset = NULL;
12422 newDoc->extSubset = NULL;
12423 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012424
Owen Taylor3473f882001-02-23 17:55:21 +000012425 return(ret);
12426}
12427
Daniel Veillard81273902003-09-30 00:43:48 +000012428#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012429/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012430 * xmlParseExternalEntity:
12431 * @doc: the document the chunk pertains to
12432 * @sax: the SAX handler bloc (possibly NULL)
12433 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12434 * @depth: Used for loop detection, use 0
12435 * @URL: the URL for the entity to load
12436 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012437 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012438 *
12439 * Parse an external general entity
12440 * An external general parsed entity is well-formed if it matches the
12441 * production labeled extParsedEnt.
12442 *
12443 * [78] extParsedEnt ::= TextDecl? content
12444 *
12445 * Returns 0 if the entity is well formed, -1 in case of args problem and
12446 * the parser error code otherwise
12447 */
12448
12449int
12450xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012451 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012452 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012453 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012454}
12455
12456/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012457 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012458 * @doc: the document the chunk pertains to
12459 * @sax: the SAX handler bloc (possibly NULL)
12460 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12461 * @depth: Used for loop detection, use 0
12462 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012463 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012464 *
12465 * Parse a well-balanced chunk of an XML document
12466 * called by the parser
12467 * The allowed sequence for the Well Balanced Chunk is the one defined by
12468 * the content production in the XML grammar:
12469 *
12470 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12471 *
12472 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12473 * the parser error code otherwise
12474 */
12475
12476int
12477xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012478 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012479 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12480 depth, string, lst, 0 );
12481}
Daniel Veillard81273902003-09-30 00:43:48 +000012482#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012483
12484/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012485 * xmlParseBalancedChunkMemoryInternal:
12486 * @oldctxt: the existing parsing context
12487 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12488 * @user_data: the user data field for the parser context
12489 * @lst: the return value for the set of parsed nodes
12490 *
12491 *
12492 * Parse a well-balanced chunk of an XML document
12493 * called by the parser
12494 * The allowed sequence for the Well Balanced Chunk is the one defined by
12495 * the content production in the XML grammar:
12496 *
12497 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12498 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012499 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12500 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012501 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012502 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012503 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012504 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012505static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012506xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12507 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12508 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012509 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012510 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012511 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012512 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012513 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012514 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012515 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012516
Daniel Veillard0161e632008-08-28 15:36:32 +000012517 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12518 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012519 return(XML_ERR_ENTITY_LOOP);
12520 }
12521
12522
12523 if (lst != NULL)
12524 *lst = NULL;
12525 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012526 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012527
12528 size = xmlStrlen(string);
12529
12530 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012531 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012532 if (user_data != NULL)
12533 ctxt->userData = user_data;
12534 else
12535 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012536 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12537 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012538 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12539 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12540 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012541
12542 oldsax = ctxt->sax;
12543 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012544 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012545 ctxt->replaceEntities = oldctxt->replaceEntities;
12546 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012547
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012548 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012549 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012550 newDoc = xmlNewDoc(BAD_CAST "1.0");
12551 if (newDoc == NULL) {
12552 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012553 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012554 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012555 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012556 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012557 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012558 newDoc->dict = ctxt->dict;
12559 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012560 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012561 } else {
12562 ctxt->myDoc = oldctxt->myDoc;
12563 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012564 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012565 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012566 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12567 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012568 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012569 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012570 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012571 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012572 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012573 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012574 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012575 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012576 ctxt->myDoc->children = NULL;
12577 ctxt->myDoc->last = NULL;
12578 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012579 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012580 ctxt->instate = XML_PARSER_CONTENT;
12581 ctxt->depth = oldctxt->depth + 1;
12582
Daniel Veillard328f48c2002-11-15 15:24:34 +000012583 ctxt->validate = 0;
12584 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012585 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12586 /*
12587 * ID/IDREF registration will be done in xmlValidateElement below
12588 */
12589 ctxt->loadsubset |= XML_SKIP_IDS;
12590 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012591 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012592 ctxt->attsDefault = oldctxt->attsDefault;
12593 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012594
Daniel Veillard68e9e742002-11-16 15:35:11 +000012595 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012596 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012597 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012598 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012599 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012600 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012601 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012602 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012603 }
12604
12605 if (!ctxt->wellFormed) {
12606 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012607 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012608 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012609 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012610 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012611 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012612 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012613
William M. Brack7b9154b2003-09-27 19:23:50 +000012614 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012615 xmlNodePtr cur;
12616
12617 /*
12618 * Return the newly created nodeset after unlinking it from
12619 * they pseudo parent.
12620 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012621 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012622 *lst = cur;
12623 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012624#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012625 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12626 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12627 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012628 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12629 oldctxt->myDoc, cur);
12630 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012631#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012632 cur->parent = NULL;
12633 cur = cur->next;
12634 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012635 ctxt->myDoc->children->children = NULL;
12636 }
12637 if (ctxt->myDoc != NULL) {
12638 xmlFreeNode(ctxt->myDoc->children);
12639 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012640 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012641 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012642
12643 /*
12644 * Record in the parent context the number of entities replacement
12645 * done when parsing that reference.
12646 */
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012647 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard0161e632008-08-28 15:36:32 +000012648 /*
12649 * Also record the last error if any
12650 */
12651 if (ctxt->lastError.code != XML_ERR_OK)
12652 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12653
Daniel Veillard328f48c2002-11-15 15:24:34 +000012654 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012655 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012656 ctxt->attsDefault = NULL;
12657 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012658 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012659 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012660 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012661 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012662
Daniel Veillard328f48c2002-11-15 15:24:34 +000012663 return(ret);
12664}
12665
Daniel Veillard29b17482004-08-16 00:39:03 +000012666/**
12667 * xmlParseInNodeContext:
12668 * @node: the context node
12669 * @data: the input string
12670 * @datalen: the input string length in bytes
12671 * @options: a combination of xmlParserOption
12672 * @lst: the return value for the set of parsed nodes
12673 *
12674 * Parse a well-balanced chunk of an XML document
12675 * within the context (DTD, namespaces, etc ...) of the given node.
12676 *
12677 * The allowed sequence for the data is a Well Balanced Chunk defined by
12678 * the content production in the XML grammar:
12679 *
12680 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12681 *
12682 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12683 * error code otherwise
12684 */
12685xmlParserErrors
12686xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12687 int options, xmlNodePtr *lst) {
12688#ifdef SAX2
12689 xmlParserCtxtPtr ctxt;
12690 xmlDocPtr doc = NULL;
12691 xmlNodePtr fake, cur;
12692 int nsnr = 0;
12693
12694 xmlParserErrors ret = XML_ERR_OK;
12695
12696 /*
12697 * check all input parameters, grab the document
12698 */
12699 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12700 return(XML_ERR_INTERNAL_ERROR);
12701 switch (node->type) {
12702 case XML_ELEMENT_NODE:
12703 case XML_ATTRIBUTE_NODE:
12704 case XML_TEXT_NODE:
12705 case XML_CDATA_SECTION_NODE:
12706 case XML_ENTITY_REF_NODE:
12707 case XML_PI_NODE:
12708 case XML_COMMENT_NODE:
12709 case XML_DOCUMENT_NODE:
12710 case XML_HTML_DOCUMENT_NODE:
12711 break;
12712 default:
12713 return(XML_ERR_INTERNAL_ERROR);
12714
12715 }
12716 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12717 (node->type != XML_DOCUMENT_NODE) &&
12718 (node->type != XML_HTML_DOCUMENT_NODE))
12719 node = node->parent;
12720 if (node == NULL)
12721 return(XML_ERR_INTERNAL_ERROR);
12722 if (node->type == XML_ELEMENT_NODE)
12723 doc = node->doc;
12724 else
12725 doc = (xmlDocPtr) node;
12726 if (doc == NULL)
12727 return(XML_ERR_INTERNAL_ERROR);
12728
12729 /*
12730 * allocate a context and set-up everything not related to the
12731 * node position in the tree
12732 */
12733 if (doc->type == XML_DOCUMENT_NODE)
12734 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12735#ifdef LIBXML_HTML_ENABLED
12736 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12737 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12738#endif
12739 else
12740 return(XML_ERR_INTERNAL_ERROR);
12741
12742 if (ctxt == NULL)
12743 return(XML_ERR_NO_MEMORY);
12744 fake = xmlNewComment(NULL);
12745 if (fake == NULL) {
12746 xmlFreeParserCtxt(ctxt);
12747 return(XML_ERR_NO_MEMORY);
12748 }
12749 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012750
12751 /*
12752 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12753 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12754 * we must wait until the last moment to free the original one.
12755 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012756 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012757 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012758 xmlDictFree(ctxt->dict);
12759 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012760 } else
12761 options |= XML_PARSE_NODICT;
12762
Daniel Veillard37334572008-07-31 08:20:02 +000012763 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012764 xmlDetectSAX2(ctxt);
12765 ctxt->myDoc = doc;
12766
12767 if (node->type == XML_ELEMENT_NODE) {
12768 nodePush(ctxt, node);
12769 /*
12770 * initialize the SAX2 namespaces stack
12771 */
12772 cur = node;
12773 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12774 xmlNsPtr ns = cur->nsDef;
12775 const xmlChar *iprefix, *ihref;
12776
12777 while (ns != NULL) {
12778 if (ctxt->dict) {
12779 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12780 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12781 } else {
12782 iprefix = ns->prefix;
12783 ihref = ns->href;
12784 }
12785
12786 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12787 nsPush(ctxt, iprefix, ihref);
12788 nsnr++;
12789 }
12790 ns = ns->next;
12791 }
12792 cur = cur->parent;
12793 }
12794 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000012795 }
Daniel Veillard29b17482004-08-16 00:39:03 +000012796
12797 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12798 /*
12799 * ID/IDREF registration will be done in xmlValidateElement below
12800 */
12801 ctxt->loadsubset |= XML_SKIP_IDS;
12802 }
12803
Daniel Veillard499cc922006-01-18 17:22:35 +000012804#ifdef LIBXML_HTML_ENABLED
12805 if (doc->type == XML_HTML_DOCUMENT_NODE)
12806 __htmlParseContent(ctxt);
12807 else
12808#endif
12809 xmlParseContent(ctxt);
12810
Daniel Veillard29b17482004-08-16 00:39:03 +000012811 nsPop(ctxt, nsnr);
12812 if ((RAW == '<') && (NXT(1) == '/')) {
12813 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12814 } else if (RAW != 0) {
12815 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12816 }
12817 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12818 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12819 ctxt->wellFormed = 0;
12820 }
12821
12822 if (!ctxt->wellFormed) {
12823 if (ctxt->errNo == 0)
12824 ret = XML_ERR_INTERNAL_ERROR;
12825 else
12826 ret = (xmlParserErrors)ctxt->errNo;
12827 } else {
12828 ret = XML_ERR_OK;
12829 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012830
Daniel Veillard29b17482004-08-16 00:39:03 +000012831 /*
12832 * Return the newly created nodeset after unlinking it from
12833 * the pseudo sibling.
12834 */
Daniel Veillard0161e632008-08-28 15:36:32 +000012835
Daniel Veillard29b17482004-08-16 00:39:03 +000012836 cur = fake->next;
12837 fake->next = NULL;
12838 node->last = fake;
12839
12840 if (cur != NULL) {
12841 cur->prev = NULL;
12842 }
12843
12844 *lst = cur;
12845
12846 while (cur != NULL) {
12847 cur->parent = NULL;
12848 cur = cur->next;
12849 }
12850
12851 xmlUnlinkNode(fake);
12852 xmlFreeNode(fake);
12853
12854
12855 if (ret != XML_ERR_OK) {
12856 xmlFreeNodeList(*lst);
12857 *lst = NULL;
12858 }
William M. Brackc3f81342004-10-03 01:22:44 +000012859
William M. Brackb7b54de2004-10-06 16:38:01 +000012860 if (doc->dict != NULL)
12861 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012862 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012863
Daniel Veillard29b17482004-08-16 00:39:03 +000012864 return(ret);
12865#else /* !SAX2 */
12866 return(XML_ERR_INTERNAL_ERROR);
12867#endif
12868}
12869
Daniel Veillard81273902003-09-30 00:43:48 +000012870#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012871/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012872 * xmlParseBalancedChunkMemoryRecover:
12873 * @doc: the document the chunk pertains to
12874 * @sax: the SAX handler bloc (possibly NULL)
12875 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12876 * @depth: Used for loop detection, use 0
12877 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12878 * @lst: the return value for the set of parsed nodes
12879 * @recover: return nodes even if the data is broken (use 0)
12880 *
12881 *
12882 * Parse a well-balanced chunk of an XML document
12883 * called by the parser
12884 * The allowed sequence for the Well Balanced Chunk is the one defined by
12885 * the content production in the XML grammar:
12886 *
12887 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12888 *
12889 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12890 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000012891 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000012892 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000012893 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12894 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000012895 */
12896int
12897xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000012898 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000012899 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012900 xmlParserCtxtPtr ctxt;
12901 xmlDocPtr newDoc;
12902 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012903 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012904 int size;
12905 int ret = 0;
12906
Daniel Veillard0161e632008-08-28 15:36:32 +000012907 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000012908 return(XML_ERR_ENTITY_LOOP);
12909 }
12910
12911
Daniel Veillardcda96922001-08-21 10:56:31 +000012912 if (lst != NULL)
12913 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012914 if (string == NULL)
12915 return(-1);
12916
12917 size = xmlStrlen(string);
12918
12919 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12920 if (ctxt == NULL) return(-1);
12921 ctxt->userData = ctxt;
12922 if (sax != NULL) {
12923 oldsax = ctxt->sax;
12924 ctxt->sax = sax;
12925 if (user_data != NULL)
12926 ctxt->userData = user_data;
12927 }
12928 newDoc = xmlNewDoc(BAD_CAST "1.0");
12929 if (newDoc == NULL) {
12930 xmlFreeParserCtxt(ctxt);
12931 return(-1);
12932 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012933 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012934 if ((doc != NULL) && (doc->dict != NULL)) {
12935 xmlDictFree(ctxt->dict);
12936 ctxt->dict = doc->dict;
12937 xmlDictReference(ctxt->dict);
12938 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12939 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12940 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12941 ctxt->dictNames = 1;
12942 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000012943 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012944 }
Owen Taylor3473f882001-02-23 17:55:21 +000012945 if (doc != NULL) {
12946 newDoc->intSubset = doc->intSubset;
12947 newDoc->extSubset = doc->extSubset;
12948 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012949 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12950 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012951 if (sax != NULL)
12952 ctxt->sax = oldsax;
12953 xmlFreeParserCtxt(ctxt);
12954 newDoc->intSubset = NULL;
12955 newDoc->extSubset = NULL;
12956 xmlFreeDoc(newDoc);
12957 return(-1);
12958 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012959 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12960 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012961 if (doc == NULL) {
12962 ctxt->myDoc = newDoc;
12963 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012964 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012965 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012966 /* Ensure that doc has XML spec namespace */
12967 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12968 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012969 }
12970 ctxt->instate = XML_PARSER_CONTENT;
12971 ctxt->depth = depth;
12972
12973 /*
12974 * Doing validity checking on chunk doesn't make sense
12975 */
12976 ctxt->validate = 0;
12977 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012978 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012979
Daniel Veillardb39bc392002-10-26 19:29:51 +000012980 if ( doc != NULL ){
12981 content = doc->children;
12982 doc->children = NULL;
12983 xmlParseContent(ctxt);
12984 doc->children = content;
12985 }
12986 else {
12987 xmlParseContent(ctxt);
12988 }
Owen Taylor3473f882001-02-23 17:55:21 +000012989 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012990 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012991 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012992 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012993 }
12994 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012995 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012996 }
12997
12998 if (!ctxt->wellFormed) {
12999 if (ctxt->errNo == 0)
13000 ret = 1;
13001 else
13002 ret = ctxt->errNo;
13003 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013004 ret = 0;
13005 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013006
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013007 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13008 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013009
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013010 /*
13011 * Return the newly created nodeset after unlinking it from
13012 * they pseudo parent.
13013 */
13014 cur = newDoc->children->children;
13015 *lst = cur;
13016 while (cur != NULL) {
13017 xmlSetTreeDoc(cur, doc);
13018 cur->parent = NULL;
13019 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013020 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013021 newDoc->children->children = NULL;
13022 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013023
13024 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013025 ctxt->sax = oldsax;
13026 xmlFreeParserCtxt(ctxt);
13027 newDoc->intSubset = NULL;
13028 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013029 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013030 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013031
Owen Taylor3473f882001-02-23 17:55:21 +000013032 return(ret);
13033}
13034
13035/**
13036 * xmlSAXParseEntity:
13037 * @sax: the SAX handler block
13038 * @filename: the filename
13039 *
13040 * parse an XML external entity out of context and build a tree.
13041 * It use the given SAX function block to handle the parsing callback.
13042 * If sax is NULL, fallback to the default DOM tree building routines.
13043 *
13044 * [78] extParsedEnt ::= TextDecl? content
13045 *
13046 * This correspond to a "Well Balanced" chunk
13047 *
13048 * Returns the resulting document tree
13049 */
13050
13051xmlDocPtr
13052xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13053 xmlDocPtr ret;
13054 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013055
13056 ctxt = xmlCreateFileParserCtxt(filename);
13057 if (ctxt == NULL) {
13058 return(NULL);
13059 }
13060 if (sax != NULL) {
13061 if (ctxt->sax != NULL)
13062 xmlFree(ctxt->sax);
13063 ctxt->sax = sax;
13064 ctxt->userData = NULL;
13065 }
13066
Owen Taylor3473f882001-02-23 17:55:21 +000013067 xmlParseExtParsedEnt(ctxt);
13068
13069 if (ctxt->wellFormed)
13070 ret = ctxt->myDoc;
13071 else {
13072 ret = NULL;
13073 xmlFreeDoc(ctxt->myDoc);
13074 ctxt->myDoc = NULL;
13075 }
13076 if (sax != NULL)
13077 ctxt->sax = NULL;
13078 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013079
Owen Taylor3473f882001-02-23 17:55:21 +000013080 return(ret);
13081}
13082
13083/**
13084 * xmlParseEntity:
13085 * @filename: the filename
13086 *
13087 * parse an XML external entity out of context and build a tree.
13088 *
13089 * [78] extParsedEnt ::= TextDecl? content
13090 *
13091 * This correspond to a "Well Balanced" chunk
13092 *
13093 * Returns the resulting document tree
13094 */
13095
13096xmlDocPtr
13097xmlParseEntity(const char *filename) {
13098 return(xmlSAXParseEntity(NULL, filename));
13099}
Daniel Veillard81273902003-09-30 00:43:48 +000013100#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013101
13102/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013103 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013104 * @URL: the entity URL
13105 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013106 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013107 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013108 *
13109 * Create a parser context for an external entity
13110 * Automatic support for ZLIB/Compress compressed document is provided
13111 * by default if found at compile-time.
13112 *
13113 * Returns the new parser context or NULL
13114 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013115static xmlParserCtxtPtr
13116xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13117 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013118 xmlParserCtxtPtr ctxt;
13119 xmlParserInputPtr inputStream;
13120 char *directory = NULL;
13121 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013122
Owen Taylor3473f882001-02-23 17:55:21 +000013123 ctxt = xmlNewParserCtxt();
13124 if (ctxt == NULL) {
13125 return(NULL);
13126 }
13127
Rob Richards9c0aa472009-03-26 18:10:19 +000013128 if (pctx != NULL) {
13129 ctxt->options = pctx->options;
13130 }
13131
Owen Taylor3473f882001-02-23 17:55:21 +000013132 uri = xmlBuildURI(URL, base);
13133
13134 if (uri == NULL) {
13135 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13136 if (inputStream == NULL) {
13137 xmlFreeParserCtxt(ctxt);
13138 return(NULL);
13139 }
13140
13141 inputPush(ctxt, inputStream);
13142
13143 if ((ctxt->directory == NULL) && (directory == NULL))
13144 directory = xmlParserGetDirectory((char *)URL);
13145 if ((ctxt->directory == NULL) && (directory != NULL))
13146 ctxt->directory = directory;
13147 } else {
13148 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13149 if (inputStream == NULL) {
13150 xmlFree(uri);
13151 xmlFreeParserCtxt(ctxt);
13152 return(NULL);
13153 }
13154
13155 inputPush(ctxt, inputStream);
13156
13157 if ((ctxt->directory == NULL) && (directory == NULL))
13158 directory = xmlParserGetDirectory((char *)uri);
13159 if ((ctxt->directory == NULL) && (directory != NULL))
13160 ctxt->directory = directory;
13161 xmlFree(uri);
13162 }
Owen Taylor3473f882001-02-23 17:55:21 +000013163 return(ctxt);
13164}
13165
Rob Richards9c0aa472009-03-26 18:10:19 +000013166/**
13167 * xmlCreateEntityParserCtxt:
13168 * @URL: the entity URL
13169 * @ID: the entity PUBLIC ID
13170 * @base: a possible base for the target URI
13171 *
13172 * Create a parser context for an external entity
13173 * Automatic support for ZLIB/Compress compressed document is provided
13174 * by default if found at compile-time.
13175 *
13176 * Returns the new parser context or NULL
13177 */
13178xmlParserCtxtPtr
13179xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13180 const xmlChar *base) {
13181 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13182
13183}
13184
/************************************************************************
 *									*
 *		Front ends when parsing from a file			*
 *									*
 ************************************************************************/
13190
13191/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013192 * xmlCreateURLParserCtxt:
13193 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013194 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013195 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013196 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013197 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013198 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013199 *
13200 * Returns the new parser context or NULL
13201 */
13202xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013203xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013204{
13205 xmlParserCtxtPtr ctxt;
13206 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013207 char *directory = NULL;
13208
Owen Taylor3473f882001-02-23 17:55:21 +000013209 ctxt = xmlNewParserCtxt();
13210 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013211 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013212 return(NULL);
13213 }
13214
Daniel Veillarddf292f72005-01-16 19:00:15 +000013215 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013216 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013217 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013218
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013219 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013220 if (inputStream == NULL) {
13221 xmlFreeParserCtxt(ctxt);
13222 return(NULL);
13223 }
13224
Owen Taylor3473f882001-02-23 17:55:21 +000013225 inputPush(ctxt, inputStream);
13226 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013227 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013228 if ((ctxt->directory == NULL) && (directory != NULL))
13229 ctxt->directory = directory;
13230
13231 return(ctxt);
13232}
13233
Daniel Veillard61b93382003-11-03 14:28:31 +000013234/**
13235 * xmlCreateFileParserCtxt:
13236 * @filename: the filename
13237 *
13238 * Create a parser context for a file content.
13239 * Automatic support for ZLIB/Compress compressed document is provided
13240 * by default if found at compile-time.
13241 *
13242 * Returns the new parser context or NULL
13243 */
13244xmlParserCtxtPtr
13245xmlCreateFileParserCtxt(const char *filename)
13246{
13247 return(xmlCreateURLParserCtxt(filename, 0));
13248}
13249
Daniel Veillard81273902003-09-30 00:43:48 +000013250#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013251/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013252 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013253 * @sax: the SAX handler block
13254 * @filename: the filename
13255 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13256 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013257 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013258 *
13259 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13260 * compressed document is provided by default if found at compile-time.
13261 * It use the given SAX function block to handle the parsing callback.
13262 * If sax is NULL, fallback to the default DOM tree building routines.
13263 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013264 * User data (void *) is stored within the parser context in the
13265 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013266 *
Owen Taylor3473f882001-02-23 17:55:21 +000013267 * Returns the resulting document tree
13268 */
13269
13270xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013271xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13272 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013273 xmlDocPtr ret;
13274 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013275
Daniel Veillard635ef722001-10-29 11:48:19 +000013276 xmlInitParser();
13277
Owen Taylor3473f882001-02-23 17:55:21 +000013278 ctxt = xmlCreateFileParserCtxt(filename);
13279 if (ctxt == NULL) {
13280 return(NULL);
13281 }
13282 if (sax != NULL) {
13283 if (ctxt->sax != NULL)
13284 xmlFree(ctxt->sax);
13285 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013286 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013287 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013288 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013289 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013290 }
Owen Taylor3473f882001-02-23 17:55:21 +000013291
Daniel Veillard37d2d162008-03-14 10:54:00 +000013292 if (ctxt->directory == NULL)
13293 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013294
Daniel Veillarddad3f682002-11-17 16:47:27 +000013295 ctxt->recovery = recovery;
13296
Owen Taylor3473f882001-02-23 17:55:21 +000013297 xmlParseDocument(ctxt);
13298
William M. Brackc07329e2003-09-08 01:57:30 +000013299 if ((ctxt->wellFormed) || recovery) {
13300 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013301 if (ret != NULL) {
13302 if (ctxt->input->buf->compressed > 0)
13303 ret->compression = 9;
13304 else
13305 ret->compression = ctxt->input->buf->compressed;
13306 }
William M. Brackc07329e2003-09-08 01:57:30 +000013307 }
Owen Taylor3473f882001-02-23 17:55:21 +000013308 else {
13309 ret = NULL;
13310 xmlFreeDoc(ctxt->myDoc);
13311 ctxt->myDoc = NULL;
13312 }
13313 if (sax != NULL)
13314 ctxt->sax = NULL;
13315 xmlFreeParserCtxt(ctxt);
13316
13317 return(ret);
13318}
13319
13320/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013321 * xmlSAXParseFile:
13322 * @sax: the SAX handler block
13323 * @filename: the filename
13324 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13325 * documents
13326 *
13327 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13328 * compressed document is provided by default if found at compile-time.
13329 * It use the given SAX function block to handle the parsing callback.
13330 * If sax is NULL, fallback to the default DOM tree building routines.
13331 *
13332 * Returns the resulting document tree
13333 */
13334
13335xmlDocPtr
13336xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13337 int recovery) {
13338 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13339}
13340
13341/**
Owen Taylor3473f882001-02-23 17:55:21 +000013342 * xmlRecoverDoc:
13343 * @cur: a pointer to an array of xmlChar
13344 *
13345 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013346 * In the case the document is not Well Formed, a attempt to build a
13347 * tree is tried anyway
13348 *
13349 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013350 */
13351
13352xmlDocPtr
13353xmlRecoverDoc(xmlChar *cur) {
13354 return(xmlSAXParseDoc(NULL, cur, 1));
13355}
13356
13357/**
13358 * xmlParseFile:
13359 * @filename: the filename
13360 *
13361 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13362 * compressed document is provided by default if found at compile-time.
13363 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013364 * Returns the resulting document tree if the file was wellformed,
13365 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013366 */
13367
13368xmlDocPtr
13369xmlParseFile(const char *filename) {
13370 return(xmlSAXParseFile(NULL, filename, 0));
13371}
13372
13373/**
13374 * xmlRecoverFile:
13375 * @filename: the filename
13376 *
13377 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13378 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013379 * In the case the document is not Well Formed, it attempts to build
13380 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013381 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013382 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013383 */
13384
13385xmlDocPtr
13386xmlRecoverFile(const char *filename) {
13387 return(xmlSAXParseFile(NULL, filename, 1));
13388}
13389
13390
13391/**
13392 * xmlSetupParserForBuffer:
13393 * @ctxt: an XML parser context
13394 * @buffer: a xmlChar * buffer
13395 * @filename: a file name
13396 *
13397 * Setup the parser context to parse a new buffer; Clears any prior
13398 * contents from the parser context. The buffer parameter must not be
13399 * NULL, but the filename parameter can be
13400 */
13401void
13402xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13403 const char* filename)
13404{
13405 xmlParserInputPtr input;
13406
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013407 if ((ctxt == NULL) || (buffer == NULL))
13408 return;
13409
Owen Taylor3473f882001-02-23 17:55:21 +000013410 input = xmlNewInputStream(ctxt);
13411 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013412 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013413 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013414 return;
13415 }
13416
13417 xmlClearParserCtxt(ctxt);
13418 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013419 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013420 input->base = buffer;
13421 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013422 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013423 inputPush(ctxt, input);
13424}
13425
13426/**
13427 * xmlSAXUserParseFile:
13428 * @sax: a SAX handler
13429 * @user_data: The user data returned on SAX callbacks
13430 * @filename: a file name
13431 *
13432 * parse an XML file and call the given SAX handler routines.
13433 * Automatic support for ZLIB/Compress compressed document is provided
13434 *
13435 * Returns 0 in case of success or a error number otherwise
13436 */
13437int
13438xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13439 const char *filename) {
13440 int ret = 0;
13441 xmlParserCtxtPtr ctxt;
13442
13443 ctxt = xmlCreateFileParserCtxt(filename);
13444 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013445 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013446 xmlFree(ctxt->sax);
13447 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013448 xmlDetectSAX2(ctxt);
13449
Owen Taylor3473f882001-02-23 17:55:21 +000013450 if (user_data != NULL)
13451 ctxt->userData = user_data;
13452
13453 xmlParseDocument(ctxt);
13454
13455 if (ctxt->wellFormed)
13456 ret = 0;
13457 else {
13458 if (ctxt->errNo != 0)
13459 ret = ctxt->errNo;
13460 else
13461 ret = -1;
13462 }
13463 if (sax != NULL)
13464 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013465 if (ctxt->myDoc != NULL) {
13466 xmlFreeDoc(ctxt->myDoc);
13467 ctxt->myDoc = NULL;
13468 }
Owen Taylor3473f882001-02-23 17:55:21 +000013469 xmlFreeParserCtxt(ctxt);
13470
13471 return ret;
13472}
Daniel Veillard81273902003-09-30 00:43:48 +000013473#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013474
13475/************************************************************************
13476 * *
13477 * Front ends when parsing from memory *
13478 * *
13479 ************************************************************************/
13480
13481/**
13482 * xmlCreateMemoryParserCtxt:
13483 * @buffer: a pointer to a char array
13484 * @size: the size of the array
13485 *
13486 * Create a parser context for an XML in-memory document.
13487 *
13488 * Returns the new parser context or NULL
13489 */
13490xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013491xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013492 xmlParserCtxtPtr ctxt;
13493 xmlParserInputPtr input;
13494 xmlParserInputBufferPtr buf;
13495
13496 if (buffer == NULL)
13497 return(NULL);
13498 if (size <= 0)
13499 return(NULL);
13500
13501 ctxt = xmlNewParserCtxt();
13502 if (ctxt == NULL)
13503 return(NULL);
13504
Daniel Veillard53350552003-09-18 13:35:51 +000013505 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013506 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013507 if (buf == NULL) {
13508 xmlFreeParserCtxt(ctxt);
13509 return(NULL);
13510 }
Owen Taylor3473f882001-02-23 17:55:21 +000013511
13512 input = xmlNewInputStream(ctxt);
13513 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013514 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013515 xmlFreeParserCtxt(ctxt);
13516 return(NULL);
13517 }
13518
13519 input->filename = NULL;
13520 input->buf = buf;
13521 input->base = input->buf->buffer->content;
13522 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013523 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013524
13525 inputPush(ctxt, input);
13526 return(ctxt);
13527}
13528
Daniel Veillard81273902003-09-30 00:43:48 +000013529#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013530/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013531 * xmlSAXParseMemoryWithData:
13532 * @sax: the SAX handler block
13533 * @buffer: an pointer to a char array
13534 * @size: the size of the array
13535 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13536 * documents
13537 * @data: the userdata
13538 *
13539 * parse an XML in-memory block and use the given SAX function block
13540 * to handle the parsing callback. If sax is NULL, fallback to the default
13541 * DOM tree building routines.
13542 *
13543 * User data (void *) is stored within the parser context in the
13544 * context's _private member, so it is available nearly everywhere in libxml
13545 *
13546 * Returns the resulting document tree
13547 */
13548
13549xmlDocPtr
13550xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13551 int size, int recovery, void *data) {
13552 xmlDocPtr ret;
13553 xmlParserCtxtPtr ctxt;
13554
13555 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13556 if (ctxt == NULL) return(NULL);
13557 if (sax != NULL) {
13558 if (ctxt->sax != NULL)
13559 xmlFree(ctxt->sax);
13560 ctxt->sax = sax;
13561 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013562 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013563 if (data!=NULL) {
13564 ctxt->_private=data;
13565 }
13566
Daniel Veillardadba5f12003-04-04 16:09:01 +000013567 ctxt->recovery = recovery;
13568
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013569 xmlParseDocument(ctxt);
13570
13571 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13572 else {
13573 ret = NULL;
13574 xmlFreeDoc(ctxt->myDoc);
13575 ctxt->myDoc = NULL;
13576 }
13577 if (sax != NULL)
13578 ctxt->sax = NULL;
13579 xmlFreeParserCtxt(ctxt);
13580
13581 return(ret);
13582}
13583
13584/**
Owen Taylor3473f882001-02-23 17:55:21 +000013585 * xmlSAXParseMemory:
13586 * @sax: the SAX handler block
13587 * @buffer: an pointer to a char array
13588 * @size: the size of the array
13589 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13590 * documents
13591 *
13592 * parse an XML in-memory block and use the given SAX function block
13593 * to handle the parsing callback. If sax is NULL, fallback to the default
13594 * DOM tree building routines.
13595 *
13596 * Returns the resulting document tree
13597 */
13598xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013599xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13600 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013601 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013602}
13603
13604/**
13605 * xmlParseMemory:
13606 * @buffer: an pointer to a char array
13607 * @size: the size of the array
13608 *
13609 * parse an XML in-memory block and build a tree.
13610 *
13611 * Returns the resulting document tree
13612 */
13613
Daniel Veillard50822cb2001-07-26 20:05:51 +000013614xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013615 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13616}
13617
13618/**
13619 * xmlRecoverMemory:
13620 * @buffer: an pointer to a char array
13621 * @size: the size of the array
13622 *
13623 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013624 * In the case the document is not Well Formed, an attempt to
13625 * build a tree is tried anyway
13626 *
13627 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013628 */
13629
Daniel Veillard50822cb2001-07-26 20:05:51 +000013630xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013631 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13632}
13633
13634/**
13635 * xmlSAXUserParseMemory:
13636 * @sax: a SAX handler
13637 * @user_data: The user data returned on SAX callbacks
13638 * @buffer: an in-memory XML document input
13639 * @size: the length of the XML document in bytes
13640 *
13641 * A better SAX parsing routine.
13642 * parse an XML in-memory buffer and call the given SAX handler routines.
13643 *
13644 * Returns 0 in case of success or a error number otherwise
13645 */
13646int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013647 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013648 int ret = 0;
13649 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013650
13651 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13652 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013653 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13654 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013655 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013656 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013657
Daniel Veillard30211a02001-04-26 09:33:18 +000013658 if (user_data != NULL)
13659 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000013660
13661 xmlParseDocument(ctxt);
13662
13663 if (ctxt->wellFormed)
13664 ret = 0;
13665 else {
13666 if (ctxt->errNo != 0)
13667 ret = ctxt->errNo;
13668 else
13669 ret = -1;
13670 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013671 if (sax != NULL)
13672 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013673 if (ctxt->myDoc != NULL) {
13674 xmlFreeDoc(ctxt->myDoc);
13675 ctxt->myDoc = NULL;
13676 }
Owen Taylor3473f882001-02-23 17:55:21 +000013677 xmlFreeParserCtxt(ctxt);
13678
13679 return ret;
13680}
Daniel Veillard81273902003-09-30 00:43:48 +000013681#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013682
13683/**
13684 * xmlCreateDocParserCtxt:
13685 * @cur: a pointer to an array of xmlChar
13686 *
13687 * Creates a parser context for an XML in-memory document.
13688 *
13689 * Returns the new parser context or NULL
13690 */
13691xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013692xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013693 int len;
13694
13695 if (cur == NULL)
13696 return(NULL);
13697 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013698 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013699}
13700
Daniel Veillard81273902003-09-30 00:43:48 +000013701#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013702/**
13703 * xmlSAXParseDoc:
13704 * @sax: the SAX handler block
13705 * @cur: a pointer to an array of xmlChar
13706 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13707 * documents
13708 *
13709 * parse an XML in-memory document and build a tree.
13710 * It use the given SAX function block to handle the parsing callback.
13711 * If sax is NULL, fallback to the default DOM tree building routines.
13712 *
13713 * Returns the resulting document tree
13714 */
13715
13716xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013717xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013718 xmlDocPtr ret;
13719 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013720 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013721
Daniel Veillard38936062004-11-04 17:45:11 +000013722 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013723
13724
13725 ctxt = xmlCreateDocParserCtxt(cur);
13726 if (ctxt == NULL) return(NULL);
13727 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013728 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013729 ctxt->sax = sax;
13730 ctxt->userData = NULL;
13731 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013732 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013733
13734 xmlParseDocument(ctxt);
13735 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13736 else {
13737 ret = NULL;
13738 xmlFreeDoc(ctxt->myDoc);
13739 ctxt->myDoc = NULL;
13740 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013741 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013742 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013743 xmlFreeParserCtxt(ctxt);
13744
13745 return(ret);
13746}
13747
13748/**
13749 * xmlParseDoc:
13750 * @cur: a pointer to an array of xmlChar
13751 *
13752 * parse an XML in-memory document and build a tree.
13753 *
13754 * Returns the resulting document tree
13755 */
13756
13757xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013758xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013759 return(xmlSAXParseDoc(NULL, cur, 0));
13760}
Daniel Veillard81273902003-09-30 00:43:48 +000013761#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013762
Daniel Veillard81273902003-09-30 00:43:48 +000013763#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013764/************************************************************************
13765 * *
13766 * Specific function to keep track of entities references *
13767 * and used by the XSLT debugger *
13768 * *
13769 ************************************************************************/
13770
13771static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13772
13773/**
13774 * xmlAddEntityReference:
13775 * @ent : A valid entity
13776 * @firstNode : A valid first node for children of entity
13777 * @lastNode : A valid last node of children entity
13778 *
13779 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13780 */
13781static void
13782xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13783 xmlNodePtr lastNode)
13784{
13785 if (xmlEntityRefFunc != NULL) {
13786 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13787 }
13788}
13789
13790
13791/**
13792 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013793 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013794 *
13795 * Set the function to call call back when a xml reference has been made
13796 */
13797void
13798xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13799{
13800 xmlEntityRefFunc = func;
13801}
Daniel Veillard81273902003-09-30 00:43:48 +000013802#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013803
13804/************************************************************************
13805 * *
13806 * Miscellaneous *
13807 * *
13808 ************************************************************************/
13809
13810#ifdef LIBXML_XPATH_ENABLED
13811#include <libxml/xpath.h>
13812#endif
13813
Daniel Veillardffa3c742005-07-21 13:24:09 +000013814extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013815static int xmlParserInitialized = 0;
13816
13817/**
13818 * xmlInitParser:
13819 *
13820 * Initialization function for the XML parser.
13821 * This is not reentrant. Call once before processing in case of
13822 * use in multithreaded programs.
13823 */
13824
13825void
13826xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013827 if (xmlParserInitialized != 0)
13828 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013829
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013830#ifdef LIBXML_THREAD_ENABLED
13831 __xmlGlobalInitMutexLock();
13832 if (xmlParserInitialized == 0) {
13833#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020013834 xmlInitGlobals();
13835 xmlInitThreads();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013836 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13837 (xmlGenericError == NULL))
13838 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013839 xmlInitMemory();
13840 xmlInitCharEncodingHandlers();
13841 xmlDefaultSAXHandlerInit();
13842 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013843#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013844 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013845#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013846#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013847 htmlInitAutoClose();
13848 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013849#endif
13850#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013851 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013852#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013853 xmlParserInitialized = 1;
13854#ifdef LIBXML_THREAD_ENABLED
13855 }
13856 __xmlGlobalInitMutexUnlock();
13857#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013858}
13859
13860/**
13861 * xmlCleanupParser:
13862 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013863 * This function name is somewhat misleading. It does not clean up
13864 * parser state, it cleans up memory allocated by the library itself.
13865 * It is a cleanup function for the XML library. It tries to reclaim all
13866 * related global memory allocated for the library processing.
13867 * It doesn't deallocate any document related memory. One should
13868 * call xmlCleanupParser() only when the process has finished using
13869 * the library and all XML/HTML documents built with it.
13870 * See also xmlInitParser() which has the opposite function of preparing
13871 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000013872 *
13873 * WARNING: if your application is multithreaded or has plugin support
13874 * calling this may crash the application if another thread or
13875 * a plugin is still using libxml2. It's sometimes very hard to
13876 * guess if libxml2 is in use in the application, some libraries
13877 * or plugins may use it without notice. In case of doubt abstain
13878 * from calling this function or do it just before calling exit()
13879 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000013880 */
13881
13882void
13883xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013884 if (!xmlParserInitialized)
13885 return;
13886
Owen Taylor3473f882001-02-23 17:55:21 +000013887 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013888#ifdef LIBXML_CATALOG_ENABLED
13889 xmlCatalogCleanup();
13890#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013891 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013892 xmlCleanupInputCallbacks();
13893#ifdef LIBXML_OUTPUT_ENABLED
13894 xmlCleanupOutputCallbacks();
13895#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013896#ifdef LIBXML_SCHEMAS_ENABLED
13897 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013898 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013899#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013900 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013901 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013902 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013903 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013904 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013905}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013906
13907/************************************************************************
13908 * *
13909 * New set (2.6.0) of simpler and more flexible APIs *
13910 * *
13911 ************************************************************************/
13912
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The expansion is wrapped in do { } while (0) so the macro behaves as a
 * single statement: it must be followed by a semicolon and is safe as
 * the body of an unbraced if/else.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
13924
13925/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013926 * xmlCtxtReset:
13927 * @ctxt: an XML parser context
13928 *
13929 * Reset a parser context
13930 */
13931void
13932xmlCtxtReset(xmlParserCtxtPtr ctxt)
13933{
13934 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013935 xmlDictPtr dict;
13936
13937 if (ctxt == NULL)
13938 return;
13939
13940 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013941
13942 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13943 xmlFreeInputStream(input);
13944 }
13945 ctxt->inputNr = 0;
13946 ctxt->input = NULL;
13947
13948 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013949 if (ctxt->spaceTab != NULL) {
13950 ctxt->spaceTab[0] = -1;
13951 ctxt->space = &ctxt->spaceTab[0];
13952 } else {
13953 ctxt->space = NULL;
13954 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013955
13956
13957 ctxt->nodeNr = 0;
13958 ctxt->node = NULL;
13959
13960 ctxt->nameNr = 0;
13961 ctxt->name = NULL;
13962
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013963 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013964 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013965 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013966 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013967 DICT_FREE(ctxt->directory);
13968 ctxt->directory = NULL;
13969 DICT_FREE(ctxt->extSubURI);
13970 ctxt->extSubURI = NULL;
13971 DICT_FREE(ctxt->extSubSystem);
13972 ctxt->extSubSystem = NULL;
13973 if (ctxt->myDoc != NULL)
13974 xmlFreeDoc(ctxt->myDoc);
13975 ctxt->myDoc = NULL;
13976
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013977 ctxt->standalone = -1;
13978 ctxt->hasExternalSubset = 0;
13979 ctxt->hasPErefs = 0;
13980 ctxt->html = 0;
13981 ctxt->external = 0;
13982 ctxt->instate = XML_PARSER_START;
13983 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013984
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013985 ctxt->wellFormed = 1;
13986 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013987 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013988 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013989#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013990 ctxt->vctxt.userData = ctxt;
13991 ctxt->vctxt.error = xmlParserValidityError;
13992 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013993#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013994 ctxt->record_info = 0;
13995 ctxt->nbChars = 0;
13996 ctxt->checkIndex = 0;
13997 ctxt->inSubset = 0;
13998 ctxt->errNo = XML_ERR_OK;
13999 ctxt->depth = 0;
14000 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14001 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014002 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014003 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014004 xmlInitNodeInfoSeq(&ctxt->node_seq);
14005
14006 if (ctxt->attsDefault != NULL) {
14007 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14008 ctxt->attsDefault = NULL;
14009 }
14010 if (ctxt->attsSpecial != NULL) {
14011 xmlHashFree(ctxt->attsSpecial, NULL);
14012 ctxt->attsSpecial = NULL;
14013 }
14014
Daniel Veillard4432df22003-09-28 18:58:27 +000014015#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014016 if (ctxt->catalogs != NULL)
14017 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014018#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014019 if (ctxt->lastError.code != XML_ERR_OK)
14020 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014021}
14022
14023/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014024 * xmlCtxtResetPush:
14025 * @ctxt: an XML parser context
14026 * @chunk: a pointer to an array of chars
14027 * @size: number of chars in the array
14028 * @filename: an optional file name or URI
14029 * @encoding: the document encoding, or NULL
14030 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014031 * Reset a push parser context
14032 *
14033 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014034 */
14035int
14036xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14037 int size, const char *filename, const char *encoding)
14038{
14039 xmlParserInputPtr inputStream;
14040 xmlParserInputBufferPtr buf;
14041 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14042
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014043 if (ctxt == NULL)
14044 return(1);
14045
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014046 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14047 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14048
14049 buf = xmlAllocParserInputBuffer(enc);
14050 if (buf == NULL)
14051 return(1);
14052
14053 if (ctxt == NULL) {
14054 xmlFreeParserInputBuffer(buf);
14055 return(1);
14056 }
14057
14058 xmlCtxtReset(ctxt);
14059
14060 if (ctxt->pushTab == NULL) {
14061 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14062 sizeof(xmlChar *));
14063 if (ctxt->pushTab == NULL) {
14064 xmlErrMemory(ctxt, NULL);
14065 xmlFreeParserInputBuffer(buf);
14066 return(1);
14067 }
14068 }
14069
14070 if (filename == NULL) {
14071 ctxt->directory = NULL;
14072 } else {
14073 ctxt->directory = xmlParserGetDirectory(filename);
14074 }
14075
14076 inputStream = xmlNewInputStream(ctxt);
14077 if (inputStream == NULL) {
14078 xmlFreeParserInputBuffer(buf);
14079 return(1);
14080 }
14081
14082 if (filename == NULL)
14083 inputStream->filename = NULL;
14084 else
14085 inputStream->filename = (char *)
14086 xmlCanonicPath((const xmlChar *) filename);
14087 inputStream->buf = buf;
14088 inputStream->base = inputStream->buf->buffer->content;
14089 inputStream->cur = inputStream->buf->buffer->content;
14090 inputStream->end =
14091 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14092
14093 inputPush(ctxt, inputStream);
14094
14095 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14096 (ctxt->input->buf != NULL)) {
14097 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14098 int cur = ctxt->input->cur - ctxt->input->base;
14099
14100 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14101
14102 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14103 ctxt->input->cur = ctxt->input->base + cur;
14104 ctxt->input->end =
14105 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14106 use];
14107#ifdef DEBUG_PUSH
14108 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14109#endif
14110 }
14111
14112 if (encoding != NULL) {
14113 xmlCharEncodingHandlerPtr hdlr;
14114
Daniel Veillard37334572008-07-31 08:20:02 +000014115 if (ctxt->encoding != NULL)
14116 xmlFree((xmlChar *) ctxt->encoding);
14117 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14118
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014119 hdlr = xmlFindCharEncodingHandler(encoding);
14120 if (hdlr != NULL) {
14121 xmlSwitchToEncoding(ctxt, hdlr);
14122 } else {
14123 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14124 "Unsupported encoding %s\n", BAD_CAST encoding);
14125 }
14126 } else if (enc != XML_CHAR_ENCODING_NONE) {
14127 xmlSwitchEncoding(ctxt, enc);
14128 }
14129
14130 return(0);
14131}
14132
Daniel Veillard37334572008-07-31 08:20:02 +000014133
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014134/**
Daniel Veillard37334572008-07-31 08:20:02 +000014135 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014136 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014137 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014138 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014139 *
14140 * Applies the options to the parser context
14141 *
14142 * Returns 0 in case of success, the set of unknown or unimplemented options
14143 * in case of error.
14144 */
Daniel Veillard37334572008-07-31 08:20:02 +000014145static int
14146xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014147{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014148 if (ctxt == NULL)
14149 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014150 if (encoding != NULL) {
14151 if (ctxt->encoding != NULL)
14152 xmlFree((xmlChar *) ctxt->encoding);
14153 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14154 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014155 if (options & XML_PARSE_RECOVER) {
14156 ctxt->recovery = 1;
14157 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014158 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014159 } else
14160 ctxt->recovery = 0;
14161 if (options & XML_PARSE_DTDLOAD) {
14162 ctxt->loadsubset = XML_DETECT_IDS;
14163 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014164 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014165 } else
14166 ctxt->loadsubset = 0;
14167 if (options & XML_PARSE_DTDATTR) {
14168 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14169 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014170 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014171 }
14172 if (options & XML_PARSE_NOENT) {
14173 ctxt->replaceEntities = 1;
14174 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14175 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014176 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014177 } else
14178 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014179 if (options & XML_PARSE_PEDANTIC) {
14180 ctxt->pedantic = 1;
14181 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014182 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014183 } else
14184 ctxt->pedantic = 0;
14185 if (options & XML_PARSE_NOBLANKS) {
14186 ctxt->keepBlanks = 0;
14187 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14188 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014189 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014190 } else
14191 ctxt->keepBlanks = 1;
14192 if (options & XML_PARSE_DTDVALID) {
14193 ctxt->validate = 1;
14194 if (options & XML_PARSE_NOWARNING)
14195 ctxt->vctxt.warning = NULL;
14196 if (options & XML_PARSE_NOERROR)
14197 ctxt->vctxt.error = NULL;
14198 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014199 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014200 } else
14201 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014202 if (options & XML_PARSE_NOWARNING) {
14203 ctxt->sax->warning = NULL;
14204 options -= XML_PARSE_NOWARNING;
14205 }
14206 if (options & XML_PARSE_NOERROR) {
14207 ctxt->sax->error = NULL;
14208 ctxt->sax->fatalError = NULL;
14209 options -= XML_PARSE_NOERROR;
14210 }
Daniel Veillard81273902003-09-30 00:43:48 +000014211#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014212 if (options & XML_PARSE_SAX1) {
14213 ctxt->sax->startElement = xmlSAX2StartElement;
14214 ctxt->sax->endElement = xmlSAX2EndElement;
14215 ctxt->sax->startElementNs = NULL;
14216 ctxt->sax->endElementNs = NULL;
14217 ctxt->sax->initialized = 1;
14218 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014219 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014220 }
Daniel Veillard81273902003-09-30 00:43:48 +000014221#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014222 if (options & XML_PARSE_NODICT) {
14223 ctxt->dictNames = 0;
14224 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014225 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014226 } else {
14227 ctxt->dictNames = 1;
14228 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014229 if (options & XML_PARSE_NOCDATA) {
14230 ctxt->sax->cdataBlock = NULL;
14231 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014232 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014233 }
14234 if (options & XML_PARSE_NSCLEAN) {
14235 ctxt->options |= XML_PARSE_NSCLEAN;
14236 options -= XML_PARSE_NSCLEAN;
14237 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014238 if (options & XML_PARSE_NONET) {
14239 ctxt->options |= XML_PARSE_NONET;
14240 options -= XML_PARSE_NONET;
14241 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014242 if (options & XML_PARSE_COMPACT) {
14243 ctxt->options |= XML_PARSE_COMPACT;
14244 options -= XML_PARSE_COMPACT;
14245 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014246 if (options & XML_PARSE_OLD10) {
14247 ctxt->options |= XML_PARSE_OLD10;
14248 options -= XML_PARSE_OLD10;
14249 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014250 if (options & XML_PARSE_NOBASEFIX) {
14251 ctxt->options |= XML_PARSE_NOBASEFIX;
14252 options -= XML_PARSE_NOBASEFIX;
14253 }
14254 if (options & XML_PARSE_HUGE) {
14255 ctxt->options |= XML_PARSE_HUGE;
14256 options -= XML_PARSE_HUGE;
14257 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014258 if (options & XML_PARSE_OLDSAX) {
14259 ctxt->options |= XML_PARSE_OLDSAX;
14260 options -= XML_PARSE_OLDSAX;
14261 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014262 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014263 return (options);
14264}
14265
14266/**
Daniel Veillard37334572008-07-31 08:20:02 +000014267 * xmlCtxtUseOptions:
14268 * @ctxt: an XML parser context
14269 * @options: a combination of xmlParserOption
14270 *
14271 * Applies the options to the parser context
14272 *
14273 * Returns 0 in case of success, the set of unknown or unimplemented options
14274 * in case of error.
14275 */
14276int
14277xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14278{
14279 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14280}
14281
14282/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014283 * xmlDoRead:
14284 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014285 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014286 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014287 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014288 * @reuse: keep the context for reuse
14289 *
14290 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014291 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014292 * Returns the resulting document tree or NULL
14293 */
14294static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014295xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14296 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014297{
14298 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014299
14300 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014301 if (encoding != NULL) {
14302 xmlCharEncodingHandlerPtr hdlr;
14303
14304 hdlr = xmlFindCharEncodingHandler(encoding);
14305 if (hdlr != NULL)
14306 xmlSwitchToEncoding(ctxt, hdlr);
14307 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014308 if ((URL != NULL) && (ctxt->input != NULL) &&
14309 (ctxt->input->filename == NULL))
14310 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014311 xmlParseDocument(ctxt);
14312 if ((ctxt->wellFormed) || ctxt->recovery)
14313 ret = ctxt->myDoc;
14314 else {
14315 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014316 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014317 xmlFreeDoc(ctxt->myDoc);
14318 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014319 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014320 ctxt->myDoc = NULL;
14321 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014322 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014323 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014324
14325 return (ret);
14326}
14327
14328/**
14329 * xmlReadDoc:
14330 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014331 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014332 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014333 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014334 *
14335 * parse an XML in-memory document and build a tree.
14336 *
14337 * Returns the resulting document tree
14338 */
14339xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014340xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014341{
14342 xmlParserCtxtPtr ctxt;
14343
14344 if (cur == NULL)
14345 return (NULL);
14346
14347 ctxt = xmlCreateDocParserCtxt(cur);
14348 if (ctxt == NULL)
14349 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014350 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014351}
14352
14353/**
14354 * xmlReadFile:
14355 * @filename: a file or URL
14356 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014357 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014358 *
14359 * parse an XML file from the filesystem or the network.
14360 *
14361 * Returns the resulting document tree
14362 */
14363xmlDocPtr
14364xmlReadFile(const char *filename, const char *encoding, int options)
14365{
14366 xmlParserCtxtPtr ctxt;
14367
Daniel Veillard61b93382003-11-03 14:28:31 +000014368 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014369 if (ctxt == NULL)
14370 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014371 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014372}
14373
14374/**
14375 * xmlReadMemory:
14376 * @buffer: a pointer to a char array
14377 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014378 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014379 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014380 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014381 *
14382 * parse an XML in-memory document and build a tree.
14383 *
14384 * Returns the resulting document tree
14385 */
14386xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014387xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014388{
14389 xmlParserCtxtPtr ctxt;
14390
14391 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14392 if (ctxt == NULL)
14393 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014394 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014395}
14396
14397/**
14398 * xmlReadFd:
14399 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014400 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014401 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014402 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014403 *
14404 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014405 * NOTE that the file descriptor will not be closed when the
14406 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014407 *
14408 * Returns the resulting document tree
14409 */
14410xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014411xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014412{
14413 xmlParserCtxtPtr ctxt;
14414 xmlParserInputBufferPtr input;
14415 xmlParserInputPtr stream;
14416
14417 if (fd < 0)
14418 return (NULL);
14419
14420 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14421 if (input == NULL)
14422 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014423 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014424 ctxt = xmlNewParserCtxt();
14425 if (ctxt == NULL) {
14426 xmlFreeParserInputBuffer(input);
14427 return (NULL);
14428 }
14429 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14430 if (stream == NULL) {
14431 xmlFreeParserInputBuffer(input);
14432 xmlFreeParserCtxt(ctxt);
14433 return (NULL);
14434 }
14435 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014436 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014437}
14438
14439/**
14440 * xmlReadIO:
14441 * @ioread: an I/O read function
14442 * @ioclose: an I/O close function
14443 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014444 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014445 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014446 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014447 *
14448 * parse an XML document from I/O functions and source and build a tree.
14449 *
14450 * Returns the resulting document tree
14451 */
14452xmlDocPtr
14453xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014454 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014455{
14456 xmlParserCtxtPtr ctxt;
14457 xmlParserInputBufferPtr input;
14458 xmlParserInputPtr stream;
14459
14460 if (ioread == NULL)
14461 return (NULL);
14462
14463 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14464 XML_CHAR_ENCODING_NONE);
14465 if (input == NULL)
14466 return (NULL);
14467 ctxt = xmlNewParserCtxt();
14468 if (ctxt == NULL) {
14469 xmlFreeParserInputBuffer(input);
14470 return (NULL);
14471 }
14472 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14473 if (stream == NULL) {
14474 xmlFreeParserInputBuffer(input);
14475 xmlFreeParserCtxt(ctxt);
14476 return (NULL);
14477 }
14478 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014479 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014480}
14481
14482/**
14483 * xmlCtxtReadDoc:
14484 * @ctxt: an XML parser context
14485 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014486 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014487 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014488 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014489 *
14490 * parse an XML in-memory document and build a tree.
14491 * This reuses the existing @ctxt parser context
14492 *
14493 * Returns the resulting document tree
14494 */
14495xmlDocPtr
14496xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014497 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014498{
14499 xmlParserInputPtr stream;
14500
14501 if (cur == NULL)
14502 return (NULL);
14503 if (ctxt == NULL)
14504 return (NULL);
14505
14506 xmlCtxtReset(ctxt);
14507
14508 stream = xmlNewStringInputStream(ctxt, cur);
14509 if (stream == NULL) {
14510 return (NULL);
14511 }
14512 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014513 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014514}
14515
14516/**
14517 * xmlCtxtReadFile:
14518 * @ctxt: an XML parser context
14519 * @filename: a file or URL
14520 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014521 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014522 *
14523 * parse an XML file from the filesystem or the network.
14524 * This reuses the existing @ctxt parser context
14525 *
14526 * Returns the resulting document tree
14527 */
14528xmlDocPtr
14529xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14530 const char *encoding, int options)
14531{
14532 xmlParserInputPtr stream;
14533
14534 if (filename == NULL)
14535 return (NULL);
14536 if (ctxt == NULL)
14537 return (NULL);
14538
14539 xmlCtxtReset(ctxt);
14540
Daniel Veillard29614c72004-11-26 10:47:26 +000014541 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014542 if (stream == NULL) {
14543 return (NULL);
14544 }
14545 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014546 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014547}
14548
14549/**
14550 * xmlCtxtReadMemory:
14551 * @ctxt: an XML parser context
14552 * @buffer: a pointer to a char array
14553 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014554 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014555 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014556 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014557 *
14558 * parse an XML in-memory document and build a tree.
14559 * This reuses the existing @ctxt parser context
14560 *
14561 * Returns the resulting document tree
14562 */
14563xmlDocPtr
14564xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014565 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014566{
14567 xmlParserInputBufferPtr input;
14568 xmlParserInputPtr stream;
14569
14570 if (ctxt == NULL)
14571 return (NULL);
14572 if (buffer == NULL)
14573 return (NULL);
14574
14575 xmlCtxtReset(ctxt);
14576
14577 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14578 if (input == NULL) {
14579 return(NULL);
14580 }
14581
14582 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14583 if (stream == NULL) {
14584 xmlFreeParserInputBuffer(input);
14585 return(NULL);
14586 }
14587
14588 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014589 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014590}
14591
14592/**
14593 * xmlCtxtReadFd:
14594 * @ctxt: an XML parser context
14595 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014596 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014597 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014598 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014599 *
14600 * parse an XML from a file descriptor and build a tree.
14601 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014602 * NOTE that the file descriptor will not be closed when the
14603 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014604 *
14605 * Returns the resulting document tree
14606 */
14607xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014608xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14609 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014610{
14611 xmlParserInputBufferPtr input;
14612 xmlParserInputPtr stream;
14613
14614 if (fd < 0)
14615 return (NULL);
14616 if (ctxt == NULL)
14617 return (NULL);
14618
14619 xmlCtxtReset(ctxt);
14620
14621
14622 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14623 if (input == NULL)
14624 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014625 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014626 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14627 if (stream == NULL) {
14628 xmlFreeParserInputBuffer(input);
14629 return (NULL);
14630 }
14631 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014632 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014633}
14634
14635/**
14636 * xmlCtxtReadIO:
14637 * @ctxt: an XML parser context
14638 * @ioread: an I/O read function
14639 * @ioclose: an I/O close function
14640 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014641 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014642 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014643 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014644 *
14645 * parse an XML document from I/O functions and source and build a tree.
14646 * This reuses the existing @ctxt parser context
14647 *
14648 * Returns the resulting document tree
14649 */
14650xmlDocPtr
14651xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14652 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014653 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014654 const char *encoding, int options)
14655{
14656 xmlParserInputBufferPtr input;
14657 xmlParserInputPtr stream;
14658
14659 if (ioread == NULL)
14660 return (NULL);
14661 if (ctxt == NULL)
14662 return (NULL);
14663
14664 xmlCtxtReset(ctxt);
14665
14666 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14667 XML_CHAR_ENCODING_NONE);
14668 if (input == NULL)
14669 return (NULL);
14670 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14671 if (stream == NULL) {
14672 xmlFreeParserInputBuffer(input);
14673 return (NULL);
14674 }
14675 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014676 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014677}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014678
14679#define bottom_parser
14680#include "elfgcchack.h"