blob: bd2be675adaa158228ba12171be06501aea8417f [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
Rob Richards9c0aa472009-03-26 18:10:19 +000086static xmlParserCtxtPtr
87xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090/************************************************************************
91 * *
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
93 * *
94 ************************************************************************/
95
96#define XML_PARSER_BIG_ENTITY 1000
97#define XML_PARSER_LOT_ENTITY 5000
98
99/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
104 */
105#define XML_PARSER_NON_LINEAR 10
106
107/*
108 * xmlParserEntityCheck
109 *
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential linear entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
114 * parser option.
115 */
116static int
117xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118 xmlEntityPtr ent)
119{
Daniel Veillardcba68392008-08-29 12:43:40 +0000120 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000121
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123 return (0);
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125 return (1);
126 if (size != 0) {
127 /*
128 * Do the check based on the replacement size of the entity
129 */
130 if (size < XML_PARSER_BIG_ENTITY)
131 return(0);
132
133 /*
134 * A limit on the amount of text data reasonably used
135 */
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
139 }
140 consumed += ctxt->sizeentities;
141
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144 return (0);
145 } else if (ent != NULL) {
146 /*
147 * use the number of parsed entities in the replacement
148 */
149 size = ent->checked;
150
151 /*
152 * The amount of data parsed counting entities size only once
153 */
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
157 }
158 consumed += ctxt->sizeentities;
159
160 /*
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
163 */
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165 return (0);
166 } else {
167 /*
168 * strange we got no data for checking just return
169 */
170 return (0);
171 }
172
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174 return (1);
175}
176
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents that we allow to
 * process. It is a safety boundary rather than a limitation of the
 * parser, and can be disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000186
Daniel Veillard0fb18932003-09-07 09:14:37 +0000187
Daniel Veillard0161e632008-08-28 15:36:32 +0000188
189#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000190#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000191#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000192#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
202
Daniel Veillarda07050d2003-10-19 14:46:32 +0000203
Owen Taylor3473f882001-02-23 17:55:21 +0000204/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000205xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
207
Daniel Veillard7d515752003-09-26 19:12:37 +0000208static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000209xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000211 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000212 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000213
Daniel Veillard37334572008-07-31 08:20:02 +0000214static int
215xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000218static void
219xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000221#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000222
Daniel Veillard7d515752003-09-26 19:12:37 +0000223static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000224xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000227static int
228xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229
Daniel Veillarde57ec792003-09-10 10:50:59 +0000230/************************************************************************
231 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 * Some factorized error routines *
233 * *
234 ************************************************************************/
235
236/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
241 *
242 * Handle a redefinition of attribute error
243 */
244static void
245xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
247{
Daniel Veillard157fee02003-10-31 10:36:03 +0000248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
250 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000251 if (ctxt != NULL)
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000253 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000254 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000255 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
256 (const char *) localname, NULL, NULL, 0, 0,
257 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000258 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000259 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
261 (const char *) prefix, (const char *) localname,
262 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
263 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000264 if (ctxt != NULL) {
265 ctxt->wellFormed = 0;
266 if (ctxt->recovery == 0)
267 ctxt->disableSAX = 1;
268 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000269}
270
271/**
272 * xmlFatalErr:
273 * @ctxt: an XML parser context
274 * @error: the error number
275 * @extra: extra information string
276 *
277 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
278 */
279static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000280xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000281{
282 const char *errmsg;
283
Daniel Veillard157fee02003-10-31 10:36:03 +0000284 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
285 (ctxt->instate == XML_PARSER_EOF))
286 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 switch (error) {
288 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000289 errmsg = "CharRef: invalid hexadecimal value\n";
290 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000291 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 errmsg = "CharRef: invalid decimal value\n";
293 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000294 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000295 errmsg = "CharRef: invalid value\n";
296 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000297 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000298 errmsg = "internal error";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "PEReference at end of document\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "PEReference in prolog\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "PEReference in epilog\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "PEReference: no name\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "PEReference: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "Detected an entity reference loop\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityValue: \" or ' expected\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "PEReferences forbidden in internal subset\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EntityValue: \" or ' expected\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "AttValue: \" or ' expected\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Unescaped '<' not allowed in attributes values\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "SystemLiteral \" or ' expected\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Unfinished System or Public ID \" or ' expected\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Sequence ']]>' not allowed in content\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "PUBLIC, the Public Identifier is missing\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Comment must not contain '--' (double-hyphen)\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "xmlParsePI : no target name\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Invalid PI name\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "NOTATION: Name expected here\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 errmsg = "'>' required to close NOTATION declaration\n";
362 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 errmsg = "Entity value required\n";
365 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 errmsg = "Fragment not allowed";
368 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000369 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370 errmsg = "'(' required to start ATTLIST enumeration\n";
371 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000372 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 errmsg = "NmToken expected in ATTLIST enumeration\n";
374 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 errmsg = "')' required to finish ATTLIST enumeration\n";
377 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
380 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000381 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
383 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000384 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 errmsg = "ContentDecl : Name or '(' expected\n";
386 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000387 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 errmsg =
392 "PEReference: forbidden within markup decl in internal subset\n";
393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 errmsg = "expected '>'\n";
396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 errmsg = "XML conditional section '[' expected\n";
399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000401 errmsg = "Content error in the external subset\n";
402 break;
403 case XML_ERR_CONDSEC_INVALID_KEYWORD:
404 errmsg =
405 "conditional section INCLUDE or IGNORE keyword expected\n";
406 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000407 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 errmsg = "XML conditional section not closed\n";
409 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000410 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 errmsg = "Text declaration '<?xml' required\n";
412 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000413 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 errmsg = "parsing XML declaration: '?>' expected\n";
415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 errmsg = "external parsed entities cannot be standalone\n";
418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 errmsg = "EntityRef: expecting ';'\n";
421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 errmsg = "DOCTYPE improperly terminated\n";
424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 errmsg = "EndTag: '</' not found\n";
427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 errmsg = "expected '='\n";
430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 errmsg = "String not closed expecting \" or '\n";
433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 errmsg = "String not started expecting ' or \"\n";
436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 errmsg = "Invalid XML encoding name\n";
439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 errmsg = "standalone accepts only 'yes' or 'no'\n";
442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 errmsg = "Document is empty\n";
445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 errmsg = "Extra content at the end of the document\n";
448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 errmsg = "chunk is not well balanced\n";
451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 errmsg = "extra content at the end of well balanced chunk\n";
454 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 errmsg = "Malformed declaration expecting version\n";
457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 case:
460 errmsg = "\n";
461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 default:
464 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000466 if (ctxt != NULL)
467 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000468 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
470 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000471 if (ctxt != NULL) {
472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000476}
477
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000478/**
479 * xmlFatalErrMsg:
480 * @ctxt: an XML parser context
481 * @error: the error number
482 * @msg: the error message
483 *
484 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
485 */
486static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
488 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000489{
Daniel Veillard157fee02003-10-31 10:36:03 +0000490 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
491 (ctxt->instate == XML_PARSER_EOF))
492 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000493 if (ctxt != NULL)
494 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000495 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200496 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000497 if (ctxt != NULL) {
498 ctxt->wellFormed = 0;
499 if (ctxt->recovery == 0)
500 ctxt->disableSAX = 1;
501 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000502}
503
504/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000505 * xmlWarningMsg:
506 * @ctxt: an XML parser context
507 * @error: the error number
508 * @msg: the error message
509 * @str1: extra data
510 * @str2: extra data
511 *
512 * Handle a warning.
513 */
514static void
515xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
516 const char *msg, const xmlChar *str1, const xmlChar *str2)
517{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000518 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000519
Daniel Veillard157fee02003-10-31 10:36:03 +0000520 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
521 (ctxt->instate == XML_PARSER_EOF))
522 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000523 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
524 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000525 schannel = ctxt->sax->serror;
526 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000527 (ctxt->sax) ? ctxt->sax->warning : NULL,
528 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000529 ctxt, NULL, XML_FROM_PARSER, error,
530 XML_ERR_WARNING, NULL, 0,
531 (const char *) str1, (const char *) str2, NULL, 0, 0,
532 msg, (const char *) str1, (const char *) str2);
533}
534
535/**
536 * xmlValidityError:
537 * @ctxt: an XML parser context
538 * @error: the error number
539 * @msg: the error message
540 * @str1: extra data
541 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000542 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000543 */
544static void
545xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000546 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000547{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000548 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000549
550 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
551 (ctxt->instate == XML_PARSER_EOF))
552 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000553 if (ctxt != NULL) {
554 ctxt->errNo = error;
555 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
556 schannel = ctxt->sax->serror;
557 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000558 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000559 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000560 ctxt, NULL, XML_FROM_DTD, error,
561 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000562 (const char *) str2, NULL, 0, 0,
563 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000564 if (ctxt != NULL) {
565 ctxt->valid = 0;
566 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000567}
568
569/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570 * xmlFatalErrMsgInt:
571 * @ctxt: an XML parser context
572 * @error: the error number
573 * @msg: the error message
574 * @val: an integer value
575 *
576 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
577 */
578static void
579xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000580 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581{
Daniel Veillard157fee02003-10-31 10:36:03 +0000582 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
583 (ctxt->instate == XML_PARSER_EOF))
584 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000585 if (ctxt != NULL)
586 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000587 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000588 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
589 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000590 if (ctxt != NULL) {
591 ctxt->wellFormed = 0;
592 if (ctxt->recovery == 0)
593 ctxt->disableSAX = 1;
594 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000595}
596
597/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000598 * xmlFatalErrMsgStrIntStr:
599 * @ctxt: an XML parser context
600 * @error: the error number
601 * @msg: the error message
602 * @str1: an string info
603 * @val: an integer value
604 * @str2: an string info
605 *
606 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
607 */
608static void
609xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
610 const char *msg, const xmlChar *str1, int val,
611 const xmlChar *str2)
612{
Daniel Veillard157fee02003-10-31 10:36:03 +0000613 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
614 (ctxt->instate == XML_PARSER_EOF))
615 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000616 if (ctxt != NULL)
617 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000618 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000619 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
620 NULL, 0, (const char *) str1, (const char *) str2,
621 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000622 if (ctxt != NULL) {
623 ctxt->wellFormed = 0;
624 if (ctxt->recovery == 0)
625 ctxt->disableSAX = 1;
626 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000627}
628
629/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000630 * xmlFatalErrMsgStr:
631 * @ctxt: an XML parser context
632 * @error: the error number
633 * @msg: the error message
634 * @val: a string value
635 *
636 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
637 */
638static void
639xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000640 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000641{
Daniel Veillard157fee02003-10-31 10:36:03 +0000642 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
643 (ctxt->instate == XML_PARSER_EOF))
644 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000645 if (ctxt != NULL)
646 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000647 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000648 XML_FROM_PARSER, error, XML_ERR_FATAL,
649 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
650 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000651 if (ctxt != NULL) {
652 ctxt->wellFormed = 0;
653 if (ctxt->recovery == 0)
654 ctxt->disableSAX = 1;
655 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000656}
657
658/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000659 * xmlErrMsgStr:
660 * @ctxt: an XML parser context
661 * @error: the error number
662 * @msg: the error message
663 * @val: a string value
664 *
665 * Handle a non fatal parser error
666 */
667static void
668xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
669 const char *msg, const xmlChar * val)
670{
Daniel Veillard157fee02003-10-31 10:36:03 +0000671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL)
675 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000676 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000677 XML_FROM_PARSER, error, XML_ERR_ERROR,
678 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
679 val);
680}
681
682/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000683 * xmlNsErr:
684 * @ctxt: an XML parser context
685 * @error: the error number
686 * @msg: the message
687 * @info1: extra information string
688 * @info2: extra information string
689 *
690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
691 */
692static void
693xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000695 const xmlChar * info1, const xmlChar * info2,
696 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000697{
Daniel Veillard157fee02003-10-31 10:36:03 +0000698 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
699 (ctxt->instate == XML_PARSER_EOF))
700 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000701 if (ctxt != NULL)
702 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000703 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000704 XML_ERR_ERROR, NULL, 0, (const char *) info1,
705 (const char *) info2, (const char *) info3, 0, 0, msg,
706 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000707 if (ctxt != NULL)
708 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000709}
710
Daniel Veillard37334572008-07-31 08:20:02 +0000711/**
712 * xmlNsWarn
713 * @ctxt: an XML parser context
714 * @error: the error number
715 * @msg: the message
716 * @info1: extra information string
717 * @info2: extra information string
718 *
719 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
720 */
721static void
722xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
723 const char *msg,
724 const xmlChar * info1, const xmlChar * info2,
725 const xmlChar * info3)
726{
727 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
728 (ctxt->instate == XML_PARSER_EOF))
729 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000730 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
731 XML_ERR_WARNING, NULL, 0, (const char *) info1,
732 (const char *) info2, (const char *) info3, 0, 0, msg,
733 info1, info2, info3);
734}
735
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000736/************************************************************************
737 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000738 * Library wide options *
739 * *
740 ************************************************************************/
741
742/**
743 * xmlHasFeature:
744 * @feature: the feature to be examined
745 *
746 * Examines if the library has been compiled with a given feature.
747 *
748 * Returns a non-zero value if the feature exist, otherwise zero.
749 * Returns zero (0) if the feature does not exist or an unknown
750 * unknown feature is requested, non-zero otherwise.
751 */
752int
753xmlHasFeature(xmlFeature feature)
754{
755 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_THREAD_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_TREE_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_OUTPUT_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_PUSH_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_READER_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_PATTERN_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_WRITER_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef LIBXML_SAX1_ENABLED
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_FTP_ENABLED
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000810 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000811#ifdef LIBXML_HTTP_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000816 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000817#ifdef LIBXML_VALID_ENABLED
818 return(1);
819#else
820 return(0);
821#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000822 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000823#ifdef LIBXML_HTML_ENABLED
824 return(1);
825#else
826 return(0);
827#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000828 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000829#ifdef LIBXML_LEGACY_ENABLED
830 return(1);
831#else
832 return(0);
833#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000834 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000835#ifdef LIBXML_C14N_ENABLED
836 return(1);
837#else
838 return(0);
839#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000840 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000841#ifdef LIBXML_CATALOG_ENABLED
842 return(1);
843#else
844 return(0);
845#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000846 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000847#ifdef LIBXML_XPATH_ENABLED
848 return(1);
849#else
850 return(0);
851#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000852 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000853#ifdef LIBXML_XPTR_ENABLED
854 return(1);
855#else
856 return(0);
857#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000858 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000859#ifdef LIBXML_XINCLUDE_ENABLED
860 return(1);
861#else
862 return(0);
863#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000864 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000865#ifdef LIBXML_ICONV_ENABLED
866 return(1);
867#else
868 return(0);
869#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000870 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000871#ifdef LIBXML_ISO8859X_ENABLED
872 return(1);
873#else
874 return(0);
875#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000876 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000877#ifdef LIBXML_UNICODE_ENABLED
878 return(1);
879#else
880 return(0);
881#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000882 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000883#ifdef LIBXML_REGEXP_ENABLED
884 return(1);
885#else
886 return(0);
887#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000888 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000889#ifdef LIBXML_AUTOMATA_ENABLED
890 return(1);
891#else
892 return(0);
893#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000894 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000895#ifdef LIBXML_EXPR_ENABLED
896 return(1);
897#else
898 return(0);
899#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000900 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000901#ifdef LIBXML_SCHEMAS_ENABLED
902 return(1);
903#else
904 return(0);
905#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000906 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000907#ifdef LIBXML_SCHEMATRON_ENABLED
908 return(1);
909#else
910 return(0);
911#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000912 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000913#ifdef LIBXML_MODULES_ENABLED
914 return(1);
915#else
916 return(0);
917#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000918 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000919#ifdef LIBXML_DEBUG_ENABLED
920 return(1);
921#else
922 return(0);
923#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000924 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000925#ifdef DEBUG_MEMORY_LOCATION
926 return(1);
927#else
928 return(0);
929#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000930 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000931#ifdef LIBXML_DEBUG_RUNTIME
932 return(1);
933#else
934 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000935#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000936 case XML_WITH_ZLIB:
937#ifdef LIBXML_ZLIB_ENABLED
938 return(1);
939#else
940 return(0);
941#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000942 default:
943 break;
944 }
945 return(0);
946}
947
948/************************************************************************
949 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 * SAX2 defaulted attributes handling *
951 * *
952 ************************************************************************/
953
954/**
955 * xmlDetectSAX2:
956 * @ctxt: an XML parser context
957 *
958 * Do the SAX2 detection and specific intialization
959 */
960static void
961xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
962 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000963#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000964 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
965 ((ctxt->sax->startElementNs != NULL) ||
966 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000967#else
968 ctxt->sax2 = 1;
969#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000970
971 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
972 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
973 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000974 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
975 (ctxt->str_xml_ns == NULL)) {
976 xmlErrMemory(ctxt, NULL);
977 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000978}
979
Daniel Veillarde57ec792003-09-10 10:50:59 +0000980typedef struct _xmlDefAttrs xmlDefAttrs;
981typedef xmlDefAttrs *xmlDefAttrsPtr;
982struct _xmlDefAttrs {
983 int nbAttrs; /* number of defaulted attributes on that element */
984 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +0000985 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000986};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000987
988/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000989 * xmlAttrNormalizeSpace:
990 * @src: the source string
991 * @dst: the target string
992 *
993 * Normalize the space in non CDATA attribute values:
994 * If the attribute type is not CDATA, then the XML processor MUST further
995 * process the normalized attribute value by discarding any leading and
996 * trailing space (#x20) characters, and by replacing sequences of space
997 * (#x20) characters by a single space (#x20) character.
998 * Note that the size of dst need to be at least src, and if one doesn't need
999 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1000 * passing src as dst is just fine.
1001 *
1002 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1003 * is needed.
1004 */
1005static xmlChar *
1006xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1007{
1008 if ((src == NULL) || (dst == NULL))
1009 return(NULL);
1010
1011 while (*src == 0x20) src++;
1012 while (*src != 0) {
1013 if (*src == 0x20) {
1014 while (*src == 0x20) src++;
1015 if (*src != 0)
1016 *dst++ = 0x20;
1017 } else {
1018 *dst++ = *src++;
1019 }
1020 }
1021 *dst = 0;
1022 if (dst == src)
1023 return(NULL);
1024 return(dst);
1025}
1026
1027/**
1028 * xmlAttrNormalizeSpace2:
1029 * @src: the source string
1030 *
1031 * Normalize the space in non CDATA attribute values, a slightly more complex
1032 * front end to avoid allocation problems when running on attribute values
1033 * coming from the input.
1034 *
1035 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1036 * is needed.
1037 */
1038static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001039xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001040{
1041 int i;
1042 int remove_head = 0;
1043 int need_realloc = 0;
1044 const xmlChar *cur;
1045
1046 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1047 return(NULL);
1048 i = *len;
1049 if (i <= 0)
1050 return(NULL);
1051
1052 cur = src;
1053 while (*cur == 0x20) {
1054 cur++;
1055 remove_head++;
1056 }
1057 while (*cur != 0) {
1058 if (*cur == 0x20) {
1059 cur++;
1060 if ((*cur == 0x20) || (*cur == 0)) {
1061 need_realloc = 1;
1062 break;
1063 }
1064 } else
1065 cur++;
1066 }
1067 if (need_realloc) {
1068 xmlChar *ret;
1069
1070 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1071 if (ret == NULL) {
1072 xmlErrMemory(ctxt, NULL);
1073 return(NULL);
1074 }
1075 xmlAttrNormalizeSpace(ret, ret);
1076 *len = (int) strlen((const char *)ret);
1077 return(ret);
1078 } else if (remove_head) {
1079 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001080 memmove(src, src + remove_head, 1 + *len);
1081 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001082 }
1083 return(NULL);
1084}
1085
1086/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001087 * xmlAddDefAttrs:
1088 * @ctxt: an XML parser context
1089 * @fullname: the element fullname
1090 * @fullattr: the attribute fullname
1091 * @value: the attribute value
1092 *
1093 * Add a defaulted attribute for an element
1094 */
1095static void
1096xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1097 const xmlChar *fullname,
1098 const xmlChar *fullattr,
1099 const xmlChar *value) {
1100 xmlDefAttrsPtr defaults;
1101 int len;
1102 const xmlChar *name;
1103 const xmlChar *prefix;
1104
Daniel Veillard6a31b832008-03-26 14:06:44 +00001105 /*
1106 * Allows to detect attribute redefinitions
1107 */
1108 if (ctxt->attsSpecial != NULL) {
1109 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1110 return;
1111 }
1112
Daniel Veillarde57ec792003-09-10 10:50:59 +00001113 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001114 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001115 if (ctxt->attsDefault == NULL)
1116 goto mem_error;
1117 }
1118
1119 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001120 * split the element name into prefix:localname , the string found
1121 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001122 */
1123 name = xmlSplitQName3(fullname, &len);
1124 if (name == NULL) {
1125 name = xmlDictLookup(ctxt->dict, fullname, -1);
1126 prefix = NULL;
1127 } else {
1128 name = xmlDictLookup(ctxt->dict, name, -1);
1129 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1130 }
1131
1132 /*
1133 * make sure there is some storage
1134 */
1135 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1136 if (defaults == NULL) {
1137 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001138 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001139 if (defaults == NULL)
1140 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001141 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001142 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001143 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1144 defaults, NULL) < 0) {
1145 xmlFree(defaults);
1146 goto mem_error;
1147 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001148 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001149 xmlDefAttrsPtr temp;
1150
1151 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001152 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001153 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001155 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001157 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1158 defaults, NULL) < 0) {
1159 xmlFree(defaults);
1160 goto mem_error;
1161 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 }
1163
1164 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001165 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001166 * are within the DTD and hen not associated to namespace names.
1167 */
1168 name = xmlSplitQName3(fullattr, &len);
1169 if (name == NULL) {
1170 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1171 prefix = NULL;
1172 } else {
1173 name = xmlDictLookup(ctxt->dict, name, -1);
1174 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1175 }
1176
Daniel Veillardae0765b2008-07-31 19:54:59 +00001177 defaults->values[5 * defaults->nbAttrs] = name;
1178 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001179 /* intern the string and precompute the end */
1180 len = xmlStrlen(value);
1181 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001182 defaults->values[5 * defaults->nbAttrs + 2] = value;
1183 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1184 if (ctxt->external)
1185 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1186 else
1187 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001188 defaults->nbAttrs++;
1189
1190 return;
1191
1192mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001193 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001194 return;
1195}
1196
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001197/**
1198 * xmlAddSpecialAttr:
1199 * @ctxt: an XML parser context
1200 * @fullname: the element fullname
1201 * @fullattr: the attribute fullname
1202 * @type: the attribute type
1203 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001204 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001205 */
1206static void
1207xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1208 const xmlChar *fullname,
1209 const xmlChar *fullattr,
1210 int type)
1211{
1212 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001213 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001214 if (ctxt->attsSpecial == NULL)
1215 goto mem_error;
1216 }
1217
Daniel Veillardac4118d2008-01-11 05:27:32 +00001218 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1219 return;
1220
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001221 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1222 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001223 return;
1224
1225mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001226 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001227 return;
1228}
1229
Daniel Veillard4432df22003-09-28 18:58:27 +00001230/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001231 * xmlCleanSpecialAttrCallback:
1232 *
1233 * Removes CDATA attributes from the special attribute table
1234 */
1235static void
1236xmlCleanSpecialAttrCallback(void *payload, void *data,
1237 const xmlChar *fullname, const xmlChar *fullattr,
1238 const xmlChar *unused ATTRIBUTE_UNUSED) {
1239 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1240
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001241 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001242 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1243 }
1244}
1245
1246/**
1247 * xmlCleanSpecialAttr:
1248 * @ctxt: an XML parser context
1249 *
1250 * Trim the list of attributes defined to remove all those of type
1251 * CDATA as they are not special. This call should be done when finishing
1252 * to parse the DTD and before starting to parse the document root.
1253 */
1254static void
1255xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1256{
1257 if (ctxt->attsSpecial == NULL)
1258 return;
1259
1260 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1261
1262 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1263 xmlHashFree(ctxt->attsSpecial, NULL);
1264 ctxt->attsSpecial = NULL;
1265 }
1266 return;
1267}
1268
1269/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001270 * xmlCheckLanguageID:
1271 * @lang: pointer to the string value
1272 *
1273 * Checks that the value conforms to the LanguageID production:
1274 *
1275 * NOTE: this is somewhat deprecated, those productions were removed from
1276 * the XML Second edition.
1277 *
1278 * [33] LanguageID ::= Langcode ('-' Subcode)*
1279 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1280 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1281 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1282 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1283 * [38] Subcode ::= ([a-z] | [A-Z])+
1284 *
1285 * Returns 1 if correct 0 otherwise
1286 **/
1287int
1288xmlCheckLanguageID(const xmlChar * lang)
1289{
1290 const xmlChar *cur = lang;
1291
1292 if (cur == NULL)
1293 return (0);
1294 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1295 ((cur[0] == 'I') && (cur[1] == '-'))) {
1296 /*
1297 * IANA code
1298 */
1299 cur += 2;
1300 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1301 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1302 cur++;
1303 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1304 ((cur[0] == 'X') && (cur[1] == '-'))) {
1305 /*
1306 * User code
1307 */
1308 cur += 2;
1309 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1310 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1311 cur++;
1312 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1313 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1314 /*
1315 * ISO639
1316 */
1317 cur++;
1318 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1319 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1320 cur++;
1321 else
1322 return (0);
1323 } else
1324 return (0);
1325 while (cur[0] != 0) { /* non input consuming */
1326 if (cur[0] != '-')
1327 return (0);
1328 cur++;
1329 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1330 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1331 cur++;
1332 else
1333 return (0);
1334 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1335 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1336 cur++;
1337 }
1338 return (1);
1339}
1340
Owen Taylor3473f882001-02-23 17:55:21 +00001341/************************************************************************
1342 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001343 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001344 * *
1345 ************************************************************************/
1346
1347xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1348 const xmlChar ** str);
1349
Daniel Veillard0fb18932003-09-07 09:14:37 +00001350#ifdef SAX2
1351/**
1352 * nsPush:
1353 * @ctxt: an XML parser context
1354 * @prefix: the namespace prefix or NULL
1355 * @URL: the namespace name
1356 *
1357 * Pushes a new parser namespace on top of the ns stack
1358 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001359 * Returns -1 in case of error, -2 if the namespace should be discarded
1360 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001361 */
1362static int
1363nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1364{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001365 if (ctxt->options & XML_PARSE_NSCLEAN) {
1366 int i;
1367 for (i = 0;i < ctxt->nsNr;i += 2) {
1368 if (ctxt->nsTab[i] == prefix) {
1369 /* in scope */
1370 if (ctxt->nsTab[i + 1] == URL)
1371 return(-2);
1372 /* out of scope keep it */
1373 break;
1374 }
1375 }
1376 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001377 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1378 ctxt->nsMax = 10;
1379 ctxt->nsNr = 0;
1380 ctxt->nsTab = (const xmlChar **)
1381 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1382 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001383 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001384 ctxt->nsMax = 0;
1385 return (-1);
1386 }
1387 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001388 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001389 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001390 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1391 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1392 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001393 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001394 ctxt->nsMax /= 2;
1395 return (-1);
1396 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001397 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001398 }
1399 ctxt->nsTab[ctxt->nsNr++] = prefix;
1400 ctxt->nsTab[ctxt->nsNr++] = URL;
1401 return (ctxt->nsNr);
1402}
1403/**
1404 * nsPop:
1405 * @ctxt: an XML parser context
1406 * @nr: the number to pop
1407 *
1408 * Pops the top @nr parser prefix/namespace from the ns stack
1409 *
1410 * Returns the number of namespaces removed
1411 */
1412static int
1413nsPop(xmlParserCtxtPtr ctxt, int nr)
1414{
1415 int i;
1416
1417 if (ctxt->nsTab == NULL) return(0);
1418 if (ctxt->nsNr < nr) {
1419 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1420 nr = ctxt->nsNr;
1421 }
1422 if (ctxt->nsNr <= 0)
1423 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001424
Daniel Veillard0fb18932003-09-07 09:14:37 +00001425 for (i = 0;i < nr;i++) {
1426 ctxt->nsNr--;
1427 ctxt->nsTab[ctxt->nsNr] = NULL;
1428 }
1429 return(nr);
1430}
1431#endif
1432
1433static int
1434xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1435 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001436 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001437 int maxatts;
1438
1439 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001440 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001441 atts = (const xmlChar **)
1442 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001443 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001444 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001445 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1446 if (attallocs == NULL) goto mem_error;
1447 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001448 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001449 } else if (nr + 5 > ctxt->maxatts) {
1450 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001451 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1452 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001453 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001454 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001455 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1456 (maxatts / 5) * sizeof(int));
1457 if (attallocs == NULL) goto mem_error;
1458 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001459 ctxt->maxatts = maxatts;
1460 }
1461 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001462mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001463 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001464 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001465}
1466
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001467/**
1468 * inputPush:
1469 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001470 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001471 *
1472 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001473 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001474 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001475 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001476int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001477inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1478{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001479 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001480 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001481 if (ctxt->inputNr >= ctxt->inputMax) {
1482 ctxt->inputMax *= 2;
1483 ctxt->inputTab =
1484 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1485 ctxt->inputMax *
1486 sizeof(ctxt->inputTab[0]));
1487 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001488 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001489 xmlFreeInputStream(value);
1490 ctxt->inputMax /= 2;
1491 value = NULL;
1492 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001493 }
1494 }
1495 ctxt->inputTab[ctxt->inputNr] = value;
1496 ctxt->input = value;
1497 return (ctxt->inputNr++);
1498}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001499/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001500 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001501 * @ctxt: an XML parser context
1502 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001503 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001504 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001505 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001506 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001507xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001508inputPop(xmlParserCtxtPtr ctxt)
1509{
1510 xmlParserInputPtr ret;
1511
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001512 if (ctxt == NULL)
1513 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001514 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001515 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001516 ctxt->inputNr--;
1517 if (ctxt->inputNr > 0)
1518 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1519 else
1520 ctxt->input = NULL;
1521 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001522 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001523 return (ret);
1524}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001525/**
1526 * nodePush:
1527 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001528 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001529 *
1530 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001531 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001532 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001533 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001534int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001535nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1536{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001537 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001538 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001539 xmlNodePtr *tmp;
1540
1541 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1542 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001543 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001544 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001545 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001546 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001547 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001548 ctxt->nodeTab = tmp;
1549 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001550 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001551 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1552 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001553 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001554 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001555 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001556 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001557 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001558 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001559 ctxt->nodeTab[ctxt->nodeNr] = value;
1560 ctxt->node = value;
1561 return (ctxt->nodeNr++);
1562}
Daniel Veillard8915c152008-08-26 13:05:34 +00001563
Daniel Veillard1c732d22002-11-30 11:22:59 +00001564/**
1565 * nodePop:
1566 * @ctxt: an XML parser context
1567 *
1568 * Pops the top element node from the node stack
1569 *
1570 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001571 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001572xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001573nodePop(xmlParserCtxtPtr ctxt)
1574{
1575 xmlNodePtr ret;
1576
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001577 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001578 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001579 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001580 ctxt->nodeNr--;
1581 if (ctxt->nodeNr > 0)
1582 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1583 else
1584 ctxt->node = NULL;
1585 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001586 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001587 return (ret);
1588}
Daniel Veillarda2351322004-06-27 12:08:10 +00001589
1590#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001591/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001592 * nameNsPush:
1593 * @ctxt: an XML parser context
1594 * @value: the element name
1595 * @prefix: the element prefix
1596 * @URI: the element namespace name
1597 *
1598 * Pushes a new element name/prefix/URL on top of the name stack
1599 *
1600 * Returns -1 in case of error, the index in the stack otherwise
1601 */
1602static int
1603nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1604 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1605{
1606 if (ctxt->nameNr >= ctxt->nameMax) {
1607 const xmlChar * *tmp;
1608 void **tmp2;
1609 ctxt->nameMax *= 2;
1610 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1611 ctxt->nameMax *
1612 sizeof(ctxt->nameTab[0]));
1613 if (tmp == NULL) {
1614 ctxt->nameMax /= 2;
1615 goto mem_error;
1616 }
1617 ctxt->nameTab = tmp;
1618 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1619 ctxt->nameMax * 3 *
1620 sizeof(ctxt->pushTab[0]));
1621 if (tmp2 == NULL) {
1622 ctxt->nameMax /= 2;
1623 goto mem_error;
1624 }
1625 ctxt->pushTab = tmp2;
1626 }
1627 ctxt->nameTab[ctxt->nameNr] = value;
1628 ctxt->name = value;
1629 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1630 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001631 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001632 return (ctxt->nameNr++);
1633mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001634 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001635 return (-1);
1636}
1637/**
1638 * nameNsPop:
1639 * @ctxt: an XML parser context
1640 *
1641 * Pops the top element/prefix/URI name from the name stack
1642 *
1643 * Returns the name just removed
1644 */
1645static const xmlChar *
1646nameNsPop(xmlParserCtxtPtr ctxt)
1647{
1648 const xmlChar *ret;
1649
1650 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001651 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001652 ctxt->nameNr--;
1653 if (ctxt->nameNr > 0)
1654 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1655 else
1656 ctxt->name = NULL;
1657 ret = ctxt->nameTab[ctxt->nameNr];
1658 ctxt->nameTab[ctxt->nameNr] = NULL;
1659 return (ret);
1660}
Daniel Veillarda2351322004-06-27 12:08:10 +00001661#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001662
1663/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001664 * namePush:
1665 * @ctxt: an XML parser context
1666 * @value: the element name
1667 *
1668 * Pushes a new element name on top of the name stack
1669 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001670 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001671 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001672int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001673namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001674{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001675 if (ctxt == NULL) return (-1);
1676
Daniel Veillard1c732d22002-11-30 11:22:59 +00001677 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001679 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001680 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001681 ctxt->nameMax *
1682 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001683 if (tmp == NULL) {
1684 ctxt->nameMax /= 2;
1685 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001686 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001687 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001688 }
1689 ctxt->nameTab[ctxt->nameNr] = value;
1690 ctxt->name = value;
1691 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001692mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001693 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001694 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695}
1696/**
1697 * namePop:
1698 * @ctxt: an XML parser context
1699 *
1700 * Pops the top element name from the name stack
1701 *
1702 * Returns the name just removed
1703 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001704const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001705namePop(xmlParserCtxtPtr ctxt)
1706{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001707 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001709 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1710 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001711 ctxt->nameNr--;
1712 if (ctxt->nameNr > 0)
1713 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1714 else
1715 ctxt->name = NULL;
1716 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001717 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001718 return (ret);
1719}
Owen Taylor3473f882001-02-23 17:55:21 +00001720
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001721static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001722 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001723 int *tmp;
1724
Owen Taylor3473f882001-02-23 17:55:21 +00001725 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001726 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1727 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1728 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001729 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001730 ctxt->spaceMax /=2;
1731 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001732 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001733 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001734 }
1735 ctxt->spaceTab[ctxt->spaceNr] = val;
1736 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1737 return(ctxt->spaceNr++);
1738}
1739
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001740static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001741 int ret;
1742 if (ctxt->spaceNr <= 0) return(0);
1743 ctxt->spaceNr--;
1744 if (ctxt->spaceNr > 0)
1745 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1746 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001747 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001748 ret = ctxt->spaceTab[ctxt->spaceNr];
1749 ctxt->spaceTab[ctxt->spaceNr] = -1;
1750 return(ret);
1751}
1752
/*
 * Macros for accessing the content. Those should be used only by the
 * parser, and must not be exported. They all expect a variable named
 * `ctxt` to be in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the
 * context to use them:
 *
 *   CUR_PTR  returns the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different
 *            location!
 *   CUR      returns the current xmlChar value. It should be used
 *            internally by the parser only to compare against ASCII
 *            values, otherwise it would break when running with UTF-8
 *            encoding.
 *   RAW      same as CUR but in the input buffer, bypassing any token
 *            extraction that may have been done. (RAW and CUR expand to
 *            the same expression here.)
 *   NXT(n)   returns the n'th next xmlChar. Same as CUR, it should be
 *            used only to compare against ASCII based substrings.
 *   SKIP(n)  skips n xmlChars, and must also be used only to skip ASCII
 *            defined strings without newlines within the parser.
 *   NEXT1    skips 1 xmlChar, and must also be used only to skip one
 *            non-newline ASCII defined char within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expecting UTF-8
 * encoding:
 *
 *   NEXT         skips to the next character, doing the proper decoding
 *                in UTF-8 mode. It also pops finished entities on the
 *                fly.
 *   NEXTL(l)     skips the current unicode character of l xmlChars.
 *   CUR_CHAR(l)  returns the current unicode character (int) and sets l
 *                to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR    same, but operates on a string instead of the context.
 *   COPY_BUF     copies the current unicode char to the target buffer
 *                and increments the index.
 *   GROW, SHRINK handling of input buffers.
 */

#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
1792
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s, read as
 * unsigned char, equal c1..cn in order. Evaluation stops at the first
 * mismatch. Every argument is parenthesised so that compound
 * expressions (e.g. `p + 1` or `cond ? a : b`) are cast and compared
 * as a whole; `s` may be evaluated several times.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1810
/*
 * SKIP(val): advance the current input by val xmlChars, updating the
 * character count and the column. Afterwards a '%' at the cursor is
 * handed to xmlParserHandlePEReference(), and if the cursor sits on a
 * 0 byte and no more data can be read the input is popped.
 * val is evaluated three times.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
1818
/*
 * SKIPL(val): like SKIP but line-aware. Advances the current input one
 * xmlChar at a time for val steps, bumping the line counter (and
 * resetting the column to 1) on each '\n', the column otherwise.
 * Afterwards a '%' at the cursor is handed to
 * xmlParserHandlePEReference(), and if the cursor sits on a 0 byte and
 * no more data can be read the input is popped.
 * val is parenthesised so compound expressions compare as a whole; it
 * is re-evaluated on every loop iteration.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl<(val); skipl++) {                                \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
1833
Daniel Veillarda880b122003-04-21 21:36:41 +00001834#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001835 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1836 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001837 xmlSHRINK (ctxt);
1838
1839static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1840 xmlParserInputShrink(ctxt->input);
1841 if ((*ctxt->input->cur == 0) &&
1842 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1843 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001844 }
Owen Taylor3473f882001-02-23 17:55:21 +00001845
Daniel Veillarda880b122003-04-21 21:36:41 +00001846#define GROW if ((ctxt->progressive == 0) && \
1847 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001848 xmlGROW (ctxt);
1849
1850static void xmlGROW (xmlParserCtxtPtr ctxt) {
1851 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1852 if ((*ctxt->input->cur == 0) &&
1853 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1854 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001855}
Owen Taylor3473f882001-02-23 17:55:21 +00001856
/* Skip blanks at the cursor; evaluates to the number skipped. */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Advance to the next character via xmlNextChar(). */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar without any newline handling,
 * then try to read more data if the cursor lands on a 0 byte.
 * Expands to a brace block, not a do/while statement.
 */
#define NEXT1 {                                                         \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * NEXTL(l): advance by one character that is l xmlChars long. The line
 * and column counters are updated from the byte at the cursor before
 * moving; a '%' found afterwards is handed to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += l;                                              \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

/* Current character from the context / from a string; l receives the
 * length reported by the underlying call. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i: a plain byte store when l == 1, xmlCopyCharMultiByte()
 * otherwise. Expands to a bare if/else without a trailing semicolon.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001883
1884/**
1885 * xmlSkipBlankChars:
1886 * @ctxt: the XML parser context
1887 *
1888 * skip all blanks character found at that point in the input streams.
1889 * It pops up finished entities in the process if allowable at that point.
1890 *
1891 * Returns the number of space chars skipped
1892 */
1893
1894int
1895xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001896 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001897
1898 /*
1899 * It's Okay to use CUR/NEXT here since all the blanks are on
1900 * the ASCII range.
1901 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001902 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1903 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001904 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001905 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001906 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001907 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001908 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001909 if (*cur == '\n') {
1910 ctxt->input->line++; ctxt->input->col = 1;
1911 }
1912 cur++;
1913 res++;
1914 if (*cur == 0) {
1915 ctxt->input->cur = cur;
1916 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1917 cur = ctxt->input->cur;
1918 }
1919 }
1920 ctxt->input->cur = cur;
1921 } else {
1922 int cur;
1923 do {
1924 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001925 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001926 NEXT;
1927 cur = CUR;
1928 res++;
1929 }
1930 while ((cur == 0) && (ctxt->inputNr > 1) &&
1931 (ctxt->instate != XML_PARSER_COMMENT)) {
1932 xmlPopInput(ctxt);
1933 cur = CUR;
1934 }
1935 /*
1936 * Need to handle support of entities branching here
1937 */
1938 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1939 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1940 }
Owen Taylor3473f882001-02-23 17:55:21 +00001941 return(res);
1942}
1943
1944/************************************************************************
1945 * *
1946 * Commodity functions to handle entities *
1947 * *
1948 ************************************************************************/
1949
1950/**
1951 * xmlPopInput:
1952 * @ctxt: an XML parser context
1953 *
1954 * xmlPopInput: the current input pointed by ctxt->input came to an end
1955 * pop it and return the next char.
1956 *
1957 * Returns the current xmlChar in the parser context
1958 */
1959xmlChar
1960xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001961 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001962 if (xmlParserDebugEntities)
1963 xmlGenericError(xmlGenericErrorContext,
1964 "Popping input %d\n", ctxt->inputNr);
1965 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001966 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001967 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1968 return(xmlPopInput(ctxt));
1969 return(CUR);
1970}
1971
1972/**
1973 * xmlPushInput:
1974 * @ctxt: an XML parser context
1975 * @input: an XML parser input fragment (entity, XML fragment ...).
1976 *
1977 * xmlPushInput: switch to a new input stream which is stacked on top
1978 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001979 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00001980 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001981int
Owen Taylor3473f882001-02-23 17:55:21 +00001982xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001983 int ret;
1984 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001985
1986 if (xmlParserDebugEntities) {
1987 if ((ctxt->input != NULL) && (ctxt->input->filename))
1988 xmlGenericError(xmlGenericErrorContext,
1989 "%s(%d): ", ctxt->input->filename,
1990 ctxt->input->line);
1991 xmlGenericError(xmlGenericErrorContext,
1992 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1993 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001994 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001995 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001996 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001997}
1998
1999/**
2000 * xmlParseCharRef:
2001 * @ctxt: an XML parser context
2002 *
2003 * parse Reference declarations
2004 *
2005 * [66] CharRef ::= '&#' [0-9]+ ';' |
2006 * '&#x' [0-9a-fA-F]+ ';'
2007 *
2008 * [ WFC: Legal Character ]
2009 * Characters referred to using character references must match the
2010 * production for Char.
2011 *
2012 * Returns the value parsed (as an int), 0 in case of error
2013 */
2014int
2015xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002016 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002017 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002018 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002019
Owen Taylor3473f882001-02-23 17:55:21 +00002020 /*
2021 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2022 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002023 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002024 (NXT(2) == 'x')) {
2025 SKIP(3);
2026 GROW;
2027 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002028 if (count++ > 20) {
2029 count = 0;
2030 GROW;
2031 }
2032 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002033 val = val * 16 + (CUR - '0');
2034 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2035 val = val * 16 + (CUR - 'a') + 10;
2036 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2037 val = val * 16 + (CUR - 'A') + 10;
2038 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002039 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002040 val = 0;
2041 break;
2042 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002043 if (val > 0x10FFFF)
2044 outofrange = val;
2045
Owen Taylor3473f882001-02-23 17:55:21 +00002046 NEXT;
2047 count++;
2048 }
2049 if (RAW == ';') {
2050 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002051 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002052 ctxt->nbChars ++;
2053 ctxt->input->cur++;
2054 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002055 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002056 SKIP(2);
2057 GROW;
2058 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002059 if (count++ > 20) {
2060 count = 0;
2061 GROW;
2062 }
2063 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002064 val = val * 10 + (CUR - '0');
2065 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002066 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002067 val = 0;
2068 break;
2069 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002070 if (val > 0x10FFFF)
2071 outofrange = val;
2072
Owen Taylor3473f882001-02-23 17:55:21 +00002073 NEXT;
2074 count++;
2075 }
2076 if (RAW == ';') {
2077 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002078 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002079 ctxt->nbChars ++;
2080 ctxt->input->cur++;
2081 }
2082 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002083 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 }
2085
2086 /*
2087 * [ WFC: Legal Character ]
2088 * Characters referred to using character references must match the
2089 * production for Char.
2090 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002091 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002092 return(val);
2093 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002094 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2095 "xmlParseCharRef: invalid xmlChar value %d\n",
2096 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002097 }
2098 return(0);
2099}
2100
2101/**
2102 * xmlParseStringCharRef:
2103 * @ctxt: an XML parser context
2104 * @str: a pointer to an index in the string
2105 *
2106 * parse Reference declarations, variant parsing from a string rather
2107 * than an an input flow.
2108 *
2109 * [66] CharRef ::= '&#' [0-9]+ ';' |
2110 * '&#x' [0-9a-fA-F]+ ';'
2111 *
2112 * [ WFC: Legal Character ]
2113 * Characters referred to using character references must match the
2114 * production for Char.
2115 *
2116 * Returns the value parsed (as an int), 0 in case of error, str will be
2117 * updated to the current value of the index
2118 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002119static int
Owen Taylor3473f882001-02-23 17:55:21 +00002120xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2121 const xmlChar *ptr;
2122 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002123 unsigned int val = 0;
2124 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002125
2126 if ((str == NULL) || (*str == NULL)) return(0);
2127 ptr = *str;
2128 cur = *ptr;
2129 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2130 ptr += 3;
2131 cur = *ptr;
2132 while (cur != ';') { /* Non input consuming loop */
2133 if ((cur >= '0') && (cur <= '9'))
2134 val = val * 16 + (cur - '0');
2135 else if ((cur >= 'a') && (cur <= 'f'))
2136 val = val * 16 + (cur - 'a') + 10;
2137 else if ((cur >= 'A') && (cur <= 'F'))
2138 val = val * 16 + (cur - 'A') + 10;
2139 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002140 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002141 val = 0;
2142 break;
2143 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002144 if (val > 0x10FFFF)
2145 outofrange = val;
2146
Owen Taylor3473f882001-02-23 17:55:21 +00002147 ptr++;
2148 cur = *ptr;
2149 }
2150 if (cur == ';')
2151 ptr++;
2152 } else if ((cur == '&') && (ptr[1] == '#')){
2153 ptr += 2;
2154 cur = *ptr;
2155 while (cur != ';') { /* Non input consuming loops */
2156 if ((cur >= '0') && (cur <= '9'))
2157 val = val * 10 + (cur - '0');
2158 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002159 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002160 val = 0;
2161 break;
2162 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002163 if (val > 0x10FFFF)
2164 outofrange = val;
2165
Owen Taylor3473f882001-02-23 17:55:21 +00002166 ptr++;
2167 cur = *ptr;
2168 }
2169 if (cur == ';')
2170 ptr++;
2171 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002172 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002173 return(0);
2174 }
2175 *str = ptr;
2176
2177 /*
2178 * [ WFC: Legal Character ]
2179 * Characters referred to using character references must match the
2180 * production for Char.
2181 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002182 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002183 return(val);
2184 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002185 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2186 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2187 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002188 }
2189 return(0);
2190}
2191
2192/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002193 * xmlNewBlanksWrapperInputStream:
2194 * @ctxt: an XML parser context
2195 * @entity: an Entity pointer
2196 *
2197 * Create a new input stream for wrapping
2198 * blanks around a PEReference
2199 *
2200 * Returns the new input stream or NULL
2201 */
2202
2203static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2204
Daniel Veillardf4862f02002-09-10 11:13:43 +00002205static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002206xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2207 xmlParserInputPtr input;
2208 xmlChar *buffer;
2209 size_t length;
2210 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002211 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2212 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002213 return(NULL);
2214 }
2215 if (xmlParserDebugEntities)
2216 xmlGenericError(xmlGenericErrorContext,
2217 "new blanks wrapper for entity: %s\n", entity->name);
2218 input = xmlNewInputStream(ctxt);
2219 if (input == NULL) {
2220 return(NULL);
2221 }
2222 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002223 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002224 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002225 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002226 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002227 return(NULL);
2228 }
2229 buffer [0] = ' ';
2230 buffer [1] = '%';
2231 buffer [length-3] = ';';
2232 buffer [length-2] = ' ';
2233 buffer [length-1] = 0;
2234 memcpy(buffer + 2, entity->name, length - 5);
2235 input->free = deallocblankswrapper;
2236 input->base = buffer;
2237 input->cur = buffer;
2238 input->length = length;
2239 input->end = &buffer[length];
2240 return(input);
2241}
2242
2243/**
Owen Taylor3473f882001-02-23 17:55:21 +00002244 * xmlParserHandlePEReference:
2245 * @ctxt: the parser context
2246 *
2247 * [69] PEReference ::= '%' Name ';'
2248 *
2249 * [ WFC: No Recursion ]
2250 * A parsed entity must not contain a recursive
2251 * reference to itself, either directly or indirectly.
2252 *
2253 * [ WFC: Entity Declared ]
2254 * In a document without any DTD, a document with only an internal DTD
2255 * subset which contains no parameter entity references, or a document
2256 * with "standalone='yes'", ... ... The declaration of a parameter
2257 * entity must precede any reference to it...
2258 *
2259 * [ VC: Entity Declared ]
2260 * In a document with an external subset or external parameter entities
2261 * with "standalone='no'", ... ... The declaration of a parameter entity
2262 * must precede any reference to it...
2263 *
2264 * [ WFC: In DTD ]
2265 * Parameter-entity references may only appear in the DTD.
2266 * NOTE: misleading but this is handled.
2267 *
2268 * A PEReference may have been detected in the current input stream
2269 * the handling is done accordingly to
2270 * http://www.w3.org/TR/REC-xml#entproc
2271 * i.e.
2272 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002273 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002274 */
2275void
2276xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002277 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002278 xmlEntityPtr entity = NULL;
2279 xmlParserInputPtr input;
2280
Owen Taylor3473f882001-02-23 17:55:21 +00002281 if (RAW != '%') return;
2282 switch(ctxt->instate) {
2283 case XML_PARSER_CDATA_SECTION:
2284 return;
2285 case XML_PARSER_COMMENT:
2286 return;
2287 case XML_PARSER_START_TAG:
2288 return;
2289 case XML_PARSER_END_TAG:
2290 return;
2291 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002292 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002293 return;
2294 case XML_PARSER_PROLOG:
2295 case XML_PARSER_START:
2296 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002297 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002298 return;
2299 case XML_PARSER_ENTITY_DECL:
2300 case XML_PARSER_CONTENT:
2301 case XML_PARSER_ATTRIBUTE_VALUE:
2302 case XML_PARSER_PI:
2303 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002304 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002305 /* we just ignore it there */
2306 return;
2307 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002308 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002309 return;
2310 case XML_PARSER_ENTITY_VALUE:
2311 /*
2312 * NOTE: in the case of entity values, we don't do the
2313 * substitution here since we need the literal
2314 * entity value to be able to save the internal
2315 * subset of the document.
2316 * This will be handled by xmlStringDecodeEntities
2317 */
2318 return;
2319 case XML_PARSER_DTD:
2320 /*
2321 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2322 * In the internal DTD subset, parameter-entity references
2323 * can occur only where markup declarations can occur, not
2324 * within markup declarations.
2325 * In that case this is handled in xmlParseMarkupDecl
2326 */
2327 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2328 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002329 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002330 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002331 break;
2332 case XML_PARSER_IGNORE:
2333 return;
2334 }
2335
2336 NEXT;
2337 name = xmlParseName(ctxt);
2338 if (xmlParserDebugEntities)
2339 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002340 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002341 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002342 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002343 } else {
2344 if (RAW == ';') {
2345 NEXT;
2346 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2347 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2348 if (entity == NULL) {
2349
2350 /*
2351 * [ WFC: Entity Declared ]
2352 * In a document without any DTD, a document with only an
2353 * internal DTD subset which contains no parameter entity
2354 * references, or a document with "standalone='yes'", ...
2355 * ... The declaration of a parameter entity must precede
2356 * any reference to it...
2357 */
2358 if ((ctxt->standalone == 1) ||
2359 ((ctxt->hasExternalSubset == 0) &&
2360 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002361 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002362 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002363 } else {
2364 /*
2365 * [ VC: Entity Declared ]
2366 * In a document with an external subset or external
2367 * parameter entities with "standalone='no'", ...
2368 * ... The declaration of a parameter entity must precede
2369 * any reference to it...
2370 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002371 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2372 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2373 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002374 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002375 } else
2376 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2377 "PEReference: %%%s; not found\n",
2378 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002379 ctxt->valid = 0;
2380 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002381 } else if (ctxt->input->free != deallocblankswrapper) {
2382 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002383 if (xmlPushInput(ctxt, input) < 0)
2384 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002385 } else {
2386 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2387 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002388 xmlChar start[4];
2389 xmlCharEncoding enc;
2390
Owen Taylor3473f882001-02-23 17:55:21 +00002391 /*
2392 * handle the extra spaces added before and after
2393 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002394 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002395 */
2396 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002397 if (xmlPushInput(ctxt, input) < 0)
2398 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002399
2400 /*
2401 * Get the 4 first bytes and decode the charset
2402 * if enc != XML_CHAR_ENCODING_NONE
2403 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002404 * Note that, since we may have some non-UTF8
2405 * encoding (like UTF16, bug 135229), the 'length'
2406 * is not known, but we can calculate based upon
2407 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002408 */
2409 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002410 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002411 start[0] = RAW;
2412 start[1] = NXT(1);
2413 start[2] = NXT(2);
2414 start[3] = NXT(3);
2415 enc = xmlDetectCharEncoding(start, 4);
2416 if (enc != XML_CHAR_ENCODING_NONE) {
2417 xmlSwitchEncoding(ctxt, enc);
2418 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002419 }
2420
Owen Taylor3473f882001-02-23 17:55:21 +00002421 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002422 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2423 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002424 xmlParseTextDecl(ctxt);
2425 }
Owen Taylor3473f882001-02-23 17:55:21 +00002426 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002427 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2428 "PEReference: %s is not a parameter entity\n",
2429 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002430 }
2431 }
2432 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002433 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002434 }
Owen Taylor3473f882001-02-23 17:55:21 +00002435 }
2436}
2437
/*
 * Macro used to grow the current buffer.
 *
 * @buffer: name of an xmlChar * variable; the macro also updates the
 *          companion size variable named <buffer>_size (token pasting).
 * @n:      extra number of xmlChar added on top of the doubling.
 *
 * The size is doubled, then increased by n, and the buffer is passed to
 * xmlRealloc. On success the buffer variable points to the new block.
 * On failure control jumps to a label named mem_error, which the
 * enclosing function must provide; at that point the buffer variable
 * still holds the old pointer while <buffer>_size has already been
 * increased.
 *
 * The expansion is a plain { } block declaring a local named tmp.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    buffer##_size *= 2;							\
    buffer##_size += n;							\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2450
2451/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002452 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002453 * @ctxt: the parser context
2454 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002455 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002456 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2457 * @end: an end marker xmlChar, 0 if none
2458 * @end2: an end marker xmlChar, 0 if none
2459 * @end3: an end marker xmlChar, 0 if none
2460 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002461 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002462 *
2463 * [67] Reference ::= EntityRef | CharRef
2464 *
2465 * [69] PEReference ::= '%' Name ';'
2466 *
2467 * Returns A newly allocated string with the substitution done. The caller
2468 * must deallocate it !
2469 */
2470xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002471xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2472 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002473 xmlChar *buffer = NULL;
2474 int buffer_size = 0;
2475
2476 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002477 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002478 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002479 xmlEntityPtr ent;
2480 int c,l;
2481 int nbchars = 0;
2482
Daniel Veillarda82b1822004-11-08 16:24:57 +00002483 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002484 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002485 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002486
Daniel Veillard0161e632008-08-28 15:36:32 +00002487 if (((ctxt->depth > 40) &&
2488 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2489 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002490 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002491 return(NULL);
2492 }
2493
2494 /*
2495 * allocate a translation buffer.
2496 */
2497 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002498 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002499 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002500
2501 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002502 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002503 * we are operating on already parsed values.
2504 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002505 if (str < last)
2506 c = CUR_SCHAR(str, l);
2507 else
2508 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002509 while ((c != 0) && (c != end) && /* non input consuming loop */
2510 (c != end2) && (c != end3)) {
2511
2512 if (c == 0) break;
2513 if ((c == '&') && (str[1] == '#')) {
2514 int val = xmlParseStringCharRef(ctxt, &str);
2515 if (val != 0) {
2516 COPY_BUF(0,buffer,nbchars,val);
2517 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002518 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002519 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002520 }
Owen Taylor3473f882001-02-23 17:55:21 +00002521 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2522 if (xmlParserDebugEntities)
2523 xmlGenericError(xmlGenericErrorContext,
2524 "String decoding Entity Reference: %.30s\n",
2525 str);
2526 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002527 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2528 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002529 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002530 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002531 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 if ((ent != NULL) &&
2533 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2534 if (ent->content != NULL) {
2535 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002536 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002537 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002538 }
Owen Taylor3473f882001-02-23 17:55:21 +00002539 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002540 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2541 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002542 }
2543 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002544 ctxt->depth++;
2545 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2546 0, 0, 0);
2547 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002548
Owen Taylor3473f882001-02-23 17:55:21 +00002549 if (rep != NULL) {
2550 current = rep;
2551 while (*current != 0) { /* non input consuming loop */
2552 buffer[nbchars++] = *current++;
2553 if (nbchars >
2554 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002555 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2556 goto int_error;
2557 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002558 }
2559 }
2560 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002561 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002562 }
2563 } else if (ent != NULL) {
2564 int i = xmlStrlen(ent->name);
2565 const xmlChar *cur = ent->name;
2566
2567 buffer[nbchars++] = '&';
2568 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002569 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002570 }
2571 for (;i > 0;i--)
2572 buffer[nbchars++] = *cur++;
2573 buffer[nbchars++] = ';';
2574 }
2575 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2576 if (xmlParserDebugEntities)
2577 xmlGenericError(xmlGenericErrorContext,
2578 "String decoding PE Reference: %.30s\n", str);
2579 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002580 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2581 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002582 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002583 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002584 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002585 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002586 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002587 }
Owen Taylor3473f882001-02-23 17:55:21 +00002588 ctxt->depth++;
2589 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2590 0, 0, 0);
2591 ctxt->depth--;
2592 if (rep != NULL) {
2593 current = rep;
2594 while (*current != 0) { /* non input consuming loop */
2595 buffer[nbchars++] = *current++;
2596 if (nbchars >
2597 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002598 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2599 goto int_error;
2600 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002601 }
2602 }
2603 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002604 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002605 }
2606 }
2607 } else {
2608 COPY_BUF(l,buffer,nbchars,c);
2609 str += l;
2610 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002611 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002612 }
2613 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002614 if (str < last)
2615 c = CUR_SCHAR(str, l);
2616 else
2617 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002618 }
2619 buffer[nbchars++] = 0;
2620 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002621
2622mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002623 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002624int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002625 if (rep != NULL)
2626 xmlFree(rep);
2627 if (buffer != NULL)
2628 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002629 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002630}
2631
Daniel Veillarde57ec792003-09-10 10:50:59 +00002632/**
2633 * xmlStringDecodeEntities:
2634 * @ctxt: the parser context
2635 * @str: the input string
2636 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2637 * @end: an end marker xmlChar, 0 if none
2638 * @end2: an end marker xmlChar, 0 if none
2639 * @end3: an end marker xmlChar, 0 if none
2640 *
2641 * Takes a entity string content and process to do the adequate substitutions.
2642 *
2643 * [67] Reference ::= EntityRef | CharRef
2644 *
2645 * [69] PEReference ::= '%' Name ';'
2646 *
2647 * Returns A newly allocated string with the substitution done. The caller
2648 * must deallocate it !
2649 */
2650xmlChar *
2651xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2652 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002653 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002654 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2655 end, end2, end3));
2656}
Owen Taylor3473f882001-02-23 17:55:21 +00002657
2658/************************************************************************
2659 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002660 * Commodity functions, cleanup needed ? *
2661 * *
2662 ************************************************************************/
2663
2664/**
2665 * areBlanks:
2666 * @ctxt: an XML parser context
2667 * @str: a xmlChar *
2668 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002669 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002670 *
2671 * Is this a sequence of blank chars that one can ignore ?
2672 *
2673 * Returns 1 if ignorable 0 otherwise.
2674 */
2675
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002676static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2677 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002678 int i, ret;
2679 xmlNodePtr lastChild;
2680
Daniel Veillard05c13a22001-09-09 08:38:09 +00002681 /*
2682 * Don't spend time trying to differentiate them, the same callback is
2683 * used !
2684 */
2685 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002686 return(0);
2687
Owen Taylor3473f882001-02-23 17:55:21 +00002688 /*
2689 * Check for xml:space value.
2690 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002691 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2692 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002693 return(0);
2694
2695 /*
2696 * Check that the string is made of blanks
2697 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002698 if (blank_chars == 0) {
2699 for (i = 0;i < len;i++)
2700 if (!(IS_BLANK_CH(str[i]))) return(0);
2701 }
Owen Taylor3473f882001-02-23 17:55:21 +00002702
2703 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002704 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002705 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002706 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002707 if (ctxt->myDoc != NULL) {
2708 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2709 if (ret == 0) return(1);
2710 if (ret == 1) return(0);
2711 }
2712
2713 /*
2714 * Otherwise, heuristic :-\
2715 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002716 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002717 if ((ctxt->node->children == NULL) &&
2718 (RAW == '<') && (NXT(1) == '/')) return(0);
2719
2720 lastChild = xmlGetLastChild(ctxt->node);
2721 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002722 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2723 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002724 } else if (xmlNodeIsText(lastChild))
2725 return(0);
2726 else if ((ctxt->node->children != NULL) &&
2727 (xmlNodeIsText(ctxt->node->children)))
2728 return(0);
2729 return(1);
2730}
2731
Owen Taylor3473f882001-02-23 17:55:21 +00002732/************************************************************************
2733 * *
2734 * Extra stuff for namespace support *
2735 * Relates to http://www.w3.org/TR/WD-xml-names *
2736 * *
2737 ************************************************************************/
2738
2739/**
2740 * xmlSplitQName:
2741 * @ctxt: an XML parser context
2742 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002743 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002744 *
2745 * parse an UTF8 encoded XML qualified name string
2746 *
2747 * [NS 5] QName ::= (Prefix ':')? LocalPart
2748 *
2749 * [NS 6] Prefix ::= NCName
2750 *
2751 * [NS 7] LocalPart ::= NCName
2752 *
2753 * Returns the local part, and prefix is updated
2754 * to get the Prefix if any.
2755 */
2756
2757xmlChar *
2758xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2759 xmlChar buf[XML_MAX_NAMELEN + 5];
2760 xmlChar *buffer = NULL;
2761 int len = 0;
2762 int max = XML_MAX_NAMELEN;
2763 xmlChar *ret = NULL;
2764 const xmlChar *cur = name;
2765 int c;
2766
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002767 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002768 *prefix = NULL;
2769
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002770 if (cur == NULL) return(NULL);
2771
Owen Taylor3473f882001-02-23 17:55:21 +00002772#ifndef XML_XML_NAMESPACE
2773 /* xml: prefix is not really a namespace */
2774 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2775 (cur[2] == 'l') && (cur[3] == ':'))
2776 return(xmlStrdup(name));
2777#endif
2778
Daniel Veillard597bc482003-07-24 16:08:28 +00002779 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002780 if (cur[0] == ':')
2781 return(xmlStrdup(name));
2782
2783 c = *cur++;
2784 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2785 buf[len++] = c;
2786 c = *cur++;
2787 }
2788 if (len >= max) {
2789 /*
2790 * Okay someone managed to make a huge name, so he's ready to pay
2791 * for the processing speed.
2792 */
2793 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002794
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002795 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002796 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002797 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002798 return(NULL);
2799 }
2800 memcpy(buffer, buf, len);
2801 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2802 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002803 xmlChar *tmp;
2804
Owen Taylor3473f882001-02-23 17:55:21 +00002805 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002806 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002807 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002808 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002809 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002810 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002811 return(NULL);
2812 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002813 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002814 }
2815 buffer[len++] = c;
2816 c = *cur++;
2817 }
2818 buffer[len] = 0;
2819 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002820
Daniel Veillard597bc482003-07-24 16:08:28 +00002821 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002822 if (buffer != NULL)
2823 xmlFree(buffer);
2824 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002825 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002826 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002827
Owen Taylor3473f882001-02-23 17:55:21 +00002828 if (buffer == NULL)
2829 ret = xmlStrndup(buf, len);
2830 else {
2831 ret = buffer;
2832 buffer = NULL;
2833 max = XML_MAX_NAMELEN;
2834 }
2835
2836
2837 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002838 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002839 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002840 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002841 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002842 }
Owen Taylor3473f882001-02-23 17:55:21 +00002843 len = 0;
2844
Daniel Veillardbb284f42002-10-16 18:02:47 +00002845 /*
2846 * Check that the first character is proper to start
2847 * a new name
2848 */
2849 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2850 ((c >= 0x41) && (c <= 0x5A)) ||
2851 (c == '_') || (c == ':'))) {
2852 int l;
2853 int first = CUR_SCHAR(cur, l);
2854
2855 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002856 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002857 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002858 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002859 }
2860 }
2861 cur++;
2862
Owen Taylor3473f882001-02-23 17:55:21 +00002863 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2864 buf[len++] = c;
2865 c = *cur++;
2866 }
2867 if (len >= max) {
2868 /*
2869 * Okay someone managed to make a huge name, so he's ready to pay
2870 * for the processing speed.
2871 */
2872 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002873
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002874 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002876 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002877 return(NULL);
2878 }
2879 memcpy(buffer, buf, len);
2880 while (c != 0) { /* tested bigname2.xml */
2881 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002882 xmlChar *tmp;
2883
Owen Taylor3473f882001-02-23 17:55:21 +00002884 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002885 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002886 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002887 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002888 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002889 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002890 return(NULL);
2891 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002892 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002893 }
2894 buffer[len++] = c;
2895 c = *cur++;
2896 }
2897 buffer[len] = 0;
2898 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002899
Owen Taylor3473f882001-02-23 17:55:21 +00002900 if (buffer == NULL)
2901 ret = xmlStrndup(buf, len);
2902 else {
2903 ret = buffer;
2904 }
2905 }
2906
2907 return(ret);
2908}
2909
2910/************************************************************************
2911 * *
2912 * The parser itself *
2913 * Relates to http://www.w3.org/TR/REC-xml *
2914 * *
2915 ************************************************************************/
2916
Daniel Veillard34e3f642008-07-29 09:02:27 +00002917/************************************************************************
2918 * *
2919 * Routines to parse Name, NCName and NmToken *
2920 * *
2921 ************************************************************************/
#ifdef DEBUG
/*
 * Call counters for the name parsing routines, only compiled in when
 * DEBUG is defined. nbParseNameComplex is incremented on entry to
 * xmlParseNameComplex(); the other counters are presumably maintained
 * the same way by the routines they are named after (not checked here).
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
2930
Daniel Veillard34e3f642008-07-29 09:02:27 +00002931/*
2932 * The two following functions are related to the change of accepted
2933 * characters for Name and NmToken in the Revision 5 of XML-1.0
2934 * They correspond to the modified production [4] and the new production [4a]
2935 * changes in that revision. Also note that the macros used for the
2936 * productions Letter, Digit, CombiningChar and Extender are not needed
2937 * anymore.
2938 * We still keep compatibility to pre-revision5 parsing semantic if the
2939 * new XML_PARSE_OLD10 option is given to the parser.
2940 */
2941static int
2942xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2943 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2944 /*
2945 * Use the new checks of production [4] [4a] amd [5] of the
2946 * Update 5 of XML-1.0
2947 */
2948 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2949 (((c >= 'a') && (c <= 'z')) ||
2950 ((c >= 'A') && (c <= 'Z')) ||
2951 (c == '_') || (c == ':') ||
2952 ((c >= 0xC0) && (c <= 0xD6)) ||
2953 ((c >= 0xD8) && (c <= 0xF6)) ||
2954 ((c >= 0xF8) && (c <= 0x2FF)) ||
2955 ((c >= 0x370) && (c <= 0x37D)) ||
2956 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2957 ((c >= 0x200C) && (c <= 0x200D)) ||
2958 ((c >= 0x2070) && (c <= 0x218F)) ||
2959 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2960 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2961 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2962 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2963 ((c >= 0x10000) && (c <= 0xEFFFF))))
2964 return(1);
2965 } else {
2966 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2967 return(1);
2968 }
2969 return(0);
2970}
2971
2972static int
2973xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2974 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2975 /*
2976 * Use the new checks of production [4] [4a] amd [5] of the
2977 * Update 5 of XML-1.0
2978 */
2979 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2980 (((c >= 'a') && (c <= 'z')) ||
2981 ((c >= 'A') && (c <= 'Z')) ||
2982 ((c >= '0') && (c <= '9')) || /* !start */
2983 (c == '_') || (c == ':') ||
2984 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2985 ((c >= 0xC0) && (c <= 0xD6)) ||
2986 ((c >= 0xD8) && (c <= 0xF6)) ||
2987 ((c >= 0xF8) && (c <= 0x2FF)) ||
2988 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2989 ((c >= 0x370) && (c <= 0x37D)) ||
2990 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2991 ((c >= 0x200C) && (c <= 0x200D)) ||
2992 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2993 ((c >= 0x2070) && (c <= 0x218F)) ||
2994 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2995 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2996 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2997 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2998 ((c >= 0x10000) && (c <= 0xEFFFF))))
2999 return(1);
3000 } else {
3001 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3002 (c == '.') || (c == '-') ||
3003 (c == '_') || (c == ':') ||
3004 (IS_COMBINING(c)) ||
3005 (IS_EXTENDER(c)))
3006 return(1);
3007 }
3008 return(0);
3009}
3010
Daniel Veillarde57ec792003-09-10 10:50:59 +00003011static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003012 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003013
Daniel Veillard34e3f642008-07-29 09:02:27 +00003014static const xmlChar *
3015xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3016 int len = 0, l;
3017 int c;
3018 int count = 0;
3019
Daniel Veillardc6561462009-03-25 10:22:31 +00003020#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003021 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003022#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003023
3024 /*
3025 * Handler for more complex cases
3026 */
3027 GROW;
3028 c = CUR_CHAR(l);
3029 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3030 /*
3031 * Use the new checks of production [4] [4a] amd [5] of the
3032 * Update 5 of XML-1.0
3033 */
3034 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3035 (!(((c >= 'a') && (c <= 'z')) ||
3036 ((c >= 'A') && (c <= 'Z')) ||
3037 (c == '_') || (c == ':') ||
3038 ((c >= 0xC0) && (c <= 0xD6)) ||
3039 ((c >= 0xD8) && (c <= 0xF6)) ||
3040 ((c >= 0xF8) && (c <= 0x2FF)) ||
3041 ((c >= 0x370) && (c <= 0x37D)) ||
3042 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3043 ((c >= 0x200C) && (c <= 0x200D)) ||
3044 ((c >= 0x2070) && (c <= 0x218F)) ||
3045 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3046 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3047 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3048 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3049 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3050 return(NULL);
3051 }
3052 len += l;
3053 NEXTL(l);
3054 c = CUR_CHAR(l);
3055 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3056 (((c >= 'a') && (c <= 'z')) ||
3057 ((c >= 'A') && (c <= 'Z')) ||
3058 ((c >= '0') && (c <= '9')) || /* !start */
3059 (c == '_') || (c == ':') ||
3060 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3061 ((c >= 0xC0) && (c <= 0xD6)) ||
3062 ((c >= 0xD8) && (c <= 0xF6)) ||
3063 ((c >= 0xF8) && (c <= 0x2FF)) ||
3064 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3065 ((c >= 0x370) && (c <= 0x37D)) ||
3066 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3067 ((c >= 0x200C) && (c <= 0x200D)) ||
3068 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3069 ((c >= 0x2070) && (c <= 0x218F)) ||
3070 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3071 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3072 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3073 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3074 ((c >= 0x10000) && (c <= 0xEFFFF))
3075 )) {
3076 if (count++ > 100) {
3077 count = 0;
3078 GROW;
3079 }
3080 len += l;
3081 NEXTL(l);
3082 c = CUR_CHAR(l);
3083 }
3084 } else {
3085 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3086 (!IS_LETTER(c) && (c != '_') &&
3087 (c != ':'))) {
3088 return(NULL);
3089 }
3090 len += l;
3091 NEXTL(l);
3092 c = CUR_CHAR(l);
3093
3094 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3095 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3096 (c == '.') || (c == '-') ||
3097 (c == '_') || (c == ':') ||
3098 (IS_COMBINING(c)) ||
3099 (IS_EXTENDER(c)))) {
3100 if (count++ > 100) {
3101 count = 0;
3102 GROW;
3103 }
3104 len += l;
3105 NEXTL(l);
3106 c = CUR_CHAR(l);
3107 }
3108 }
3109 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3110 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3111 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3112}
3113
Owen Taylor3473f882001-02-23 17:55:21 +00003114/**
3115 * xmlParseName:
3116 * @ctxt: an XML parser context
3117 *
3118 * parse an XML name.
3119 *
3120 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3121 * CombiningChar | Extender
3122 *
3123 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3124 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003125 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003126 *
3127 * Returns the Name parsed or NULL
3128 */
3129
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003130const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003131xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003132 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003133 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003134 int count = 0;
3135
3136 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003137
Daniel Veillardc6561462009-03-25 10:22:31 +00003138#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003139 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003140#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003141
Daniel Veillard48b2f892001-02-25 16:11:03 +00003142 /*
3143 * Accelerator for simple ASCII names
3144 */
3145 in = ctxt->input->cur;
3146 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3147 ((*in >= 0x41) && (*in <= 0x5A)) ||
3148 (*in == '_') || (*in == ':')) {
3149 in++;
3150 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3151 ((*in >= 0x41) && (*in <= 0x5A)) ||
3152 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003153 (*in == '_') || (*in == '-') ||
3154 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003155 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003156 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003157 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003158 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003159 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003160 ctxt->nbChars += count;
3161 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003162 if (ret == NULL)
3163 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003164 return(ret);
3165 }
3166 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003167 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003168 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003169}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003170
Daniel Veillard34e3f642008-07-29 09:02:27 +00003171static const xmlChar *
3172xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3173 int len = 0, l;
3174 int c;
3175 int count = 0;
3176
Daniel Veillardc6561462009-03-25 10:22:31 +00003177#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003178 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003179#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003180
3181 /*
3182 * Handler for more complex cases
3183 */
3184 GROW;
3185 c = CUR_CHAR(l);
3186 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3187 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3188 return(NULL);
3189 }
3190
3191 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3192 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3193 if (count++ > 100) {
3194 count = 0;
3195 GROW;
3196 }
3197 len += l;
3198 NEXTL(l);
3199 c = CUR_CHAR(l);
3200 }
3201 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3202}
3203
3204/**
3205 * xmlParseNCName:
3206 * @ctxt: an XML parser context
3207 * @len: lenght of the string parsed
3208 *
3209 * parse an XML name.
3210 *
3211 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3212 * CombiningChar | Extender
3213 *
3214 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3215 *
3216 * Returns the Name parsed or NULL
3217 */
3218
3219static const xmlChar *
3220xmlParseNCName(xmlParserCtxtPtr ctxt) {
3221 const xmlChar *in;
3222 const xmlChar *ret;
3223 int count = 0;
3224
Daniel Veillardc6561462009-03-25 10:22:31 +00003225#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003226 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003227#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003228
3229 /*
3230 * Accelerator for simple ASCII names
3231 */
3232 in = ctxt->input->cur;
3233 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3234 ((*in >= 0x41) && (*in <= 0x5A)) ||
3235 (*in == '_')) {
3236 in++;
3237 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3238 ((*in >= 0x41) && (*in <= 0x5A)) ||
3239 ((*in >= 0x30) && (*in <= 0x39)) ||
3240 (*in == '_') || (*in == '-') ||
3241 (*in == '.'))
3242 in++;
3243 if ((*in > 0) && (*in < 0x80)) {
3244 count = in - ctxt->input->cur;
3245 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3246 ctxt->input->cur = in;
3247 ctxt->nbChars += count;
3248 ctxt->input->col += count;
3249 if (ret == NULL) {
3250 xmlErrMemory(ctxt, NULL);
3251 }
3252 return(ret);
3253 }
3254 }
3255 return(xmlParseNCNameComplex(ctxt));
3256}
3257
Daniel Veillard46de64e2002-05-29 08:21:33 +00003258/**
3259 * xmlParseNameAndCompare:
3260 * @ctxt: an XML parser context
3261 *
3262 * parse an XML name and compares for match
3263 * (specialized for endtag parsing)
3264 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003265 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3266 * and the name for mismatch
3267 */
3268
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003269static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003270xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003271 register const xmlChar *cmp = other;
3272 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003273 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003274
3275 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003276
Daniel Veillard46de64e2002-05-29 08:21:33 +00003277 in = ctxt->input->cur;
3278 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003279 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003280 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003281 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003282 }
William M. Brack76e95df2003-10-18 16:20:14 +00003283 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003284 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003285 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003286 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003287 }
3288 /* failure (or end of input buffer), check with full function */
3289 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003290 /* strings coming from the dictionnary direct compare possible */
3291 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003292 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003293 }
3294 return ret;
3295}
3296
Owen Taylor3473f882001-02-23 17:55:21 +00003297/**
3298 * xmlParseStringName:
3299 * @ctxt: an XML parser context
3300 * @str: a pointer to the string pointer (IN/OUT)
3301 *
3302 * parse an XML name.
3303 *
3304 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3305 * CombiningChar | Extender
3306 *
3307 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3308 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003309 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003310 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003311 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003312 * is updated to the current location in the string.
3313 */
3314
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003315static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003316xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3317 xmlChar buf[XML_MAX_NAMELEN + 5];
3318 const xmlChar *cur = *str;
3319 int len = 0, l;
3320 int c;
3321
Daniel Veillardc6561462009-03-25 10:22:31 +00003322#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003323 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003324#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003325
Owen Taylor3473f882001-02-23 17:55:21 +00003326 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003327 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003328 return(NULL);
3329 }
3330
Daniel Veillard34e3f642008-07-29 09:02:27 +00003331 COPY_BUF(l,buf,len,c);
3332 cur += l;
3333 c = CUR_SCHAR(cur, l);
3334 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003335 COPY_BUF(l,buf,len,c);
3336 cur += l;
3337 c = CUR_SCHAR(cur, l);
3338 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3339 /*
3340 * Okay someone managed to make a huge name, so he's ready to pay
3341 * for the processing speed.
3342 */
3343 xmlChar *buffer;
3344 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003345
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003346 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003347 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003348 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003349 return(NULL);
3350 }
3351 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003352 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003353 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003354 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003355 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003356 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003357 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003358 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003359 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003360 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003361 return(NULL);
3362 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003363 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003364 }
3365 COPY_BUF(l,buffer,len,c);
3366 cur += l;
3367 c = CUR_SCHAR(cur, l);
3368 }
3369 buffer[len] = 0;
3370 *str = cur;
3371 return(buffer);
3372 }
3373 }
3374 *str = cur;
3375 return(xmlStrndup(buf, len));
3376}
3377
3378/**
3379 * xmlParseNmtoken:
3380 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003381 *
Owen Taylor3473f882001-02-23 17:55:21 +00003382 * parse an XML Nmtoken.
3383 *
3384 * [7] Nmtoken ::= (NameChar)+
3385 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003386 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003387 *
3388 * Returns the Nmtoken parsed or NULL
3389 */
3390
3391xmlChar *
3392xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3393 xmlChar buf[XML_MAX_NAMELEN + 5];
3394 int len = 0, l;
3395 int c;
3396 int count = 0;
3397
Daniel Veillardc6561462009-03-25 10:22:31 +00003398#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003399 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003400#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003401
Owen Taylor3473f882001-02-23 17:55:21 +00003402 GROW;
3403 c = CUR_CHAR(l);
3404
Daniel Veillard34e3f642008-07-29 09:02:27 +00003405 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003406 if (count++ > 100) {
3407 count = 0;
3408 GROW;
3409 }
3410 COPY_BUF(l,buf,len,c);
3411 NEXTL(l);
3412 c = CUR_CHAR(l);
3413 if (len >= XML_MAX_NAMELEN) {
3414 /*
3415 * Okay someone managed to make a huge token, so he's ready to pay
3416 * for the processing speed.
3417 */
3418 xmlChar *buffer;
3419 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003420
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003421 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003422 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003423 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003424 return(NULL);
3425 }
3426 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003427 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003428 if (count++ > 100) {
3429 count = 0;
3430 GROW;
3431 }
3432 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003433 xmlChar *tmp;
3434
Owen Taylor3473f882001-02-23 17:55:21 +00003435 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003436 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003437 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003438 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003439 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003440 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003441 return(NULL);
3442 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003443 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003444 }
3445 COPY_BUF(l,buffer,len,c);
3446 NEXTL(l);
3447 c = CUR_CHAR(l);
3448 }
3449 buffer[len] = 0;
3450 return(buffer);
3451 }
3452 }
3453 if (len == 0)
3454 return(NULL);
3455 return(xmlStrndup(buf, len));
3456}
3457
3458/**
3459 * xmlParseEntityValue:
3460 * @ctxt: an XML parser context
3461 * @orig: if non-NULL store a copy of the original entity value
3462 *
3463 * parse a value for ENTITY declarations
3464 *
3465 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3466 * "'" ([^%&'] | PEReference | Reference)* "'"
3467 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003468 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003469 */
3470
3471xmlChar *
3472xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3473 xmlChar *buf = NULL;
3474 int len = 0;
3475 int size = XML_PARSER_BUFFER_SIZE;
3476 int c, l;
3477 xmlChar stop;
3478 xmlChar *ret = NULL;
3479 const xmlChar *cur = NULL;
3480 xmlParserInputPtr input;
3481
3482 if (RAW == '"') stop = '"';
3483 else if (RAW == '\'') stop = '\'';
3484 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003485 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003486 return(NULL);
3487 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003488 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003489 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003490 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003491 return(NULL);
3492 }
3493
3494 /*
3495 * The content of the entity definition is copied in a buffer.
3496 */
3497
3498 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3499 input = ctxt->input;
3500 GROW;
3501 NEXT;
3502 c = CUR_CHAR(l);
3503 /*
3504 * NOTE: 4.4.5 Included in Literal
3505 * When a parameter entity reference appears in a literal entity
3506 * value, ... a single or double quote character in the replacement
3507 * text is always treated as a normal data character and will not
3508 * terminate the literal.
3509 * In practice it means we stop the loop only when back at parsing
3510 * the initial entity and the quote is found
3511 */
William M. Brack871611b2003-10-18 04:53:14 +00003512 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003513 (ctxt->input != input))) {
3514 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003515 xmlChar *tmp;
3516
Owen Taylor3473f882001-02-23 17:55:21 +00003517 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003518 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3519 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003520 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003521 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003522 return(NULL);
3523 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003524 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003525 }
3526 COPY_BUF(l,buf,len,c);
3527 NEXTL(l);
3528 /*
3529 * Pop-up of finished entities.
3530 */
3531 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3532 xmlPopInput(ctxt);
3533
3534 GROW;
3535 c = CUR_CHAR(l);
3536 if (c == 0) {
3537 GROW;
3538 c = CUR_CHAR(l);
3539 }
3540 }
3541 buf[len] = 0;
3542
3543 /*
3544 * Raise problem w.r.t. '&' and '%' being used in non-entities
3545 * reference constructs. Note Charref will be handled in
3546 * xmlStringDecodeEntities()
3547 */
3548 cur = buf;
3549 while (*cur != 0) { /* non input consuming */
3550 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3551 xmlChar *name;
3552 xmlChar tmp = *cur;
3553
3554 cur++;
3555 name = xmlParseStringName(ctxt, &cur);
3556 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003557 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003558 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003559 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003560 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003561 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3562 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003563 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003564 }
3565 if (name != NULL)
3566 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003567 if (*cur == 0)
3568 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003569 }
3570 cur++;
3571 }
3572
3573 /*
3574 * Then PEReference entities are substituted.
3575 */
3576 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003577 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003578 xmlFree(buf);
3579 } else {
3580 NEXT;
3581 /*
3582 * NOTE: 4.4.7 Bypassed
3583 * When a general entity reference appears in the EntityValue in
3584 * an entity declaration, it is bypassed and left as is.
3585 * so XML_SUBSTITUTE_REF is not set here.
3586 */
3587 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3588 0, 0, 0);
3589 if (orig != NULL)
3590 *orig = buf;
3591 else
3592 xmlFree(buf);
3593 }
3594
3595 return(ret);
3596}
3597
3598/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003599 * xmlParseAttValueComplex:
3600 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003601 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003602 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003603 *
3604 * parse a value for an attribute, this is the fallback function
3605 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003606 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003607 *
3608 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3609 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003610static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003611xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003612 xmlChar limit = 0;
3613 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003614 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003615 int len = 0;
3616 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003617 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003618 xmlChar *current = NULL;
3619 xmlEntityPtr ent;
3620
Owen Taylor3473f882001-02-23 17:55:21 +00003621 if (NXT(0) == '"') {
3622 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3623 limit = '"';
3624 NEXT;
3625 } else if (NXT(0) == '\'') {
3626 limit = '\'';
3627 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3628 NEXT;
3629 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003630 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003631 return(NULL);
3632 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003633
Owen Taylor3473f882001-02-23 17:55:21 +00003634 /*
3635 * allocate a translation buffer.
3636 */
3637 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003638 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003639 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003640
3641 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003642 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003643 */
3644 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003645 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003646 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003647 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003648 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003649 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003650 if (NXT(1) == '#') {
3651 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003652
Owen Taylor3473f882001-02-23 17:55:21 +00003653 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003654 if (ctxt->replaceEntities) {
3655 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003656 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003657 }
3658 buf[len++] = '&';
3659 } else {
3660 /*
3661 * The reparsing will be done in xmlStringGetNodeList()
3662 * called by the attribute() function in SAX.c
3663 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003664 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003665 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003666 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003667 buf[len++] = '&';
3668 buf[len++] = '#';
3669 buf[len++] = '3';
3670 buf[len++] = '8';
3671 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003672 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003673 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003674 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003675 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003676 }
Owen Taylor3473f882001-02-23 17:55:21 +00003677 len += xmlCopyChar(0, &buf[len], val);
3678 }
3679 } else {
3680 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003681 ctxt->nbentities++;
3682 if (ent != NULL)
3683 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003684 if ((ent != NULL) &&
3685 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3686 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003687 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003688 }
3689 if ((ctxt->replaceEntities == 0) &&
3690 (ent->content[0] == '&')) {
3691 buf[len++] = '&';
3692 buf[len++] = '#';
3693 buf[len++] = '3';
3694 buf[len++] = '8';
3695 buf[len++] = ';';
3696 } else {
3697 buf[len++] = ent->content[0];
3698 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003699 } else if ((ent != NULL) &&
3700 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003701 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3702 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003703 XML_SUBSTITUTE_REF,
3704 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003705 if (rep != NULL) {
3706 current = rep;
3707 while (*current != 0) { /* non input consuming */
3708 buf[len++] = *current++;
3709 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003710 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003711 }
3712 }
3713 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003714 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003715 }
3716 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003717 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003718 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003719 }
Owen Taylor3473f882001-02-23 17:55:21 +00003720 if (ent->content != NULL)
3721 buf[len++] = ent->content[0];
3722 }
3723 } else if (ent != NULL) {
3724 int i = xmlStrlen(ent->name);
3725 const xmlChar *cur = ent->name;
3726
3727 /*
3728 * This may look absurd but is needed to detect
3729 * entities problems
3730 */
3731 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3732 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003733 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003734 XML_SUBSTITUTE_REF, 0, 0, 0);
3735 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003736 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003737 rep = NULL;
3738 }
Owen Taylor3473f882001-02-23 17:55:21 +00003739 }
3740
3741 /*
3742 * Just output the reference
3743 */
3744 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003745 while (len > buf_size - i - 10) {
3746 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003747 }
3748 for (;i > 0;i--)
3749 buf[len++] = *cur++;
3750 buf[len++] = ';';
3751 }
3752 }
3753 } else {
3754 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003755 if ((len != 0) || (!normalize)) {
3756 if ((!normalize) || (!in_space)) {
3757 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003758 while (len > buf_size - 10) {
3759 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003760 }
3761 }
3762 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003763 }
3764 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003765 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003766 COPY_BUF(l,buf,len,c);
3767 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003768 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003769 }
3770 }
3771 NEXTL(l);
3772 }
3773 GROW;
3774 c = CUR_CHAR(l);
3775 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003776 if ((in_space) && (normalize)) {
3777 while (buf[len - 1] == 0x20) len--;
3778 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003779 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003780 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003781 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003782 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003783 if ((c != 0) && (!IS_CHAR(c))) {
3784 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3785 "invalid character in attribute value\n");
3786 } else {
3787 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3788 "AttValue: ' expected\n");
3789 }
Owen Taylor3473f882001-02-23 17:55:21 +00003790 } else
3791 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003792 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003793 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003794
3795mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003796 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003797 if (buf != NULL)
3798 xmlFree(buf);
3799 if (rep != NULL)
3800 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003801 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003802}
3803
3804/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003805 * xmlParseAttValue:
3806 * @ctxt: an XML parser context
3807 *
3808 * parse a value for an attribute
3809 * Note: the parser won't do substitution of entities here, this
3810 * will be handled later in xmlStringGetNodeList
3811 *
3812 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3813 * "'" ([^<&'] | Reference)* "'"
3814 *
3815 * 3.3.3 Attribute-Value Normalization:
3816 * Before the value of an attribute is passed to the application or
3817 * checked for validity, the XML processor must normalize it as follows:
3818 * - a character reference is processed by appending the referenced
3819 * character to the attribute value
3820 * - an entity reference is processed by recursively processing the
3821 * replacement text of the entity
3822 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3823 * appending #x20 to the normalized value, except that only a single
3824 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3825 * parsed entity or the literal entity value of an internal parsed entity
3826 * - other characters are processed by appending them to the normalized value
3827 * If the declared value is not CDATA, then the XML processor must further
3828 * process the normalized attribute value by discarding any leading and
3829 * trailing space (#x20) characters, and by replacing sequences of space
3830 * (#x20) characters by a single space (#x20) character.
3831 * All attributes for which no declaration has been read should be treated
3832 * by a non-validating parser as if declared CDATA.
3833 *
3834 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3835 */
3836
3837
3838xmlChar *
3839xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003840 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003841 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003842}
3843
3844/**
Owen Taylor3473f882001-02-23 17:55:21 +00003845 * xmlParseSystemLiteral:
3846 * @ctxt: an XML parser context
3847 *
3848 * parse an XML Literal
3849 *
3850 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3851 *
3852 * Returns the SystemLiteral parsed or NULL
3853 */
3854
3855xmlChar *
3856xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3857 xmlChar *buf = NULL;
3858 int len = 0;
3859 int size = XML_PARSER_BUFFER_SIZE;
3860 int cur, l;
3861 xmlChar stop;
3862 int state = ctxt->instate;
3863 int count = 0;
3864
3865 SHRINK;
3866 if (RAW == '"') {
3867 NEXT;
3868 stop = '"';
3869 } else if (RAW == '\'') {
3870 NEXT;
3871 stop = '\'';
3872 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003873 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003874 return(NULL);
3875 }
3876
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003877 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003878 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003879 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003880 return(NULL);
3881 }
3882 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3883 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003884 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003885 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003886 xmlChar *tmp;
3887
Owen Taylor3473f882001-02-23 17:55:21 +00003888 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003889 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3890 if (tmp == NULL) {
3891 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003892 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003893 ctxt->instate = (xmlParserInputState) state;
3894 return(NULL);
3895 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003896 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003897 }
3898 count++;
3899 if (count > 50) {
3900 GROW;
3901 count = 0;
3902 }
3903 COPY_BUF(l,buf,len,cur);
3904 NEXTL(l);
3905 cur = CUR_CHAR(l);
3906 if (cur == 0) {
3907 GROW;
3908 SHRINK;
3909 cur = CUR_CHAR(l);
3910 }
3911 }
3912 buf[len] = 0;
3913 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003914 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003915 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003916 } else {
3917 NEXT;
3918 }
3919 return(buf);
3920}
3921
3922/**
3923 * xmlParsePubidLiteral:
3924 * @ctxt: an XML parser context
3925 *
3926 * parse an XML public literal
3927 *
3928 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3929 *
3930 * Returns the PubidLiteral parsed or NULL.
3931 */
3932
3933xmlChar *
3934xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3935 xmlChar *buf = NULL;
3936 int len = 0;
3937 int size = XML_PARSER_BUFFER_SIZE;
3938 xmlChar cur;
3939 xmlChar stop;
3940 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003941 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003942
3943 SHRINK;
3944 if (RAW == '"') {
3945 NEXT;
3946 stop = '"';
3947 } else if (RAW == '\'') {
3948 NEXT;
3949 stop = '\'';
3950 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003951 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003952 return(NULL);
3953 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003954 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003955 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003956 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003957 return(NULL);
3958 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003959 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003960 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003961 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003962 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003963 xmlChar *tmp;
3964
Owen Taylor3473f882001-02-23 17:55:21 +00003965 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003966 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3967 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003968 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003969 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003970 return(NULL);
3971 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003972 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003973 }
3974 buf[len++] = cur;
3975 count++;
3976 if (count > 50) {
3977 GROW;
3978 count = 0;
3979 }
3980 NEXT;
3981 cur = CUR;
3982 if (cur == 0) {
3983 GROW;
3984 SHRINK;
3985 cur = CUR;
3986 }
3987 }
3988 buf[len] = 0;
3989 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003990 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003991 } else {
3992 NEXT;
3993 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003994 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003995 return(buf);
3996}
3997
Daniel Veillard48b2f892001-02-25 16:11:03 +00003998void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003999
/*
 * Lookup table used by the inner loop of the char data fast path:
 * indexed by a byte value, an entry holds the byte itself when the
 * scanner may step over it, and 0 when the scanner has to stop.
 * The zero entries are 0x00-0x08, 0x0A-0x1F, '&' (0x26), '<' (0x3C),
 * ']' (0x5D) and every byte from 0x80 up (non-ascii).
 */
static const unsigned char test_char_data[256] = {
    /* 0x00: only TAB (0x09) is accepted, CR/LF are handled separately */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20: '&' stops the scan */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30: '<' stops the scan */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50: ']' stops the scan */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, always stops the scan */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4037
Owen Taylor3473f882001-02-23 17:55:21 +00004038/**
4039 * xmlParseCharData:
4040 * @ctxt: an XML parser context
4041 * @cdata: int indicating whether we are within a CDATA section
4042 *
4043 * parse a CharData section.
4044 * if we are within a CDATA section ']]>' marks an end of section.
4045 *
4046 * The right angle bracket (>) may be represented using the string "&gt;",
4047 * and must, for compatibility, be escaped using "&gt;" or a character
4048 * reference when it appears in the string "]]>" in content, when that
4049 * string is not marking the end of a CDATA section.
4050 *
4051 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4052 */
4053
4054void
4055xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004056 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004057 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004058 int line = ctxt->input->line;
4059 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004060 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004061
4062 SHRINK;
4063 GROW;
4064 /*
4065 * Accelerated common case where input don't need to be
4066 * modified before passing it to the handler.
4067 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004068 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004069 in = ctxt->input->cur;
4070 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004071get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004072 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004073 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004074 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004075 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004076 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004077 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004078 goto get_more_space;
4079 }
4080 if (*in == '<') {
4081 nbchar = in - ctxt->input->cur;
4082 if (nbchar > 0) {
4083 const xmlChar *tmp = ctxt->input->cur;
4084 ctxt->input->cur = in;
4085
Daniel Veillard34099b42004-11-04 17:34:35 +00004086 if ((ctxt->sax != NULL) &&
4087 (ctxt->sax->ignorableWhitespace !=
4088 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004089 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004090 if (ctxt->sax->ignorableWhitespace != NULL)
4091 ctxt->sax->ignorableWhitespace(ctxt->userData,
4092 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004093 } else {
4094 if (ctxt->sax->characters != NULL)
4095 ctxt->sax->characters(ctxt->userData,
4096 tmp, nbchar);
4097 if (*ctxt->space == -1)
4098 *ctxt->space = -2;
4099 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004100 } else if ((ctxt->sax != NULL) &&
4101 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004102 ctxt->sax->characters(ctxt->userData,
4103 tmp, nbchar);
4104 }
4105 }
4106 return;
4107 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004108
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004109get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004110 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004111 while (test_char_data[*in]) {
4112 in++;
4113 ccol++;
4114 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004115 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004116 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004117 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004118 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004119 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004120 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004121 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004122 }
4123 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004124 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004125 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004126 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004127 return;
4128 }
4129 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004130 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004131 goto get_more;
4132 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004133 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004134 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004135 if ((ctxt->sax != NULL) &&
4136 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004137 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004138 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004139 const xmlChar *tmp = ctxt->input->cur;
4140 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004141
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004142 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004143 if (ctxt->sax->ignorableWhitespace != NULL)
4144 ctxt->sax->ignorableWhitespace(ctxt->userData,
4145 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004146 } else {
4147 if (ctxt->sax->characters != NULL)
4148 ctxt->sax->characters(ctxt->userData,
4149 tmp, nbchar);
4150 if (*ctxt->space == -1)
4151 *ctxt->space = -2;
4152 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004153 line = ctxt->input->line;
4154 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004155 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004156 if (ctxt->sax->characters != NULL)
4157 ctxt->sax->characters(ctxt->userData,
4158 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004159 line = ctxt->input->line;
4160 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004161 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004162 /* something really bad happened in the SAX callback */
4163 if (ctxt->instate != XML_PARSER_CONTENT)
4164 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004165 }
4166 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004167 if (*in == 0xD) {
4168 in++;
4169 if (*in == 0xA) {
4170 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004171 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004172 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004173 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004174 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004175 in--;
4176 }
4177 if (*in == '<') {
4178 return;
4179 }
4180 if (*in == '&') {
4181 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004182 }
4183 SHRINK;
4184 GROW;
4185 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004186 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004187 nbchar = 0;
4188 }
Daniel Veillard50582112001-03-26 22:52:16 +00004189 ctxt->input->line = line;
4190 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004191 xmlParseCharDataComplex(ctxt, cdata);
4192}
4193
Daniel Veillard01c13b52002-12-10 15:19:08 +00004194/**
4195 * xmlParseCharDataComplex:
4196 * @ctxt: an XML parser context
4197 * @cdata: int indicating whether we are within a CDATA section
4198 *
4199 * parse a CharData section.this is the fallback function
4200 * of xmlParseCharData() when the parsing requires handling
4201 * of non-ASCII characters.
4202 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004203void
4204xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004205 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4206 int nbchar = 0;
4207 int cur, l;
4208 int count = 0;
4209
4210 SHRINK;
4211 GROW;
4212 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004213 while ((cur != '<') && /* checked */
4214 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004215 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004216 if ((cur == ']') && (NXT(1) == ']') &&
4217 (NXT(2) == '>')) {
4218 if (cdata) break;
4219 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004220 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004221 }
4222 }
4223 COPY_BUF(l,buf,nbchar,cur);
4224 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004225 buf[nbchar] = 0;
4226
Owen Taylor3473f882001-02-23 17:55:21 +00004227 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004228 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004229 */
4230 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004231 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004232 if (ctxt->sax->ignorableWhitespace != NULL)
4233 ctxt->sax->ignorableWhitespace(ctxt->userData,
4234 buf, nbchar);
4235 } else {
4236 if (ctxt->sax->characters != NULL)
4237 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004238 if ((ctxt->sax->characters !=
4239 ctxt->sax->ignorableWhitespace) &&
4240 (*ctxt->space == -1))
4241 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004242 }
4243 }
4244 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004245 /* something really bad happened in the SAX callback */
4246 if (ctxt->instate != XML_PARSER_CONTENT)
4247 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004248 }
4249 count++;
4250 if (count > 50) {
4251 GROW;
4252 count = 0;
4253 }
4254 NEXTL(l);
4255 cur = CUR_CHAR(l);
4256 }
4257 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004258 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004259 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004260 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004261 */
4262 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004263 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004264 if (ctxt->sax->ignorableWhitespace != NULL)
4265 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4266 } else {
4267 if (ctxt->sax->characters != NULL)
4268 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004269 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4270 (*ctxt->space == -1))
4271 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004272 }
4273 }
4274 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004275 if ((cur != 0) && (!IS_CHAR(cur))) {
4276 /* Generate the error and skip the offending character */
4277 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4278 "PCDATA invalid Char value %d\n",
4279 cur);
4280 NEXTL(l);
4281 }
Owen Taylor3473f882001-02-23 17:55:21 +00004282}
4283
4284/**
4285 * xmlParseExternalID:
4286 * @ctxt: an XML parser context
4287 * @publicID: a xmlChar** receiving PubidLiteral
4288 * @strict: indicate whether we should restrict parsing to only
4289 * production [75], see NOTE below
4290 *
4291 * Parse an External ID or a Public ID
4292 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004293 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004294 * 'PUBLIC' S PubidLiteral S SystemLiteral
4295 *
4296 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4297 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4298 *
4299 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4300 *
4301 * Returns the function returns SystemLiteral and in the second
4302 * case publicID receives PubidLiteral, is strict is off
4303 * it is possible to return NULL and have publicID set.
4304 */
4305
4306xmlChar *
4307xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4308 xmlChar *URI = NULL;
4309
4310 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004311
4312 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004313 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004314 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004315 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004316 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4317 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004318 }
4319 SKIP_BLANKS;
4320 URI = xmlParseSystemLiteral(ctxt);
4321 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004322 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004323 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004324 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004325 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004326 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004327 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004328 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004329 }
4330 SKIP_BLANKS;
4331 *publicID = xmlParsePubidLiteral(ctxt);
4332 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004333 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004334 }
4335 if (strict) {
4336 /*
4337 * We don't handle [83] so "S SystemLiteral" is required.
4338 */
William M. Brack76e95df2003-10-18 16:20:14 +00004339 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004340 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004341 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004342 }
4343 } else {
4344 /*
4345 * We handle [83] so we return immediately, if
4346 * "S SystemLiteral" is not detected. From a purely parsing
4347 * point of view that's a nice mess.
4348 */
4349 const xmlChar *ptr;
4350 GROW;
4351
4352 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004353 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004354
William M. Brack76e95df2003-10-18 16:20:14 +00004355 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004356 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4357 }
4358 SKIP_BLANKS;
4359 URI = xmlParseSystemLiteral(ctxt);
4360 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004361 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004362 }
4363 }
4364 return(URI);
4365}
4366
4367/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004368 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004369 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004370 * @buf: the already parsed part of the buffer
4371 * @len: number of bytes filles in the buffer
4372 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004373 *
4374 * Skip an XML (SGML) comment <!-- .... -->
4375 * The spec says that "For compatibility, the string "--" (double-hyphen)
4376 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004377 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004378 *
4379 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4380 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004381static void
4382xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004383 int q, ql;
4384 int r, rl;
4385 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004386 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004387 int inputid;
4388
4389 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004390
Owen Taylor3473f882001-02-23 17:55:21 +00004391 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004392 len = 0;
4393 size = XML_PARSER_BUFFER_SIZE;
4394 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4395 if (buf == NULL) {
4396 xmlErrMemory(ctxt, NULL);
4397 return;
4398 }
Owen Taylor3473f882001-02-23 17:55:21 +00004399 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004400 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004401 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004402 if (q == 0)
4403 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004404 if (!IS_CHAR(q)) {
4405 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4406 "xmlParseComment: invalid xmlChar value %d\n",
4407 q);
4408 xmlFree (buf);
4409 return;
4410 }
Owen Taylor3473f882001-02-23 17:55:21 +00004411 NEXTL(ql);
4412 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004413 if (r == 0)
4414 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004415 if (!IS_CHAR(r)) {
4416 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4417 "xmlParseComment: invalid xmlChar value %d\n",
4418 q);
4419 xmlFree (buf);
4420 return;
4421 }
Owen Taylor3473f882001-02-23 17:55:21 +00004422 NEXTL(rl);
4423 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004424 if (cur == 0)
4425 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004426 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004427 ((cur != '>') ||
4428 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004429 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004430 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004431 }
4432 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004433 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004434 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004435 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4436 if (new_buf == NULL) {
4437 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004438 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004439 return;
4440 }
William M. Bracka3215c72004-07-31 16:24:01 +00004441 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004442 }
4443 COPY_BUF(ql,buf,len,q);
4444 q = r;
4445 ql = rl;
4446 r = cur;
4447 rl = l;
4448
4449 count++;
4450 if (count > 50) {
4451 GROW;
4452 count = 0;
4453 }
4454 NEXTL(l);
4455 cur = CUR_CHAR(l);
4456 if (cur == 0) {
4457 SHRINK;
4458 GROW;
4459 cur = CUR_CHAR(l);
4460 }
4461 }
4462 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004463 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004464 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004465 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004466 } else if (!IS_CHAR(cur)) {
4467 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4468 "xmlParseComment: invalid xmlChar value %d\n",
4469 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004470 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004471 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004472 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4473 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004474 }
4475 NEXT;
4476 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4477 (!ctxt->disableSAX))
4478 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004479 }
Daniel Veillardda629342007-08-01 07:49:06 +00004480 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004481 return;
4482not_terminated:
4483 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4484 "Comment not terminated\n", NULL);
4485 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004486 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004487}
Daniel Veillardda629342007-08-01 07:49:06 +00004488
Daniel Veillard4c778d82005-01-23 17:37:44 +00004489/**
4490 * xmlParseComment:
4491 * @ctxt: an XML parser context
4492 *
4493 * Skip an XML (SGML) comment <!-- .... -->
4494 * The spec says that "For compatibility, the string "--" (double-hyphen)
4495 * must not occur within comments. "
4496 *
4497 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4498 */
4499void
4500xmlParseComment(xmlParserCtxtPtr ctxt) {
4501 xmlChar *buf = NULL;
4502 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004503 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004504 xmlParserInputState state;
4505 const xmlChar *in;
4506 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004507 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004508
4509 /*
4510 * Check that there is a comment right here.
4511 */
4512 if ((RAW != '<') || (NXT(1) != '!') ||
4513 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004514 state = ctxt->instate;
4515 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004516 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004517 SKIP(4);
4518 SHRINK;
4519 GROW;
4520
4521 /*
4522 * Accelerated common case where input don't need to be
4523 * modified before passing it to the handler.
4524 */
4525 in = ctxt->input->cur;
4526 do {
4527 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004528 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004529 ctxt->input->line++; ctxt->input->col = 1;
4530 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004531 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004532 }
4533get_more:
4534 ccol = ctxt->input->col;
4535 while (((*in > '-') && (*in <= 0x7F)) ||
4536 ((*in >= 0x20) && (*in < '-')) ||
4537 (*in == 0x09)) {
4538 in++;
4539 ccol++;
4540 }
4541 ctxt->input->col = ccol;
4542 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004543 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004544 ctxt->input->line++; ctxt->input->col = 1;
4545 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004546 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004547 goto get_more;
4548 }
4549 nbchar = in - ctxt->input->cur;
4550 /*
4551 * save current set of data
4552 */
4553 if (nbchar > 0) {
4554 if ((ctxt->sax != NULL) &&
4555 (ctxt->sax->comment != NULL)) {
4556 if (buf == NULL) {
4557 if ((*in == '-') && (in[1] == '-'))
4558 size = nbchar + 1;
4559 else
4560 size = XML_PARSER_BUFFER_SIZE + nbchar;
4561 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4562 if (buf == NULL) {
4563 xmlErrMemory(ctxt, NULL);
4564 ctxt->instate = state;
4565 return;
4566 }
4567 len = 0;
4568 } else if (len + nbchar + 1 >= size) {
4569 xmlChar *new_buf;
4570 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4571 new_buf = (xmlChar *) xmlRealloc(buf,
4572 size * sizeof(xmlChar));
4573 if (new_buf == NULL) {
4574 xmlFree (buf);
4575 xmlErrMemory(ctxt, NULL);
4576 ctxt->instate = state;
4577 return;
4578 }
4579 buf = new_buf;
4580 }
4581 memcpy(&buf[len], ctxt->input->cur, nbchar);
4582 len += nbchar;
4583 buf[len] = 0;
4584 }
4585 }
4586 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004587 if (*in == 0xA) {
4588 in++;
4589 ctxt->input->line++; ctxt->input->col = 1;
4590 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004591 if (*in == 0xD) {
4592 in++;
4593 if (*in == 0xA) {
4594 ctxt->input->cur = in;
4595 in++;
4596 ctxt->input->line++; ctxt->input->col = 1;
4597 continue; /* while */
4598 }
4599 in--;
4600 }
4601 SHRINK;
4602 GROW;
4603 in = ctxt->input->cur;
4604 if (*in == '-') {
4605 if (in[1] == '-') {
4606 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004607 if (ctxt->input->id != inputid) {
4608 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4609 "comment doesn't start and stop in the same entity\n");
4610 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004611 SKIP(3);
4612 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4613 (!ctxt->disableSAX)) {
4614 if (buf != NULL)
4615 ctxt->sax->comment(ctxt->userData, buf);
4616 else
4617 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4618 }
4619 if (buf != NULL)
4620 xmlFree(buf);
4621 ctxt->instate = state;
4622 return;
4623 }
4624 if (buf != NULL)
4625 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4626 "Comment not terminated \n<!--%.50s\n",
4627 buf);
4628 else
4629 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4630 "Comment not terminated \n", NULL);
4631 in++;
4632 ctxt->input->col++;
4633 }
4634 in++;
4635 ctxt->input->col++;
4636 goto get_more;
4637 }
4638 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4639 xmlParseCommentComplex(ctxt, buf, len, size);
4640 ctxt->instate = state;
4641 return;
4642}
4643
Owen Taylor3473f882001-02-23 17:55:21 +00004644
4645/**
4646 * xmlParsePITarget:
4647 * @ctxt: an XML parser context
4648 *
4649 * parse the name of a PI
4650 *
4651 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4652 *
4653 * Returns the PITarget name or NULL
4654 */
4655
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004656const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004657xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004658 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004659
4660 name = xmlParseName(ctxt);
4661 if ((name != NULL) &&
4662 ((name[0] == 'x') || (name[0] == 'X')) &&
4663 ((name[1] == 'm') || (name[1] == 'M')) &&
4664 ((name[2] == 'l') || (name[2] == 'L'))) {
4665 int i;
4666 if ((name[0] == 'x') && (name[1] == 'm') &&
4667 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004668 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004669 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004670 return(name);
4671 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004672 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004673 return(name);
4674 }
4675 for (i = 0;;i++) {
4676 if (xmlW3CPIs[i] == NULL) break;
4677 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4678 return(name);
4679 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004680 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4681 "xmlParsePITarget: invalid name prefix 'xml'\n",
4682 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004683 }
Daniel Veillard37334572008-07-31 08:20:02 +00004684 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4685 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4686 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4687 }
Owen Taylor3473f882001-02-23 17:55:21 +00004688 return(name);
4689}
4690
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004691#ifdef LIBXML_CATALOG_ENABLED
4692/**
4693 * xmlParseCatalogPI:
4694 * @ctxt: an XML parser context
4695 * @catalog: the PI value string
4696 *
4697 * parse an XML Catalog Processing Instruction.
4698 *
4699 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
4700 *
4701 * Occurs only if allowed by the user and if happening in the Misc
4702 * part of the document before any doctype informations
4703 * This will add the given catalog to the parsing context in order
4704 * to be used if there is a resolution need further down in the document
4705 */
4706
4707static void
4708xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
4709 xmlChar *URL = NULL;
4710 const xmlChar *tmp, *base;
4711 xmlChar marker;
4712
4713 tmp = catalog;
William M. Brack76e95df2003-10-18 16:20:14 +00004714 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004715 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
4716 goto error;
4717 tmp += 7;
William M. Brack76e95df2003-10-18 16:20:14 +00004718 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004719 if (*tmp != '=') {
4720 return;
4721 }
4722 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004723 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004724 marker = *tmp;
4725 if ((marker != '\'') && (marker != '"'))
4726 goto error;
4727 tmp++;
4728 base = tmp;
4729 while ((*tmp != 0) && (*tmp != marker)) tmp++;
4730 if (*tmp == 0)
4731 goto error;
4732 URL = xmlStrndup(base, tmp - base);
4733 tmp++;
William M. Brack76e95df2003-10-18 16:20:14 +00004734 while (IS_BLANK_CH(*tmp)) tmp++;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004735 if (*tmp != 0)
4736 goto error;
4737
4738 if (URL != NULL) {
4739 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
4740 xmlFree(URL);
4741 }
4742 return;
4743
4744error:
Daniel Veillard24eb9782003-10-04 21:08:09 +00004745 xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
4746 "Catalog PI syntax error: %s\n",
4747 catalog, NULL);
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004748 if (URL != NULL)
4749 xmlFree(URL);
4750}
4751#endif
4752
Owen Taylor3473f882001-02-23 17:55:21 +00004753/**
4754 * xmlParsePI:
4755 * @ctxt: an XML parser context
4756 *
4757 * parse an XML Processing Instruction.
4758 *
4759 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4760 *
4761 * The processing is transfered to SAX once parsed.
4762 */
4763
4764void
4765xmlParsePI(xmlParserCtxtPtr ctxt) {
4766 xmlChar *buf = NULL;
4767 int len = 0;
4768 int size = XML_PARSER_BUFFER_SIZE;
4769 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004770 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004771 xmlParserInputState state;
4772 int count = 0;
4773
4774 if ((RAW == '<') && (NXT(1) == '?')) {
4775 xmlParserInputPtr input = ctxt->input;
4776 state = ctxt->instate;
4777 ctxt->instate = XML_PARSER_PI;
4778 /*
4779 * this is a Processing Instruction.
4780 */
4781 SKIP(2);
4782 SHRINK;
4783
4784 /*
4785 * Parse the target name and check for special support like
4786 * namespace.
4787 */
4788 target = xmlParsePITarget(ctxt);
4789 if (target != NULL) {
4790 if ((RAW == '?') && (NXT(1) == '>')) {
4791 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004792 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4793 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004794 }
4795 SKIP(2);
4796
4797 /*
4798 * SAX: PI detected.
4799 */
4800 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4801 (ctxt->sax->processingInstruction != NULL))
4802 ctxt->sax->processingInstruction(ctxt->userData,
4803 target, NULL);
4804 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004805 return;
4806 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004807 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004808 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004809 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004810 ctxt->instate = state;
4811 return;
4812 }
4813 cur = CUR;
4814 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004815 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4816 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004817 }
4818 SKIP_BLANKS;
4819 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004820 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004821 ((cur != '?') || (NXT(1) != '>'))) {
4822 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004823 xmlChar *tmp;
4824
Owen Taylor3473f882001-02-23 17:55:21 +00004825 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004826 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4827 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004828 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004829 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004830 ctxt->instate = state;
4831 return;
4832 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004833 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004834 }
4835 count++;
4836 if (count > 50) {
4837 GROW;
4838 count = 0;
4839 }
4840 COPY_BUF(l,buf,len,cur);
4841 NEXTL(l);
4842 cur = CUR_CHAR(l);
4843 if (cur == 0) {
4844 SHRINK;
4845 GROW;
4846 cur = CUR_CHAR(l);
4847 }
4848 }
4849 buf[len] = 0;
4850 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004851 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4852 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004853 } else {
4854 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004855 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4856 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004857 }
4858 SKIP(2);
4859
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004860#ifdef LIBXML_CATALOG_ENABLED
4861 if (((state == XML_PARSER_MISC) ||
4862 (state == XML_PARSER_START)) &&
4863 (xmlStrEqual(target, XML_CATALOG_PI))) {
4864 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4865 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4866 (allow == XML_CATA_ALLOW_ALL))
4867 xmlParseCatalogPI(ctxt, buf);
4868 }
4869#endif
4870
4871
Owen Taylor3473f882001-02-23 17:55:21 +00004872 /*
4873 * SAX: PI detected.
4874 */
4875 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4876 (ctxt->sax->processingInstruction != NULL))
4877 ctxt->sax->processingInstruction(ctxt->userData,
4878 target, buf);
4879 }
4880 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004881 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004882 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004883 }
4884 ctxt->instate = state;
4885 }
4886}
4887
4888/**
4889 * xmlParseNotationDecl:
4890 * @ctxt: an XML parser context
4891 *
4892 * parse a notation declaration
4893 *
4894 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4895 *
4896 * Hence there is actually 3 choices:
4897 * 'PUBLIC' S PubidLiteral
4898 * 'PUBLIC' S PubidLiteral S SystemLiteral
4899 * and 'SYSTEM' S SystemLiteral
4900 *
4901 * See the NOTE on xmlParseExternalID().
4902 */
4903
4904void
4905xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004906 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004907 xmlChar *Pubid;
4908 xmlChar *Systemid;
4909
Daniel Veillarda07050d2003-10-19 14:46:32 +00004910 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004911 xmlParserInputPtr input = ctxt->input;
4912 SHRINK;
4913 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004914 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004915 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4916 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004917 return;
4918 }
4919 SKIP_BLANKS;
4920
Daniel Veillard76d66f42001-05-16 21:05:17 +00004921 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004922 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004923 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004924 return;
4925 }
William M. Brack76e95df2003-10-18 16:20:14 +00004926 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004927 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004928 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004929 return;
4930 }
Daniel Veillard37334572008-07-31 08:20:02 +00004931 if (xmlStrchr(name, ':') != NULL) {
4932 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4933 "colon are forbidden from notation names '%s'\n",
4934 name, NULL, NULL);
4935 }
Owen Taylor3473f882001-02-23 17:55:21 +00004936 SKIP_BLANKS;
4937
4938 /*
4939 * Parse the IDs.
4940 */
4941 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4942 SKIP_BLANKS;
4943
4944 if (RAW == '>') {
4945 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004946 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4947 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004948 }
4949 NEXT;
4950 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4951 (ctxt->sax->notationDecl != NULL))
4952 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4953 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004954 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004955 }
Owen Taylor3473f882001-02-23 17:55:21 +00004956 if (Systemid != NULL) xmlFree(Systemid);
4957 if (Pubid != NULL) xmlFree(Pubid);
4958 }
4959}
4960
4961/**
4962 * xmlParseEntityDecl:
4963 * @ctxt: an XML parser context
4964 *
4965 * parse <!ENTITY declarations
4966 *
4967 * [70] EntityDecl ::= GEDecl | PEDecl
4968 *
4969 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4970 *
4971 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4972 *
4973 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4974 *
4975 * [74] PEDef ::= EntityValue | ExternalID
4976 *
4977 * [76] NDataDecl ::= S 'NDATA' S Name
4978 *
4979 * [ VC: Notation Declared ]
4980 * The Name must match the declared name of a notation.
4981 */
4982
4983void
4984xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004985 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004986 xmlChar *value = NULL;
4987 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004988 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004989 int isParameter = 0;
4990 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004991 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004992
Daniel Veillard4c778d82005-01-23 17:37:44 +00004993 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004994 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004995 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004996 SHRINK;
4997 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004998 skipped = SKIP_BLANKS;
4999 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005000 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5001 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005002 }
Owen Taylor3473f882001-02-23 17:55:21 +00005003
5004 if (RAW == '%') {
5005 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005006 skipped = SKIP_BLANKS;
5007 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005008 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5009 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005010 }
Owen Taylor3473f882001-02-23 17:55:21 +00005011 isParameter = 1;
5012 }
5013
Daniel Veillard76d66f42001-05-16 21:05:17 +00005014 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005015 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005016 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5017 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005018 return;
5019 }
Daniel Veillard37334572008-07-31 08:20:02 +00005020 if (xmlStrchr(name, ':') != NULL) {
5021 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5022 "colon are forbidden from entities names '%s'\n",
5023 name, NULL, NULL);
5024 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005025 skipped = SKIP_BLANKS;
5026 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005027 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5028 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005029 }
Owen Taylor3473f882001-02-23 17:55:21 +00005030
Daniel Veillardf5582f12002-06-11 10:08:16 +00005031 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005032 /*
5033 * handle the various case of definitions...
5034 */
5035 if (isParameter) {
5036 if ((RAW == '"') || (RAW == '\'')) {
5037 value = xmlParseEntityValue(ctxt, &orig);
5038 if (value) {
5039 if ((ctxt->sax != NULL) &&
5040 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5041 ctxt->sax->entityDecl(ctxt->userData, name,
5042 XML_INTERNAL_PARAMETER_ENTITY,
5043 NULL, NULL, value);
5044 }
5045 } else {
5046 URI = xmlParseExternalID(ctxt, &literal, 1);
5047 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005048 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005049 }
5050 if (URI) {
5051 xmlURIPtr uri;
5052
5053 uri = xmlParseURI((const char *) URI);
5054 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005055 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5056 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005057 /*
5058 * This really ought to be a well formedness error
5059 * but the XML Core WG decided otherwise c.f. issue
5060 * E26 of the XML erratas.
5061 */
Owen Taylor3473f882001-02-23 17:55:21 +00005062 } else {
5063 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005064 /*
5065 * Okay this is foolish to block those but not
5066 * invalid URIs.
5067 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005068 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005069 } else {
5070 if ((ctxt->sax != NULL) &&
5071 (!ctxt->disableSAX) &&
5072 (ctxt->sax->entityDecl != NULL))
5073 ctxt->sax->entityDecl(ctxt->userData, name,
5074 XML_EXTERNAL_PARAMETER_ENTITY,
5075 literal, URI, NULL);
5076 }
5077 xmlFreeURI(uri);
5078 }
5079 }
5080 }
5081 } else {
5082 if ((RAW == '"') || (RAW == '\'')) {
5083 value = xmlParseEntityValue(ctxt, &orig);
5084 if ((ctxt->sax != NULL) &&
5085 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5086 ctxt->sax->entityDecl(ctxt->userData, name,
5087 XML_INTERNAL_GENERAL_ENTITY,
5088 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005089 /*
5090 * For expat compatibility in SAX mode.
5091 */
5092 if ((ctxt->myDoc == NULL) ||
5093 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5094 if (ctxt->myDoc == NULL) {
5095 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005096 if (ctxt->myDoc == NULL) {
5097 xmlErrMemory(ctxt, "New Doc failed");
5098 return;
5099 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005100 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005101 }
5102 if (ctxt->myDoc->intSubset == NULL)
5103 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5104 BAD_CAST "fake", NULL, NULL);
5105
Daniel Veillard1af9a412003-08-20 22:54:39 +00005106 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5107 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005108 }
Owen Taylor3473f882001-02-23 17:55:21 +00005109 } else {
5110 URI = xmlParseExternalID(ctxt, &literal, 1);
5111 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005112 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005113 }
5114 if (URI) {
5115 xmlURIPtr uri;
5116
5117 uri = xmlParseURI((const char *)URI);
5118 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005119 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5120 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005121 /*
5122 * This really ought to be a well formedness error
5123 * but the XML Core WG decided otherwise c.f. issue
5124 * E26 of the XML erratas.
5125 */
Owen Taylor3473f882001-02-23 17:55:21 +00005126 } else {
5127 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005128 /*
5129 * Okay this is foolish to block those but not
5130 * invalid URIs.
5131 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005132 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005133 }
5134 xmlFreeURI(uri);
5135 }
5136 }
William M. Brack76e95df2003-10-18 16:20:14 +00005137 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005138 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5139 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005140 }
5141 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005142 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005143 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005144 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005145 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5146 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005147 }
5148 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005149 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005150 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5151 (ctxt->sax->unparsedEntityDecl != NULL))
5152 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5153 literal, URI, ndata);
5154 } else {
5155 if ((ctxt->sax != NULL) &&
5156 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5157 ctxt->sax->entityDecl(ctxt->userData, name,
5158 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5159 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005160 /*
5161 * For expat compatibility in SAX mode.
5162 * assuming the entity repalcement was asked for
5163 */
5164 if ((ctxt->replaceEntities != 0) &&
5165 ((ctxt->myDoc == NULL) ||
5166 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5167 if (ctxt->myDoc == NULL) {
5168 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005169 if (ctxt->myDoc == NULL) {
5170 xmlErrMemory(ctxt, "New Doc failed");
5171 return;
5172 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005173 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005174 }
5175
5176 if (ctxt->myDoc->intSubset == NULL)
5177 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5178 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005179 xmlSAX2EntityDecl(ctxt, name,
5180 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5181 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005182 }
Owen Taylor3473f882001-02-23 17:55:21 +00005183 }
5184 }
5185 }
5186 SKIP_BLANKS;
5187 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005188 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005189 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005190 } else {
5191 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005192 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5193 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005194 }
5195 NEXT;
5196 }
5197 if (orig != NULL) {
5198 /*
5199 * Ugly mechanism to save the raw entity value.
5200 */
5201 xmlEntityPtr cur = NULL;
5202
5203 if (isParameter) {
5204 if ((ctxt->sax != NULL) &&
5205 (ctxt->sax->getParameterEntity != NULL))
5206 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5207 } else {
5208 if ((ctxt->sax != NULL) &&
5209 (ctxt->sax->getEntity != NULL))
5210 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005211 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005212 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005213 }
Owen Taylor3473f882001-02-23 17:55:21 +00005214 }
5215 if (cur != NULL) {
5216 if (cur->orig != NULL)
5217 xmlFree(orig);
5218 else
5219 cur->orig = orig;
5220 } else
5221 xmlFree(orig);
5222 }
Owen Taylor3473f882001-02-23 17:55:21 +00005223 if (value != NULL) xmlFree(value);
5224 if (URI != NULL) xmlFree(URI);
5225 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005226 }
5227}
5228
5229/**
5230 * xmlParseDefaultDecl:
5231 * @ctxt: an XML parser context
5232 * @value: Receive a possible fixed default value for the attribute
5233 *
5234 * Parse an attribute default declaration
5235 *
5236 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5237 *
5238 * [ VC: Required Attribute ]
5239 * if the default declaration is the keyword #REQUIRED, then the
5240 * attribute must be specified for all elements of the type in the
5241 * attribute-list declaration.
5242 *
5243 * [ VC: Attribute Default Legal ]
5244 * The declared default value must meet the lexical constraints of
5245 * the declared attribute type c.f. xmlValidateAttributeDecl()
5246 *
5247 * [ VC: Fixed Attribute Default ]
5248 * if an attribute has a default value declared with the #FIXED
5249 * keyword, instances of that attribute must match the default value.
5250 *
5251 * [ WFC: No < in Attribute Values ]
5252 * handled in xmlParseAttValue()
5253 *
5254 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5255 * or XML_ATTRIBUTE_FIXED.
5256 */
5257
5258int
5259xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5260 int val;
5261 xmlChar *ret;
5262
5263 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005264 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005265 SKIP(9);
5266 return(XML_ATTRIBUTE_REQUIRED);
5267 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005268 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005269 SKIP(8);
5270 return(XML_ATTRIBUTE_IMPLIED);
5271 }
5272 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005273 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005274 SKIP(6);
5275 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005276 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005277 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5278 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005279 }
5280 SKIP_BLANKS;
5281 }
5282 ret = xmlParseAttValue(ctxt);
5283 ctxt->instate = XML_PARSER_DTD;
5284 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005285 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005286 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005287 } else
5288 *value = ret;
5289 return(val);
5290}
5291
5292/**
5293 * xmlParseNotationType:
5294 * @ctxt: an XML parser context
5295 *
5296 * parse an Notation attribute type.
5297 *
5298 * Note: the leading 'NOTATION' S part has already being parsed...
5299 *
5300 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5301 *
5302 * [ VC: Notation Attributes ]
5303 * Values of this type must match one of the notation names included
5304 * in the declaration; all notation names in the declaration must be declared.
5305 *
5306 * Returns: the notation attribute tree built while parsing
5307 */
5308
5309xmlEnumerationPtr
5310xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005311 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005312 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005313
5314 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005315 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005316 return(NULL);
5317 }
5318 SHRINK;
5319 do {
5320 NEXT;
5321 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005322 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005323 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005324 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5325 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005326 xmlFreeEnumeration(ret);
5327 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005328 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005329 tmp = ret;
5330 while (tmp != NULL) {
5331 if (xmlStrEqual(name, tmp->name)) {
5332 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5333 "standalone: attribute notation value token %s duplicated\n",
5334 name, NULL);
5335 if (!xmlDictOwns(ctxt->dict, name))
5336 xmlFree((xmlChar *) name);
5337 break;
5338 }
5339 tmp = tmp->next;
5340 }
5341 if (tmp == NULL) {
5342 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005343 if (cur == NULL) {
5344 xmlFreeEnumeration(ret);
5345 return(NULL);
5346 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005347 if (last == NULL) ret = last = cur;
5348 else {
5349 last->next = cur;
5350 last = cur;
5351 }
Owen Taylor3473f882001-02-23 17:55:21 +00005352 }
5353 SKIP_BLANKS;
5354 } while (RAW == '|');
5355 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005356 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005357 xmlFreeEnumeration(ret);
5358 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005359 }
5360 NEXT;
5361 return(ret);
5362}
5363
5364/**
5365 * xmlParseEnumerationType:
5366 * @ctxt: an XML parser context
5367 *
5368 * parse an Enumeration attribute type.
5369 *
5370 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5371 *
5372 * [ VC: Enumeration ]
5373 * Values of this type must match one of the Nmtoken tokens in
5374 * the declaration
5375 *
5376 * Returns: the enumeration attribute tree built while parsing
5377 */
5378
5379xmlEnumerationPtr
5380xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5381 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005382 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005383
5384 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005385 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005386 return(NULL);
5387 }
5388 SHRINK;
5389 do {
5390 NEXT;
5391 SKIP_BLANKS;
5392 name = xmlParseNmtoken(ctxt);
5393 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005394 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005395 return(ret);
5396 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005397 tmp = ret;
5398 while (tmp != NULL) {
5399 if (xmlStrEqual(name, tmp->name)) {
5400 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5401 "standalone: attribute enumeration value token %s duplicated\n",
5402 name, NULL);
5403 if (!xmlDictOwns(ctxt->dict, name))
5404 xmlFree(name);
5405 break;
5406 }
5407 tmp = tmp->next;
5408 }
5409 if (tmp == NULL) {
5410 cur = xmlCreateEnumeration(name);
5411 if (!xmlDictOwns(ctxt->dict, name))
5412 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005413 if (cur == NULL) {
5414 xmlFreeEnumeration(ret);
5415 return(NULL);
5416 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005417 if (last == NULL) ret = last = cur;
5418 else {
5419 last->next = cur;
5420 last = cur;
5421 }
Owen Taylor3473f882001-02-23 17:55:21 +00005422 }
5423 SKIP_BLANKS;
5424 } while (RAW == '|');
5425 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005426 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005427 return(ret);
5428 }
5429 NEXT;
5430 return(ret);
5431}
5432
5433/**
5434 * xmlParseEnumeratedType:
5435 * @ctxt: an XML parser context
5436 * @tree: the enumeration tree built while parsing
5437 *
5438 * parse an Enumerated attribute type.
5439 *
5440 * [57] EnumeratedType ::= NotationType | Enumeration
5441 *
5442 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5443 *
5444 *
5445 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5446 */
5447
5448int
5449xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005450 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005451 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005452 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005453 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5454 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005455 return(0);
5456 }
5457 SKIP_BLANKS;
5458 *tree = xmlParseNotationType(ctxt);
5459 if (*tree == NULL) return(0);
5460 return(XML_ATTRIBUTE_NOTATION);
5461 }
5462 *tree = xmlParseEnumerationType(ctxt);
5463 if (*tree == NULL) return(0);
5464 return(XML_ATTRIBUTE_ENUMERATION);
5465}
5466
5467/**
5468 * xmlParseAttributeType:
5469 * @ctxt: an XML parser context
5470 * @tree: the enumeration tree built while parsing
5471 *
5472 * parse the Attribute list def for an element
5473 *
5474 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5475 *
5476 * [55] StringType ::= 'CDATA'
5477 *
5478 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5479 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5480 *
5481 * Validity constraints for attribute values syntax are checked in
5482 * xmlValidateAttributeValue()
5483 *
5484 * [ VC: ID ]
5485 * Values of type ID must match the Name production. A name must not
5486 * appear more than once in an XML document as a value of this type;
5487 * i.e., ID values must uniquely identify the elements which bear them.
5488 *
5489 * [ VC: One ID per Element Type ]
5490 * No element type may have more than one ID attribute specified.
5491 *
5492 * [ VC: ID Attribute Default ]
5493 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5494 *
5495 * [ VC: IDREF ]
5496 * Values of type IDREF must match the Name production, and values
5497 * of type IDREFS must match Names; each IDREF Name must match the value
5498 * of an ID attribute on some element in the XML document; i.e. IDREF
5499 * values must match the value of some ID attribute.
5500 *
5501 * [ VC: Entity Name ]
5502 * Values of type ENTITY must match the Name production, values
5503 * of type ENTITIES must match Names; each Entity Name must match the
5504 * name of an unparsed entity declared in the DTD.
5505 *
5506 * [ VC: Name Token ]
5507 * Values of type NMTOKEN must match the Nmtoken production; values
5508 * of type NMTOKENS must match Nmtokens.
5509 *
5510 * Returns the attribute type
5511 */
5512int
5513xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5514 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005515 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005516 SKIP(5);
5517 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005518 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005519 SKIP(6);
5520 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005521 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005522 SKIP(5);
5523 return(XML_ATTRIBUTE_IDREF);
5524 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5525 SKIP(2);
5526 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005527 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005528 SKIP(6);
5529 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005530 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005531 SKIP(8);
5532 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005533 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005534 SKIP(8);
5535 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005536 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005537 SKIP(7);
5538 return(XML_ATTRIBUTE_NMTOKEN);
5539 }
5540 return(xmlParseEnumeratedType(ctxt, tree));
5541}
5542
5543/**
5544 * xmlParseAttributeListDecl:
5545 * @ctxt: an XML parser context
5546 *
5547 * : parse the Attribute list def for an element
5548 *
5549 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5550 *
5551 * [53] AttDef ::= S Name S AttType S DefaultDecl
5552 *
5553 */
5554void
5555xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005556 const xmlChar *elemName;
5557 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005558 xmlEnumerationPtr tree;
5559
Daniel Veillarda07050d2003-10-19 14:46:32 +00005560 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005561 xmlParserInputPtr input = ctxt->input;
5562
5563 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005564 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005565 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005566 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005567 }
5568 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005569 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005570 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005571 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5572 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005573 return;
5574 }
5575 SKIP_BLANKS;
5576 GROW;
5577 while (RAW != '>') {
5578 const xmlChar *check = CUR_PTR;
5579 int type;
5580 int def;
5581 xmlChar *defaultValue = NULL;
5582
5583 GROW;
5584 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005585 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005586 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005587 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5588 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005589 break;
5590 }
5591 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005592 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005593 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005594 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005595 break;
5596 }
5597 SKIP_BLANKS;
5598
5599 type = xmlParseAttributeType(ctxt, &tree);
5600 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005601 break;
5602 }
5603
5604 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005605 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005606 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5607 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005608 if (tree != NULL)
5609 xmlFreeEnumeration(tree);
5610 break;
5611 }
5612 SKIP_BLANKS;
5613
5614 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5615 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005616 if (defaultValue != NULL)
5617 xmlFree(defaultValue);
5618 if (tree != NULL)
5619 xmlFreeEnumeration(tree);
5620 break;
5621 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005622 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5623 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005624
5625 GROW;
5626 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005627 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005629 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005630 if (defaultValue != NULL)
5631 xmlFree(defaultValue);
5632 if (tree != NULL)
5633 xmlFreeEnumeration(tree);
5634 break;
5635 }
5636 SKIP_BLANKS;
5637 }
5638 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005639 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5640 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005641 if (defaultValue != NULL)
5642 xmlFree(defaultValue);
5643 if (tree != NULL)
5644 xmlFreeEnumeration(tree);
5645 break;
5646 }
5647 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5648 (ctxt->sax->attributeDecl != NULL))
5649 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5650 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005651 else if (tree != NULL)
5652 xmlFreeEnumeration(tree);
5653
5654 if ((ctxt->sax2) && (defaultValue != NULL) &&
5655 (def != XML_ATTRIBUTE_IMPLIED) &&
5656 (def != XML_ATTRIBUTE_REQUIRED)) {
5657 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5658 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005659 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005660 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5661 }
Owen Taylor3473f882001-02-23 17:55:21 +00005662 if (defaultValue != NULL)
5663 xmlFree(defaultValue);
5664 GROW;
5665 }
5666 if (RAW == '>') {
5667 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005668 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5669 "Attribute list declaration doesn't start and stop in the same entity\n",
5670 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005671 }
5672 NEXT;
5673 }
Owen Taylor3473f882001-02-23 17:55:21 +00005674 }
5675}
5676
5677/**
5678 * xmlParseElementMixedContentDecl:
5679 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005680 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005681 *
5682 * parse the declaration for a Mixed Element content
5683 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5684 *
5685 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5686 * '(' S? '#PCDATA' S? ')'
5687 *
5688 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5689 *
5690 * [ VC: No Duplicate Types ]
5691 * The same name must not appear more than once in a single
5692 * mixed-content declaration.
5693 *
5694 * returns: the list of the xmlElementContentPtr describing the element choices
5695 */
5696xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005697xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005698 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005699 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005700
5701 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005702 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005703 SKIP(7);
5704 SKIP_BLANKS;
5705 SHRINK;
5706 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005707 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005708 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5709"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005710 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005711 }
Owen Taylor3473f882001-02-23 17:55:21 +00005712 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005713 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005714 if (ret == NULL)
5715 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005716 if (RAW == '*') {
5717 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5718 NEXT;
5719 }
5720 return(ret);
5721 }
5722 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005723 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005724 if (ret == NULL) return(NULL);
5725 }
5726 while (RAW == '|') {
5727 NEXT;
5728 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005729 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005730 if (ret == NULL) return(NULL);
5731 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005732 if (cur != NULL)
5733 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005734 cur = ret;
5735 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005736 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005737 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005738 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005739 if (n->c1 != NULL)
5740 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005741 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005742 if (n != NULL)
5743 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005744 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005745 }
5746 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005747 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005748 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005749 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005750 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005751 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005752 return(NULL);
5753 }
5754 SKIP_BLANKS;
5755 GROW;
5756 }
5757 if ((RAW == ')') && (NXT(1) == '*')) {
5758 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005759 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005760 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005761 if (cur->c2 != NULL)
5762 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005763 }
5764 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005765 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005766 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5767"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005768 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005769 }
Owen Taylor3473f882001-02-23 17:55:21 +00005770 SKIP(2);
5771 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005772 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005773 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005774 return(NULL);
5775 }
5776
5777 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005778 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005779 }
5780 return(ret);
5781}
5782
5783/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005784 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005785 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005786 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005787 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005788 *
5789 * parse the declaration for a Mixed Element content
5790 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5791 *
5792 *
5793 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5794 *
5795 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5796 *
5797 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5798 *
5799 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5800 *
5801 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5802 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005803 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005804 * opening or closing parentheses in a choice, seq, or Mixed
5805 * construct is contained in the replacement text for a parameter
5806 * entity, both must be contained in the same replacement text. For
5807 * interoperability, if a parameter-entity reference appears in a
5808 * choice, seq, or Mixed construct, its replacement text should not
5809 * be empty, and neither the first nor last non-blank character of
5810 * the replacement text should be a connector (| or ,).
5811 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005812 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005813 * hierarchy.
5814 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005815static xmlElementContentPtr
5816xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5817 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005818 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005819 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005820 xmlChar type = 0;
5821
Daniel Veillard489f9672009-08-10 16:49:30 +02005822 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5823 (depth > 2048)) {
5824 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5825"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5826 depth);
5827 return(NULL);
5828 }
Owen Taylor3473f882001-02-23 17:55:21 +00005829 SKIP_BLANKS;
5830 GROW;
5831 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005832 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005833
Owen Taylor3473f882001-02-23 17:55:21 +00005834 /* Recurse on first child */
5835 NEXT;
5836 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005837 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5838 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005839 SKIP_BLANKS;
5840 GROW;
5841 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005842 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005843 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005844 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005845 return(NULL);
5846 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005847 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005848 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005849 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005850 return(NULL);
5851 }
Owen Taylor3473f882001-02-23 17:55:21 +00005852 GROW;
5853 if (RAW == '?') {
5854 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5855 NEXT;
5856 } else if (RAW == '*') {
5857 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5858 NEXT;
5859 } else if (RAW == '+') {
5860 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5861 NEXT;
5862 } else {
5863 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5864 }
Owen Taylor3473f882001-02-23 17:55:21 +00005865 GROW;
5866 }
5867 SKIP_BLANKS;
5868 SHRINK;
5869 while (RAW != ')') {
5870 /*
5871 * Each loop we parse one separator and one element.
5872 */
5873 if (RAW == ',') {
5874 if (type == 0) type = CUR;
5875
5876 /*
5877 * Detect "Name | Name , Name" error
5878 */
5879 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005880 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005881 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005882 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005883 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005884 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005885 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005886 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005887 return(NULL);
5888 }
5889 NEXT;
5890
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005891 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005892 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005893 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005894 xmlFreeDocElementContent(ctxt->myDoc, last);
5895 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005896 return(NULL);
5897 }
5898 if (last == NULL) {
5899 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005900 if (ret != NULL)
5901 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005902 ret = cur = op;
5903 } else {
5904 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005905 if (op != NULL)
5906 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005907 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005908 if (last != NULL)
5909 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005910 cur =op;
5911 last = NULL;
5912 }
5913 } else if (RAW == '|') {
5914 if (type == 0) type = CUR;
5915
5916 /*
5917 * Detect "Name , Name | Name" error
5918 */
5919 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005920 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005921 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005922 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005923 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005924 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005925 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005926 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005927 return(NULL);
5928 }
5929 NEXT;
5930
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005931 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005932 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005933 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005934 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005935 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005936 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005937 return(NULL);
5938 }
5939 if (last == NULL) {
5940 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005941 if (ret != NULL)
5942 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005943 ret = cur = op;
5944 } else {
5945 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005946 if (op != NULL)
5947 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005948 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005949 if (last != NULL)
5950 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005951 cur =op;
5952 last = NULL;
5953 }
5954 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005955 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005956 if ((last != NULL) && (last != ret))
5957 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005958 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005959 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005960 return(NULL);
5961 }
5962 GROW;
5963 SKIP_BLANKS;
5964 GROW;
5965 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005966 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005967 /* Recurse on second child */
5968 NEXT;
5969 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005970 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5971 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005972 SKIP_BLANKS;
5973 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005974 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005975 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005976 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005977 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005978 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005979 return(NULL);
5980 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005981 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005982 if (last == NULL) {
5983 if (ret != NULL)
5984 xmlFreeDocElementContent(ctxt->myDoc, ret);
5985 return(NULL);
5986 }
Owen Taylor3473f882001-02-23 17:55:21 +00005987 if (RAW == '?') {
5988 last->ocur = XML_ELEMENT_CONTENT_OPT;
5989 NEXT;
5990 } else if (RAW == '*') {
5991 last->ocur = XML_ELEMENT_CONTENT_MULT;
5992 NEXT;
5993 } else if (RAW == '+') {
5994 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5995 NEXT;
5996 } else {
5997 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5998 }
5999 }
6000 SKIP_BLANKS;
6001 GROW;
6002 }
6003 if ((cur != NULL) && (last != NULL)) {
6004 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006005 if (last != NULL)
6006 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006007 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006008 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006009 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6010"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006011 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006012 }
Owen Taylor3473f882001-02-23 17:55:21 +00006013 NEXT;
6014 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006015 if (ret != NULL) {
6016 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6017 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6018 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6019 else
6020 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6021 }
Owen Taylor3473f882001-02-23 17:55:21 +00006022 NEXT;
6023 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006024 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006025 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006026 cur = ret;
6027 /*
6028 * Some normalization:
6029 * (a | b* | c?)* == (a | b | c)*
6030 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006031 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006032 if ((cur->c1 != NULL) &&
6033 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6034 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6035 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6036 if ((cur->c2 != NULL) &&
6037 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6038 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6039 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6040 cur = cur->c2;
6041 }
6042 }
Owen Taylor3473f882001-02-23 17:55:21 +00006043 NEXT;
6044 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006045 if (ret != NULL) {
6046 int found = 0;
6047
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006048 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6049 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6050 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006051 else
6052 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006053 /*
6054 * Some normalization:
6055 * (a | b*)+ == (a | b)*
6056 * (a | b?)+ == (a | b)*
6057 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006058 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006059 if ((cur->c1 != NULL) &&
6060 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6061 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6062 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6063 found = 1;
6064 }
6065 if ((cur->c2 != NULL) &&
6066 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6067 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6068 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6069 found = 1;
6070 }
6071 cur = cur->c2;
6072 }
6073 if (found)
6074 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6075 }
Owen Taylor3473f882001-02-23 17:55:21 +00006076 NEXT;
6077 }
6078 return(ret);
6079}
6080
6081/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006082 *
6083 * xmlParseElementChildrenContentDecl:
6084 * @ctxt: an XML parser context
6085 * @inputchk: the input used for the current entity, needed for boundary checks
6086 * @depth: the level of recursion
6087 *
6088 * parse the declaration for a Mixed Element content
6089 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6090 *
6091 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6092 *
6093 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6094 *
6095 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6096 *
6097 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6098 *
6099 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6100 * TODO Parameter-entity replacement text must be properly nested
6101 * with parenthesized groups. That is to say, if either of the
6102 * opening or closing parentheses in a choice, seq, or Mixed
6103 * construct is contained in the replacement text for a parameter
6104 * entity, both must be contained in the same replacement text. For
6105 * interoperability, if a parameter-entity reference appears in a
6106 * choice, seq, or Mixed construct, its replacement text should not
6107 * be empty, and neither the first nor last non-blank character of
6108 * the replacement text should be a connector (| or ,).
6109 *
6110 * Returns the tree of xmlElementContentPtr describing the element
6111 * hierarchy.
6112 */
6113xmlElementContentPtr
6114xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6115 /* stub left for API/ABI compat */
6116 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6117}
6118
6119/**
Owen Taylor3473f882001-02-23 17:55:21 +00006120 * xmlParseElementContentDecl:
6121 * @ctxt: an XML parser context
6122 * @name: the name of the element being defined.
6123 * @result: the Element Content pointer will be stored here if any
6124 *
6125 * parse the declaration for an Element content either Mixed or Children,
6126 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6127 *
6128 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6129 *
6130 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6131 */
6132
6133int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006134xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006135 xmlElementContentPtr *result) {
6136
6137 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006138 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006139 int res;
6140
6141 *result = NULL;
6142
6143 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006144 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006145 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006146 return(-1);
6147 }
6148 NEXT;
6149 GROW;
6150 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006151 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006152 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006153 res = XML_ELEMENT_TYPE_MIXED;
6154 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006155 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006156 res = XML_ELEMENT_TYPE_ELEMENT;
6157 }
Owen Taylor3473f882001-02-23 17:55:21 +00006158 SKIP_BLANKS;
6159 *result = tree;
6160 return(res);
6161}
6162
6163/**
6164 * xmlParseElementDecl:
6165 * @ctxt: an XML parser context
6166 *
6167 * parse an Element declaration.
6168 *
6169 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6170 *
6171 * [ VC: Unique Element Type Declaration ]
6172 * No element type may be declared more than once
6173 *
6174 * Returns the type of the element, or -1 in case of error
6175 */
6176int
6177xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006178 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006179 int ret = -1;
6180 xmlElementContentPtr content = NULL;
6181
Daniel Veillard4c778d82005-01-23 17:37:44 +00006182 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006183 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006184 xmlParserInputPtr input = ctxt->input;
6185
6186 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006187 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006188 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6189 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006190 }
6191 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006192 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006193 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006194 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6195 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006196 return(-1);
6197 }
6198 while ((RAW == 0) && (ctxt->inputNr > 1))
6199 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006200 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006201 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6202 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006203 }
6204 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006205 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006206 SKIP(5);
6207 /*
6208 * Element must always be empty.
6209 */
6210 ret = XML_ELEMENT_TYPE_EMPTY;
6211 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6212 (NXT(2) == 'Y')) {
6213 SKIP(3);
6214 /*
6215 * Element is a generic container.
6216 */
6217 ret = XML_ELEMENT_TYPE_ANY;
6218 } else if (RAW == '(') {
6219 ret = xmlParseElementContentDecl(ctxt, name, &content);
6220 } else {
6221 /*
6222 * [ WFC: PEs in Internal Subset ] error handling.
6223 */
6224 if ((RAW == '%') && (ctxt->external == 0) &&
6225 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006226 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006227 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006228 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006229 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006230 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6231 }
Owen Taylor3473f882001-02-23 17:55:21 +00006232 return(-1);
6233 }
6234
6235 SKIP_BLANKS;
6236 /*
6237 * Pop-up of finished entities.
6238 */
6239 while ((RAW == 0) && (ctxt->inputNr > 1))
6240 xmlPopInput(ctxt);
6241 SKIP_BLANKS;
6242
6243 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006244 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006245 if (content != NULL) {
6246 xmlFreeDocElementContent(ctxt->myDoc, content);
6247 }
Owen Taylor3473f882001-02-23 17:55:21 +00006248 } else {
6249 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006250 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6251 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006252 }
6253
6254 NEXT;
6255 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006256 (ctxt->sax->elementDecl != NULL)) {
6257 if (content != NULL)
6258 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006259 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6260 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006261 if ((content != NULL) && (content->parent == NULL)) {
6262 /*
6263 * this is a trick: if xmlAddElementDecl is called,
6264 * instead of copying the full tree it is plugged directly
6265 * if called from the parser. Avoid duplicating the
6266 * interfaces or change the API/ABI
6267 */
6268 xmlFreeDocElementContent(ctxt->myDoc, content);
6269 }
6270 } else if (content != NULL) {
6271 xmlFreeDocElementContent(ctxt->myDoc, content);
6272 }
Owen Taylor3473f882001-02-23 17:55:21 +00006273 }
Owen Taylor3473f882001-02-23 17:55:21 +00006274 }
6275 return(ret);
6276}
6277
6278/**
Owen Taylor3473f882001-02-23 17:55:21 +00006279 * xmlParseConditionalSections
6280 * @ctxt: an XML parser context
6281 *
6282 * [61] conditionalSect ::= includeSect | ignoreSect
6283 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6284 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6285 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6286 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6287 */
6288
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006289static void
Owen Taylor3473f882001-02-23 17:55:21 +00006290xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006291 int id = ctxt->input->id;
6292
Owen Taylor3473f882001-02-23 17:55:21 +00006293 SKIP(3);
6294 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006295 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006296 SKIP(7);
6297 SKIP_BLANKS;
6298 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006299 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006300 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006301 if (ctxt->input->id != id) {
6302 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6303 "All markup of the conditional section is not in the same entity\n",
6304 NULL, NULL);
6305 }
Owen Taylor3473f882001-02-23 17:55:21 +00006306 NEXT;
6307 }
6308 if (xmlParserDebugEntities) {
6309 if ((ctxt->input != NULL) && (ctxt->input->filename))
6310 xmlGenericError(xmlGenericErrorContext,
6311 "%s(%d): ", ctxt->input->filename,
6312 ctxt->input->line);
6313 xmlGenericError(xmlGenericErrorContext,
6314 "Entering INCLUDE Conditional Section\n");
6315 }
6316
6317 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6318 (NXT(2) != '>'))) {
6319 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006320 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006321
6322 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6323 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006324 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006325 NEXT;
6326 } else if (RAW == '%') {
6327 xmlParsePEReference(ctxt);
6328 } else
6329 xmlParseMarkupDecl(ctxt);
6330
6331 /*
6332 * Pop-up of finished entities.
6333 */
6334 while ((RAW == 0) && (ctxt->inputNr > 1))
6335 xmlPopInput(ctxt);
6336
Daniel Veillardfdc91562002-07-01 21:52:03 +00006337 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006338 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006339 break;
6340 }
6341 }
6342 if (xmlParserDebugEntities) {
6343 if ((ctxt->input != NULL) && (ctxt->input->filename))
6344 xmlGenericError(xmlGenericErrorContext,
6345 "%s(%d): ", ctxt->input->filename,
6346 ctxt->input->line);
6347 xmlGenericError(xmlGenericErrorContext,
6348 "Leaving INCLUDE Conditional Section\n");
6349 }
6350
Daniel Veillarda07050d2003-10-19 14:46:32 +00006351 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006352 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006353 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006354 int depth = 0;
6355
6356 SKIP(6);
6357 SKIP_BLANKS;
6358 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006359 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006360 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006361 if (ctxt->input->id != id) {
6362 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6363 "All markup of the conditional section is not in the same entity\n",
6364 NULL, NULL);
6365 }
Owen Taylor3473f882001-02-23 17:55:21 +00006366 NEXT;
6367 }
6368 if (xmlParserDebugEntities) {
6369 if ((ctxt->input != NULL) && (ctxt->input->filename))
6370 xmlGenericError(xmlGenericErrorContext,
6371 "%s(%d): ", ctxt->input->filename,
6372 ctxt->input->line);
6373 xmlGenericError(xmlGenericErrorContext,
6374 "Entering IGNORE Conditional Section\n");
6375 }
6376
6377 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006378 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006379 * But disable SAX event generating DTD building in the meantime
6380 */
6381 state = ctxt->disableSAX;
6382 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006383 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006384 ctxt->instate = XML_PARSER_IGNORE;
6385
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006386 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006387 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6388 depth++;
6389 SKIP(3);
6390 continue;
6391 }
6392 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6393 if (--depth >= 0) SKIP(3);
6394 continue;
6395 }
6396 NEXT;
6397 continue;
6398 }
6399
6400 ctxt->disableSAX = state;
6401 ctxt->instate = instate;
6402
6403 if (xmlParserDebugEntities) {
6404 if ((ctxt->input != NULL) && (ctxt->input->filename))
6405 xmlGenericError(xmlGenericErrorContext,
6406 "%s(%d): ", ctxt->input->filename,
6407 ctxt->input->line);
6408 xmlGenericError(xmlGenericErrorContext,
6409 "Leaving IGNORE Conditional Section\n");
6410 }
6411
6412 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006413 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006414 }
6415
6416 if (RAW == 0)
6417 SHRINK;
6418
6419 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006420 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006421 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006422 if (ctxt->input->id != id) {
6423 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6424 "All markup of the conditional section is not in the same entity\n",
6425 NULL, NULL);
6426 }
Owen Taylor3473f882001-02-23 17:55:21 +00006427 SKIP(3);
6428 }
6429}
6430
6431/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006432 * xmlParseMarkupDecl:
6433 * @ctxt: an XML parser context
6434 *
6435 * parse Markup declarations
6436 *
6437 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6438 * NotationDecl | PI | Comment
6439 *
6440 * [ VC: Proper Declaration/PE Nesting ]
6441 * Parameter-entity replacement text must be properly nested with
6442 * markup declarations. That is to say, if either the first character
6443 * or the last character of a markup declaration (markupdecl above) is
6444 * contained in the replacement text for a parameter-entity reference,
6445 * both must be contained in the same replacement text.
6446 *
6447 * [ WFC: PEs in Internal Subset ]
6448 * In the internal DTD subset, parameter-entity references can occur
6449 * only where markup declarations can occur, not within markup declarations.
6450 * (This does not apply to references that occur in external parameter
6451 * entities or to the external subset.)
6452 */
6453void
6454xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6455 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006456 if (CUR == '<') {
6457 if (NXT(1) == '!') {
6458 switch (NXT(2)) {
6459 case 'E':
6460 if (NXT(3) == 'L')
6461 xmlParseElementDecl(ctxt);
6462 else if (NXT(3) == 'N')
6463 xmlParseEntityDecl(ctxt);
6464 break;
6465 case 'A':
6466 xmlParseAttributeListDecl(ctxt);
6467 break;
6468 case 'N':
6469 xmlParseNotationDecl(ctxt);
6470 break;
6471 case '-':
6472 xmlParseComment(ctxt);
6473 break;
6474 default:
6475 /* there is an error but it will be detected later */
6476 break;
6477 }
6478 } else if (NXT(1) == '?') {
6479 xmlParsePI(ctxt);
6480 }
6481 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006482 /*
6483 * This is only for internal subset. On external entities,
6484 * the replacement is done before parsing stage
6485 */
6486 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6487 xmlParsePEReference(ctxt);
6488
6489 /*
6490 * Conditional sections are allowed from entities included
6491 * by PE References in the internal subset.
6492 */
6493 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6494 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6495 xmlParseConditionalSections(ctxt);
6496 }
6497 }
6498
6499 ctxt->instate = XML_PARSER_DTD;
6500}
6501
6502/**
6503 * xmlParseTextDecl:
6504 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006505 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006506 * parse an XML declaration header for external entities
6507 *
6508 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006509 */
6510
6511void
6512xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6513 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006514 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006515
6516 /*
6517 * We know that '<?xml' is here.
6518 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006519 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006520 SKIP(5);
6521 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006522 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006523 return;
6524 }
6525
William M. Brack76e95df2003-10-18 16:20:14 +00006526 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006527 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6528 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006529 }
6530 SKIP_BLANKS;
6531
6532 /*
6533 * We may have the VersionInfo here.
6534 */
6535 version = xmlParseVersionInfo(ctxt);
6536 if (version == NULL)
6537 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006538 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006539 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006540 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6541 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006542 }
6543 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006544 ctxt->input->version = version;
6545
6546 /*
6547 * We must have the encoding declaration
6548 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006549 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006550 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6551 /*
6552 * The XML REC instructs us to stop parsing right here
6553 */
6554 return;
6555 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006556 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6557 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6558 "Missing encoding in text declaration\n");
6559 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006560
6561 SKIP_BLANKS;
6562 if ((RAW == '?') && (NXT(1) == '>')) {
6563 SKIP(2);
6564 } else if (RAW == '>') {
6565 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006566 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006567 NEXT;
6568 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006569 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006570 MOVETO_ENDTAG(CUR_PTR);
6571 NEXT;
6572 }
6573}
6574
6575/**
Owen Taylor3473f882001-02-23 17:55:21 +00006576 * xmlParseExternalSubset:
6577 * @ctxt: an XML parser context
6578 * @ExternalID: the external identifier
6579 * @SystemID: the system identifier (or URL)
6580 *
6581 * parse Markup declarations from an external subset
6582 *
6583 * [30] extSubset ::= textDecl? extSubsetDecl
6584 *
6585 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6586 */
6587void
6588xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6589 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006590 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006591 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006592
6593 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6594 (ctxt->input->end - ctxt->input->cur >= 4)) {
6595 xmlChar start[4];
6596 xmlCharEncoding enc;
6597
6598 start[0] = RAW;
6599 start[1] = NXT(1);
6600 start[2] = NXT(2);
6601 start[3] = NXT(3);
6602 enc = xmlDetectCharEncoding(start, 4);
6603 if (enc != XML_CHAR_ENCODING_NONE)
6604 xmlSwitchEncoding(ctxt, enc);
6605 }
6606
Daniel Veillarda07050d2003-10-19 14:46:32 +00006607 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006608 xmlParseTextDecl(ctxt);
6609 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6610 /*
6611 * The XML REC instructs us to stop parsing right here
6612 */
6613 ctxt->instate = XML_PARSER_EOF;
6614 return;
6615 }
6616 }
6617 if (ctxt->myDoc == NULL) {
6618 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006619 if (ctxt->myDoc == NULL) {
6620 xmlErrMemory(ctxt, "New Doc failed");
6621 return;
6622 }
6623 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006624 }
6625 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6626 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6627
6628 ctxt->instate = XML_PARSER_DTD;
6629 ctxt->external = 1;
6630 while (((RAW == '<') && (NXT(1) == '?')) ||
6631 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006632 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006633 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006634 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006635
6636 GROW;
6637 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6638 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006639 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006640 NEXT;
6641 } else if (RAW == '%') {
6642 xmlParsePEReference(ctxt);
6643 } else
6644 xmlParseMarkupDecl(ctxt);
6645
6646 /*
6647 * Pop-up of finished entities.
6648 */
6649 while ((RAW == 0) && (ctxt->inputNr > 1))
6650 xmlPopInput(ctxt);
6651
Daniel Veillardfdc91562002-07-01 21:52:03 +00006652 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006653 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006654 break;
6655 }
6656 }
6657
6658 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006659 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006660 }
6661
6662}
6663
6664/**
6665 * xmlParseReference:
6666 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006667 *
 * parse and handle entity references in content. Depending on the SAX
 * interface, this may end up in a call to characters() if this is a
 * CharRef or a predefined entity, if there is no reference() callback,
 * or if the parser was asked to switch to that mode.
6672 *
6673 * [67] Reference ::= EntityRef | CharRef
6674 */
6675void
6676xmlParseReference(xmlParserCtxtPtr ctxt) {
6677 xmlEntityPtr ent;
6678 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006679 int was_checked;
6680 xmlNodePtr list = NULL;
6681 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006682
Daniel Veillard0161e632008-08-28 15:36:32 +00006683
6684 if (RAW != '&')
6685 return;
6686
6687 /*
6688 * Simple case of a CharRef
6689 */
Owen Taylor3473f882001-02-23 17:55:21 +00006690 if (NXT(1) == '#') {
6691 int i = 0;
6692 xmlChar out[10];
6693 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006694 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006695
Daniel Veillarddc171602008-03-26 17:41:38 +00006696 if (value == 0)
6697 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006698 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6699 /*
6700 * So we are using non-UTF-8 buffers
6701 * Check that the char fit on 8bits, if not
6702 * generate a CharRef.
6703 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006704 if (value <= 0xFF) {
6705 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006706 out[1] = 0;
6707 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6708 (!ctxt->disableSAX))
6709 ctxt->sax->characters(ctxt->userData, out, 1);
6710 } else {
6711 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006712 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006713 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006714 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006715 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6716 (!ctxt->disableSAX))
6717 ctxt->sax->reference(ctxt->userData, out);
6718 }
6719 } else {
6720 /*
6721 * Just encode the value in UTF-8
6722 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006723 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006724 out[i] = 0;
6725 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6726 (!ctxt->disableSAX))
6727 ctxt->sax->characters(ctxt->userData, out, i);
6728 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006729 return;
6730 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006731
Daniel Veillard0161e632008-08-28 15:36:32 +00006732 /*
6733 * We are seeing an entity reference
6734 */
6735 ent = xmlParseEntityRef(ctxt);
6736 if (ent == NULL) return;
6737 if (!ctxt->wellFormed)
6738 return;
6739 was_checked = ent->checked;
6740
6741 /* special case of predefined entities */
6742 if ((ent->name == NULL) ||
6743 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6744 val = ent->content;
6745 if (val == NULL) return;
6746 /*
6747 * inline the entity.
6748 */
6749 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6750 (!ctxt->disableSAX))
6751 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6752 return;
6753 }
6754
6755 /*
6756 * The first reference to the entity trigger a parsing phase
6757 * where the ent->children is filled with the result from
6758 * the parsing.
6759 */
6760 if (ent->checked == 0) {
6761 unsigned long oldnbent = ctxt->nbentities;
6762
6763 /*
6764 * This is a bit hackish but this seems the best
6765 * way to make sure both SAX and DOM entity support
6766 * behaves okay.
6767 */
6768 void *user_data;
6769 if (ctxt->userData == ctxt)
6770 user_data = NULL;
6771 else
6772 user_data = ctxt->userData;
6773
6774 /*
6775 * Check that this entity is well formed
6776 * 4.3.2: An internal general parsed entity is well-formed
6777 * if its replacement text matches the production labeled
6778 * content.
6779 */
6780 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6781 ctxt->depth++;
6782 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6783 user_data, &list);
6784 ctxt->depth--;
6785
6786 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6787 ctxt->depth++;
6788 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6789 user_data, ctxt->depth, ent->URI,
6790 ent->ExternalID, &list);
6791 ctxt->depth--;
6792 } else {
6793 ret = XML_ERR_ENTITY_PE_INTERNAL;
6794 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6795 "invalid entity type found\n", NULL);
6796 }
6797
6798 /*
6799 * Store the number of entities needing parsing for this entity
6800 * content and do checkings
6801 */
6802 ent->checked = ctxt->nbentities - oldnbent;
6803 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006804 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006805 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006806 return;
6807 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006808 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6809 xmlFreeNodeList(list);
6810 return;
6811 }
Owen Taylor3473f882001-02-23 17:55:21 +00006812
Daniel Veillard0161e632008-08-28 15:36:32 +00006813 if ((ret == XML_ERR_OK) && (list != NULL)) {
6814 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6815 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6816 (ent->children == NULL)) {
6817 ent->children = list;
6818 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006819 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006820 * Prune it directly in the generated document
6821 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006822 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006823 if (((list->type == XML_TEXT_NODE) &&
6824 (list->next == NULL)) ||
6825 (ctxt->parseMode == XML_PARSE_READER)) {
6826 list->parent = (xmlNodePtr) ent;
6827 list = NULL;
6828 ent->owner = 1;
6829 } else {
6830 ent->owner = 0;
6831 while (list != NULL) {
6832 list->parent = (xmlNodePtr) ctxt->node;
6833 list->doc = ctxt->myDoc;
6834 if (list->next == NULL)
6835 ent->last = list;
6836 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006837 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006838 list = ent->children;
6839#ifdef LIBXML_LEGACY_ENABLED
6840 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6841 xmlAddEntityReference(ent, list, NULL);
6842#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006843 }
6844 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006845 ent->owner = 1;
6846 while (list != NULL) {
6847 list->parent = (xmlNodePtr) ent;
6848 if (list->next == NULL)
6849 ent->last = list;
6850 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006851 }
6852 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006853 } else {
6854 xmlFreeNodeList(list);
6855 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006856 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006857 } else if ((ret != XML_ERR_OK) &&
6858 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6859 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6860 "Entity '%s' failed to parse\n", ent->name);
6861 } else if (list != NULL) {
6862 xmlFreeNodeList(list);
6863 list = NULL;
6864 }
6865 if (ent->checked == 0)
6866 ent->checked = 1;
6867 } else if (ent->checked != 1) {
6868 ctxt->nbentities += ent->checked;
6869 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006870
Daniel Veillard0161e632008-08-28 15:36:32 +00006871 /*
6872 * Now that the entity content has been gathered
6873 * provide it to the application, this can take different forms based
6874 * on the parsing modes.
6875 */
6876 if (ent->children == NULL) {
6877 /*
6878 * Probably running in SAX mode and the callbacks don't
6879 * build the entity content. So unless we already went
6880 * though parsing for first checking go though the entity
6881 * content to generate callbacks associated to the entity
6882 */
6883 if (was_checked != 0) {
6884 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00006885 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006886 * This is a bit hackish but this seems the best
6887 * way to make sure both SAX and DOM entity support
6888 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00006889 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006890 if (ctxt->userData == ctxt)
6891 user_data = NULL;
6892 else
6893 user_data = ctxt->userData;
6894
6895 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6896 ctxt->depth++;
6897 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6898 ent->content, user_data, NULL);
6899 ctxt->depth--;
6900 } else if (ent->etype ==
6901 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6902 ctxt->depth++;
6903 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6904 ctxt->sax, user_data, ctxt->depth,
6905 ent->URI, ent->ExternalID, NULL);
6906 ctxt->depth--;
6907 } else {
6908 ret = XML_ERR_ENTITY_PE_INTERNAL;
6909 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6910 "invalid entity type found\n", NULL);
6911 }
6912 if (ret == XML_ERR_ENTITY_LOOP) {
6913 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6914 return;
6915 }
6916 }
6917 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6918 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6919 /*
6920 * Entity reference callback comes second, it's somewhat
6921 * superfluous but a compatibility to historical behaviour
6922 */
6923 ctxt->sax->reference(ctxt->userData, ent->name);
6924 }
6925 return;
6926 }
6927
6928 /*
6929 * If we didn't get any children for the entity being built
6930 */
6931 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6932 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6933 /*
6934 * Create a node.
6935 */
6936 ctxt->sax->reference(ctxt->userData, ent->name);
6937 return;
6938 }
6939
6940 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6941 /*
6942 * There is a problem on the handling of _private for entities
6943 * (bug 155816): Should we copy the content of the field from
6944 * the entity (possibly overwriting some value set by the user
6945 * when a copy is created), should we leave it alone, or should
6946 * we try to take care of different situations? The problem
6947 * is exacerbated by the usage of this field by the xmlReader.
6948 * To fix this bug, we look at _private on the created node
6949 * and, if it's NULL, we copy in whatever was in the entity.
6950 * If it's not NULL we leave it alone. This is somewhat of a
6951 * hack - maybe we should have further tests to determine
6952 * what to do.
6953 */
6954 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6955 /*
6956 * Seems we are generating the DOM content, do
6957 * a simple tree copy for all references except the first
6958 * In the first occurrence list contains the replacement.
6959 * progressive == 2 means we are operating on the Reader
6960 * and since nodes are discarded we must copy all the time.
6961 */
6962 if (((list == NULL) && (ent->owner == 0)) ||
6963 (ctxt->parseMode == XML_PARSE_READER)) {
6964 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6965
6966 /*
6967 * when operating on a reader, the entities definitions
6968 * are always owning the entities subtree.
6969 if (ctxt->parseMode == XML_PARSE_READER)
6970 ent->owner = 1;
6971 */
6972
6973 cur = ent->children;
6974 while (cur != NULL) {
6975 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6976 if (nw != NULL) {
6977 if (nw->_private == NULL)
6978 nw->_private = cur->_private;
6979 if (firstChild == NULL){
6980 firstChild = nw;
6981 }
6982 nw = xmlAddChild(ctxt->node, nw);
6983 }
6984 if (cur == ent->last) {
6985 /*
6986 * needed to detect some strange empty
6987 * node cases in the reader tests
6988 */
6989 if ((ctxt->parseMode == XML_PARSE_READER) &&
6990 (nw != NULL) &&
6991 (nw->type == XML_ELEMENT_NODE) &&
6992 (nw->children == NULL))
6993 nw->extra = 1;
6994
6995 break;
6996 }
6997 cur = cur->next;
6998 }
6999#ifdef LIBXML_LEGACY_ENABLED
7000 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7001 xmlAddEntityReference(ent, firstChild, nw);
7002#endif /* LIBXML_LEGACY_ENABLED */
7003 } else if (list == NULL) {
7004 xmlNodePtr nw = NULL, cur, next, last,
7005 firstChild = NULL;
7006 /*
7007 * Copy the entity child list and make it the new
7008 * entity child list. The goal is to make sure any
7009 * ID or REF referenced will be the one from the
7010 * document content and not the entity copy.
7011 */
7012 cur = ent->children;
7013 ent->children = NULL;
7014 last = ent->last;
7015 ent->last = NULL;
7016 while (cur != NULL) {
7017 next = cur->next;
7018 cur->next = NULL;
7019 cur->parent = NULL;
7020 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7021 if (nw != NULL) {
7022 if (nw->_private == NULL)
7023 nw->_private = cur->_private;
7024 if (firstChild == NULL){
7025 firstChild = cur;
7026 }
7027 xmlAddChild((xmlNodePtr) ent, nw);
7028 xmlAddChild(ctxt->node, cur);
7029 }
7030 if (cur == last)
7031 break;
7032 cur = next;
7033 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007034 if (ent->owner == 0)
7035 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007036#ifdef LIBXML_LEGACY_ENABLED
7037 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7038 xmlAddEntityReference(ent, firstChild, nw);
7039#endif /* LIBXML_LEGACY_ENABLED */
7040 } else {
7041 const xmlChar *nbktext;
7042
7043 /*
7044 * the name change is to avoid coalescing of the
7045 * node with a possible previous text one which
7046 * would make ent->children a dangling pointer
7047 */
7048 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7049 -1);
7050 if (ent->children->type == XML_TEXT_NODE)
7051 ent->children->name = nbktext;
7052 if ((ent->last != ent->children) &&
7053 (ent->last->type == XML_TEXT_NODE))
7054 ent->last->name = nbktext;
7055 xmlAddChildList(ctxt->node, ent->children);
7056 }
7057
7058 /*
7059 * This is to avoid a nasty side effect, see
7060 * characters() in SAX.c
7061 */
7062 ctxt->nodemem = 0;
7063 ctxt->nodelen = 0;
7064 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007065 }
7066 }
7067}
7068
7069/**
7070 * xmlParseEntityRef:
7071 * @ctxt: an XML parser context
7072 *
7073 * parse ENTITY references declarations
7074 *
7075 * [68] EntityRef ::= '&' Name ';'
7076 *
7077 * [ WFC: Entity Declared ]
7078 * In a document without any DTD, a document with only an internal DTD
7079 * subset which contains no parameter entity references, or a document
7080 * with "standalone='yes'", the Name given in the entity reference
7081 * must match that in an entity declaration, except that well-formed
7082 * documents need not declare any of the following entities: amp, lt,
7083 * gt, apos, quot. The declaration of a parameter entity must precede
7084 * any reference to it. Similarly, the declaration of a general entity
7085 * must precede any reference to it which appears in a default value in an
7086 * attribute-list declaration. Note that if entities are declared in the
7087 * external subset or in external parameter entities, a non-validating
7088 * processor is not obligated to read and process their declarations;
7089 * for such documents, the rule that an entity must be declared is a
7090 * well-formedness constraint only if standalone='yes'.
7091 *
7092 * [ WFC: Parsed Entity ]
7093 * An entity reference must not contain the name of an unparsed entity
7094 *
7095 * Returns the xmlEntityPtr if found, or NULL otherwise.
7096 */
7097xmlEntityPtr
7098xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007099 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007100 xmlEntityPtr ent = NULL;
7101
7102 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007103
Daniel Veillard0161e632008-08-28 15:36:32 +00007104 if (RAW != '&')
7105 return(NULL);
7106 NEXT;
7107 name = xmlParseName(ctxt);
7108 if (name == NULL) {
7109 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7110 "xmlParseEntityRef: no name\n");
7111 return(NULL);
7112 }
7113 if (RAW != ';') {
7114 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7115 return(NULL);
7116 }
7117 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007118
Daniel Veillard0161e632008-08-28 15:36:32 +00007119 /*
7120 * Predefined entites override any extra definition
7121 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007122 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7123 ent = xmlGetPredefinedEntity(name);
7124 if (ent != NULL)
7125 return(ent);
7126 }
Owen Taylor3473f882001-02-23 17:55:21 +00007127
Daniel Veillard0161e632008-08-28 15:36:32 +00007128 /*
7129 * Increate the number of entity references parsed
7130 */
7131 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007132
Daniel Veillard0161e632008-08-28 15:36:32 +00007133 /*
7134 * Ask first SAX for entity resolution, otherwise try the
7135 * entities which may have stored in the parser context.
7136 */
7137 if (ctxt->sax != NULL) {
7138 if (ctxt->sax->getEntity != NULL)
7139 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007140 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7141 (ctxt->options & XML_PARSE_OLDSAX))
7142 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007143 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7144 (ctxt->userData==ctxt)) {
7145 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007146 }
7147 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007148 /*
7149 * [ WFC: Entity Declared ]
7150 * In a document without any DTD, a document with only an
7151 * internal DTD subset which contains no parameter entity
7152 * references, or a document with "standalone='yes'", the
7153 * Name given in the entity reference must match that in an
7154 * entity declaration, except that well-formed documents
7155 * need not declare any of the following entities: amp, lt,
7156 * gt, apos, quot.
7157 * The declaration of a parameter entity must precede any
7158 * reference to it.
7159 * Similarly, the declaration of a general entity must
7160 * precede any reference to it which appears in a default
7161 * value in an attribute-list declaration. Note that if
7162 * entities are declared in the external subset or in
7163 * external parameter entities, a non-validating processor
7164 * is not obligated to read and process their declarations;
7165 * for such documents, the rule that an entity must be
7166 * declared is a well-formedness constraint only if
7167 * standalone='yes'.
7168 */
7169 if (ent == NULL) {
7170 if ((ctxt->standalone == 1) ||
7171 ((ctxt->hasExternalSubset == 0) &&
7172 (ctxt->hasPErefs == 0))) {
7173 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7174 "Entity '%s' not defined\n", name);
7175 } else {
7176 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7177 "Entity '%s' not defined\n", name);
7178 if ((ctxt->inSubset == 0) &&
7179 (ctxt->sax != NULL) &&
7180 (ctxt->sax->reference != NULL)) {
7181 ctxt->sax->reference(ctxt->userData, name);
7182 }
7183 }
7184 ctxt->valid = 0;
7185 }
7186
7187 /*
7188 * [ WFC: Parsed Entity ]
7189 * An entity reference must not contain the name of an
7190 * unparsed entity
7191 */
7192 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7193 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7194 "Entity reference to unparsed entity %s\n", name);
7195 }
7196
7197 /*
7198 * [ WFC: No External Entity References ]
7199 * Attribute values cannot contain direct or indirect
7200 * entity references to external entities.
7201 */
7202 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7203 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7204 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7205 "Attribute references external entity '%s'\n", name);
7206 }
7207 /*
7208 * [ WFC: No < in Attribute Values ]
7209 * The replacement text of any entity referred to directly or
7210 * indirectly in an attribute value (other than "&lt;") must
7211 * not contain a <.
7212 */
7213 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7214 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007215 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007216 (xmlStrchr(ent->content, '<'))) {
7217 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7218 "'<' in entity '%s' is not allowed in attributes values\n", name);
7219 }
7220
7221 /*
7222 * Internal check, no parameter entities here ...
7223 */
7224 else {
7225 switch (ent->etype) {
7226 case XML_INTERNAL_PARAMETER_ENTITY:
7227 case XML_EXTERNAL_PARAMETER_ENTITY:
7228 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7229 "Attempt to reference the parameter entity '%s'\n",
7230 name);
7231 break;
7232 default:
7233 break;
7234 }
7235 }
7236
7237 /*
7238 * [ WFC: No Recursion ]
7239 * A parsed entity must not contain a recursive reference
7240 * to itself, either directly or indirectly.
7241 * Done somewhere else
7242 */
Owen Taylor3473f882001-02-23 17:55:21 +00007243 return(ent);
7244}
7245
7246/**
7247 * xmlParseStringEntityRef:
7248 * @ctxt: an XML parser context
7249 * @str: a pointer to an index in the string
7250 *
7251 * parse ENTITY references declarations, but this version parses it from
7252 * a string value.
7253 *
7254 * [68] EntityRef ::= '&' Name ';'
7255 *
7256 * [ WFC: Entity Declared ]
7257 * In a document without any DTD, a document with only an internal DTD
7258 * subset which contains no parameter entity references, or a document
7259 * with "standalone='yes'", the Name given in the entity reference
7260 * must match that in an entity declaration, except that well-formed
7261 * documents need not declare any of the following entities: amp, lt,
7262 * gt, apos, quot. The declaration of a parameter entity must precede
7263 * any reference to it. Similarly, the declaration of a general entity
7264 * must precede any reference to it which appears in a default value in an
7265 * attribute-list declaration. Note that if entities are declared in the
7266 * external subset or in external parameter entities, a non-validating
7267 * processor is not obligated to read and process their declarations;
7268 * for such documents, the rule that an entity must be declared is a
7269 * well-formedness constraint only if standalone='yes'.
7270 *
7271 * [ WFC: Parsed Entity ]
7272 * An entity reference must not contain the name of an unparsed entity
7273 *
7274 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7275 * is updated to the current location in the string.
7276 */
7277xmlEntityPtr
7278xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7279 xmlChar *name;
7280 const xmlChar *ptr;
7281 xmlChar cur;
7282 xmlEntityPtr ent = NULL;
7283
7284 if ((str == NULL) || (*str == NULL))
7285 return(NULL);
7286 ptr = *str;
7287 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007288 if (cur != '&')
7289 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007290
Daniel Veillard0161e632008-08-28 15:36:32 +00007291 ptr++;
7292 cur = *ptr;
7293 name = xmlParseStringName(ctxt, &ptr);
7294 if (name == NULL) {
7295 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7296 "xmlParseStringEntityRef: no name\n");
7297 *str = ptr;
7298 return(NULL);
7299 }
7300 if (*ptr != ';') {
7301 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007302 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007303 *str = ptr;
7304 return(NULL);
7305 }
7306 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007307
Owen Taylor3473f882001-02-23 17:55:21 +00007308
Daniel Veillard0161e632008-08-28 15:36:32 +00007309 /*
7310 * Predefined entites override any extra definition
7311 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007312 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7313 ent = xmlGetPredefinedEntity(name);
7314 if (ent != NULL) {
7315 xmlFree(name);
7316 *str = ptr;
7317 return(ent);
7318 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007319 }
Owen Taylor3473f882001-02-23 17:55:21 +00007320
Daniel Veillard0161e632008-08-28 15:36:32 +00007321 /*
7322 * Increate the number of entity references parsed
7323 */
7324 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007325
Daniel Veillard0161e632008-08-28 15:36:32 +00007326 /*
7327 * Ask first SAX for entity resolution, otherwise try the
7328 * entities which may have stored in the parser context.
7329 */
7330 if (ctxt->sax != NULL) {
7331 if (ctxt->sax->getEntity != NULL)
7332 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007333 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7334 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007335 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7336 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007337 }
7338 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007339
7340 /*
7341 * [ WFC: Entity Declared ]
7342 * In a document without any DTD, a document with only an
7343 * internal DTD subset which contains no parameter entity
7344 * references, or a document with "standalone='yes'", the
7345 * Name given in the entity reference must match that in an
7346 * entity declaration, except that well-formed documents
7347 * need not declare any of the following entities: amp, lt,
7348 * gt, apos, quot.
7349 * The declaration of a parameter entity must precede any
7350 * reference to it.
7351 * Similarly, the declaration of a general entity must
7352 * precede any reference to it which appears in a default
7353 * value in an attribute-list declaration. Note that if
7354 * entities are declared in the external subset or in
7355 * external parameter entities, a non-validating processor
7356 * is not obligated to read and process their declarations;
7357 * for such documents, the rule that an entity must be
7358 * declared is a well-formedness constraint only if
7359 * standalone='yes'.
7360 */
7361 if (ent == NULL) {
7362 if ((ctxt->standalone == 1) ||
7363 ((ctxt->hasExternalSubset == 0) &&
7364 (ctxt->hasPErefs == 0))) {
7365 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7366 "Entity '%s' not defined\n", name);
7367 } else {
7368 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7369 "Entity '%s' not defined\n",
7370 name);
7371 }
7372 /* TODO ? check regressions ctxt->valid = 0; */
7373 }
7374
7375 /*
7376 * [ WFC: Parsed Entity ]
7377 * An entity reference must not contain the name of an
7378 * unparsed entity
7379 */
7380 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7381 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7382 "Entity reference to unparsed entity %s\n", name);
7383 }
7384
7385 /*
7386 * [ WFC: No External Entity References ]
7387 * Attribute values cannot contain direct or indirect
7388 * entity references to external entities.
7389 */
7390 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7391 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7392 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7393 "Attribute references external entity '%s'\n", name);
7394 }
7395 /*
7396 * [ WFC: No < in Attribute Values ]
7397 * The replacement text of any entity referred to directly or
7398 * indirectly in an attribute value (other than "&lt;") must
7399 * not contain a <.
7400 */
7401 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7402 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007403 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007404 (xmlStrchr(ent->content, '<'))) {
7405 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7406 "'<' in entity '%s' is not allowed in attributes values\n",
7407 name);
7408 }
7409
7410 /*
7411 * Internal check, no parameter entities here ...
7412 */
7413 else {
7414 switch (ent->etype) {
7415 case XML_INTERNAL_PARAMETER_ENTITY:
7416 case XML_EXTERNAL_PARAMETER_ENTITY:
7417 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7418 "Attempt to reference the parameter entity '%s'\n",
7419 name);
7420 break;
7421 default:
7422 break;
7423 }
7424 }
7425
7426 /*
7427 * [ WFC: No Recursion ]
7428 * A parsed entity must not contain a recursive reference
7429 * to itself, either directly or indirectly.
7430 * Done somewhere else
7431 */
7432
7433 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007434 *str = ptr;
7435 return(ent);
7436}
7437
7438/**
7439 * xmlParsePEReference:
7440 * @ctxt: an XML parser context
7441 *
7442 * parse PEReference declarations
7443 * The entity content is handled directly by pushing it's content as
7444 * a new input stream.
7445 *
7446 * [69] PEReference ::= '%' Name ';'
7447 *
7448 * [ WFC: No Recursion ]
7449 * A parsed entity must not contain a recursive
7450 * reference to itself, either directly or indirectly.
7451 *
7452 * [ WFC: Entity Declared ]
7453 * In a document without any DTD, a document with only an internal DTD
7454 * subset which contains no parameter entity references, or a document
7455 * with "standalone='yes'", ... ... The declaration of a parameter
7456 * entity must precede any reference to it...
7457 *
7458 * [ VC: Entity Declared ]
7459 * In a document with an external subset or external parameter entities
7460 * with "standalone='no'", ... ... The declaration of a parameter entity
7461 * must precede any reference to it...
7462 *
7463 * [ WFC: In DTD ]
7464 * Parameter-entity references may only appear in the DTD.
7465 * NOTE: misleading but this is handled.
7466 */
7467void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007468xmlParsePEReference(xmlParserCtxtPtr ctxt)
7469{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007470 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007471 xmlEntityPtr entity = NULL;
7472 xmlParserInputPtr input;
7473
Daniel Veillard0161e632008-08-28 15:36:32 +00007474 if (RAW != '%')
7475 return;
7476 NEXT;
7477 name = xmlParseName(ctxt);
7478 if (name == NULL) {
7479 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7480 "xmlParsePEReference: no name\n");
7481 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007482 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007483 if (RAW != ';') {
7484 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7485 return;
7486 }
7487
7488 NEXT;
7489
7490 /*
7491 * Increate the number of entity references parsed
7492 */
7493 ctxt->nbentities++;
7494
7495 /*
7496 * Request the entity from SAX
7497 */
7498 if ((ctxt->sax != NULL) &&
7499 (ctxt->sax->getParameterEntity != NULL))
7500 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7501 name);
7502 if (entity == NULL) {
7503 /*
7504 * [ WFC: Entity Declared ]
7505 * In a document without any DTD, a document with only an
7506 * internal DTD subset which contains no parameter entity
7507 * references, or a document with "standalone='yes'", ...
7508 * ... The declaration of a parameter entity must precede
7509 * any reference to it...
7510 */
7511 if ((ctxt->standalone == 1) ||
7512 ((ctxt->hasExternalSubset == 0) &&
7513 (ctxt->hasPErefs == 0))) {
7514 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7515 "PEReference: %%%s; not found\n",
7516 name);
7517 } else {
7518 /*
7519 * [ VC: Entity Declared ]
7520 * In a document with an external subset or external
7521 * parameter entities with "standalone='no'", ...
7522 * ... The declaration of a parameter entity must
7523 * precede any reference to it...
7524 */
7525 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7526 "PEReference: %%%s; not found\n",
7527 name, NULL);
7528 ctxt->valid = 0;
7529 }
7530 } else {
7531 /*
7532 * Internal checking in case the entity quest barfed
7533 */
7534 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7535 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7536 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7537 "Internal: %%%s; is not a parameter entity\n",
7538 name, NULL);
7539 } else if (ctxt->input->free != deallocblankswrapper) {
7540 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7541 if (xmlPushInput(ctxt, input) < 0)
7542 return;
7543 } else {
7544 /*
7545 * TODO !!!
7546 * handle the extra spaces added before and after
7547 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7548 */
7549 input = xmlNewEntityInputStream(ctxt, entity);
7550 if (xmlPushInput(ctxt, input) < 0)
7551 return;
7552 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7553 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7554 (IS_BLANK_CH(NXT(5)))) {
7555 xmlParseTextDecl(ctxt);
7556 if (ctxt->errNo ==
7557 XML_ERR_UNSUPPORTED_ENCODING) {
7558 /*
7559 * The XML REC instructs us to stop parsing
7560 * right here
7561 */
7562 ctxt->instate = XML_PARSER_EOF;
7563 return;
7564 }
7565 }
7566 }
7567 }
7568 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007569}
7570
7571/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007572 * xmlLoadEntityContent:
7573 * @ctxt: an XML parser context
7574 * @entity: an unloaded system entity
7575 *
7576 * Load the original content of the given system entity from the
7577 * ExternalID/SystemID given. This is to be used for Included in Literal
7578 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7579 *
7580 * Returns 0 in case of success and -1 in case of failure
7581 */
7582static int
7583xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7584 xmlParserInputPtr input;
7585 xmlBufferPtr buf;
7586 int l, c;
7587 int count = 0;
7588
7589 if ((ctxt == NULL) || (entity == NULL) ||
7590 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7591 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7592 (entity->content != NULL)) {
7593 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7594 "xmlLoadEntityContent parameter error");
7595 return(-1);
7596 }
7597
7598 if (xmlParserDebugEntities)
7599 xmlGenericError(xmlGenericErrorContext,
7600 "Reading %s entity content input\n", entity->name);
7601
7602 buf = xmlBufferCreate();
7603 if (buf == NULL) {
7604 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7605 "xmlLoadEntityContent parameter error");
7606 return(-1);
7607 }
7608
7609 input = xmlNewEntityInputStream(ctxt, entity);
7610 if (input == NULL) {
7611 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7612 "xmlLoadEntityContent input error");
7613 xmlBufferFree(buf);
7614 return(-1);
7615 }
7616
7617 /*
7618 * Push the entity as the current input, read char by char
7619 * saving to the buffer until the end of the entity or an error
7620 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007621 if (xmlPushInput(ctxt, input) < 0) {
7622 xmlBufferFree(buf);
7623 return(-1);
7624 }
7625
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007626 GROW;
7627 c = CUR_CHAR(l);
7628 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7629 (IS_CHAR(c))) {
7630 xmlBufferAdd(buf, ctxt->input->cur, l);
7631 if (count++ > 100) {
7632 count = 0;
7633 GROW;
7634 }
7635 NEXTL(l);
7636 c = CUR_CHAR(l);
7637 }
7638
7639 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7640 xmlPopInput(ctxt);
7641 } else if (!IS_CHAR(c)) {
7642 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7643 "xmlLoadEntityContent: invalid char value %d\n",
7644 c);
7645 xmlBufferFree(buf);
7646 return(-1);
7647 }
7648 entity->content = buf->content;
7649 buf->content = NULL;
7650 xmlBufferFree(buf);
7651
7652 return(0);
7653}
7654
7655/**
Owen Taylor3473f882001-02-23 17:55:21 +00007656 * xmlParseStringPEReference:
7657 * @ctxt: an XML parser context
7658 * @str: a pointer to an index in the string
7659 *
7660 * parse PEReference declarations
7661 *
7662 * [69] PEReference ::= '%' Name ';'
7663 *
7664 * [ WFC: No Recursion ]
7665 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007666 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007667 *
7668 * [ WFC: Entity Declared ]
7669 * In a document without any DTD, a document with only an internal DTD
7670 * subset which contains no parameter entity references, or a document
7671 * with "standalone='yes'", ... ... The declaration of a parameter
7672 * entity must precede any reference to it...
7673 *
7674 * [ VC: Entity Declared ]
7675 * In a document with an external subset or external parameter entities
7676 * with "standalone='no'", ... ... The declaration of a parameter entity
7677 * must precede any reference to it...
7678 *
7679 * [ WFC: In DTD ]
7680 * Parameter-entity references may only appear in the DTD.
7681 * NOTE: misleading but this is handled.
7682 *
7683 * Returns the string of the entity content.
7684 * str is updated to the current value of the index
7685 */
7686xmlEntityPtr
7687xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7688 const xmlChar *ptr;
7689 xmlChar cur;
7690 xmlChar *name;
7691 xmlEntityPtr entity = NULL;
7692
7693 if ((str == NULL) || (*str == NULL)) return(NULL);
7694 ptr = *str;
7695 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007696 if (cur != '%')
7697 return(NULL);
7698 ptr++;
7699 cur = *ptr;
7700 name = xmlParseStringName(ctxt, &ptr);
7701 if (name == NULL) {
7702 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7703 "xmlParseStringPEReference: no name\n");
7704 *str = ptr;
7705 return(NULL);
7706 }
7707 cur = *ptr;
7708 if (cur != ';') {
7709 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7710 xmlFree(name);
7711 *str = ptr;
7712 return(NULL);
7713 }
7714 ptr++;
7715
7716 /*
7717 * Increate the number of entity references parsed
7718 */
7719 ctxt->nbentities++;
7720
7721 /*
7722 * Request the entity from SAX
7723 */
7724 if ((ctxt->sax != NULL) &&
7725 (ctxt->sax->getParameterEntity != NULL))
7726 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7727 name);
7728 if (entity == NULL) {
7729 /*
7730 * [ WFC: Entity Declared ]
7731 * In a document without any DTD, a document with only an
7732 * internal DTD subset which contains no parameter entity
7733 * references, or a document with "standalone='yes'", ...
7734 * ... The declaration of a parameter entity must precede
7735 * any reference to it...
7736 */
7737 if ((ctxt->standalone == 1) ||
7738 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7739 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7740 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007741 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007742 /*
7743 * [ VC: Entity Declared ]
7744 * In a document with an external subset or external
7745 * parameter entities with "standalone='no'", ...
7746 * ... The declaration of a parameter entity must
7747 * precede any reference to it...
7748 */
7749 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7750 "PEReference: %%%s; not found\n",
7751 name, NULL);
7752 ctxt->valid = 0;
7753 }
7754 } else {
7755 /*
7756 * Internal checking in case the entity quest barfed
7757 */
7758 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7759 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7760 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7761 "%%%s; is not a parameter entity\n",
7762 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007763 }
7764 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007765 ctxt->hasPErefs = 1;
7766 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007767 *str = ptr;
7768 return(entity);
7769}
7770
7771/**
7772 * xmlParseDocTypeDecl:
7773 * @ctxt: an XML parser context
7774 *
7775 * parse a DOCTYPE declaration
7776 *
7777 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7778 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7779 *
7780 * [ VC: Root Element Type ]
7781 * The Name in the document type declaration must match the element
7782 * type of the root element.
7783 */
7784
7785void
7786xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007787 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007788 xmlChar *ExternalID = NULL;
7789 xmlChar *URI = NULL;
7790
7791 /*
7792 * We know that '<!DOCTYPE' has been detected.
7793 */
7794 SKIP(9);
7795
7796 SKIP_BLANKS;
7797
7798 /*
7799 * Parse the DOCTYPE name.
7800 */
7801 name = xmlParseName(ctxt);
7802 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007803 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7804 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007805 }
7806 ctxt->intSubName = name;
7807
7808 SKIP_BLANKS;
7809
7810 /*
7811 * Check for SystemID and ExternalID
7812 */
7813 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7814
7815 if ((URI != NULL) || (ExternalID != NULL)) {
7816 ctxt->hasExternalSubset = 1;
7817 }
7818 ctxt->extSubURI = URI;
7819 ctxt->extSubSystem = ExternalID;
7820
7821 SKIP_BLANKS;
7822
7823 /*
7824 * Create and update the internal subset.
7825 */
7826 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7827 (!ctxt->disableSAX))
7828 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7829
7830 /*
7831 * Is there any internal subset declarations ?
7832 * they are handled separately in xmlParseInternalSubset()
7833 */
7834 if (RAW == '[')
7835 return;
7836
7837 /*
7838 * We should be at the end of the DOCTYPE declaration.
7839 */
7840 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007841 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007842 }
7843 NEXT;
7844}
7845
7846/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007847 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007848 * @ctxt: an XML parser context
7849 *
7850 * parse the internal subset declaration
7851 *
7852 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7853 */
7854
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007855static void
Owen Taylor3473f882001-02-23 17:55:21 +00007856xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7857 /*
7858 * Is there any DTD definition ?
7859 */
7860 if (RAW == '[') {
7861 ctxt->instate = XML_PARSER_DTD;
7862 NEXT;
7863 /*
7864 * Parse the succession of Markup declarations and
7865 * PEReferences.
7866 * Subsequence (markupdecl | PEReference | S)*
7867 */
7868 while (RAW != ']') {
7869 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007870 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007871
7872 SKIP_BLANKS;
7873 xmlParseMarkupDecl(ctxt);
7874 xmlParsePEReference(ctxt);
7875
7876 /*
7877 * Pop-up of finished entities.
7878 */
7879 while ((RAW == 0) && (ctxt->inputNr > 1))
7880 xmlPopInput(ctxt);
7881
7882 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007883 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007884 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007885 break;
7886 }
7887 }
7888 if (RAW == ']') {
7889 NEXT;
7890 SKIP_BLANKS;
7891 }
7892 }
7893
7894 /*
7895 * We should be at the end of the DOCTYPE declaration.
7896 */
7897 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007898 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007899 }
7900 NEXT;
7901}
7902
Daniel Veillard81273902003-09-30 00:43:48 +00007903#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007904/**
7905 * xmlParseAttribute:
7906 * @ctxt: an XML parser context
7907 * @value: a xmlChar ** used to store the value of the attribute
7908 *
7909 * parse an attribute
7910 *
7911 * [41] Attribute ::= Name Eq AttValue
7912 *
7913 * [ WFC: No External Entity References ]
7914 * Attribute values cannot contain direct or indirect entity references
7915 * to external entities.
7916 *
7917 * [ WFC: No < in Attribute Values ]
7918 * The replacement text of any entity referred to directly or indirectly in
7919 * an attribute value (other than "&lt;") must not contain a <.
7920 *
7921 * [ VC: Attribute Value Type ]
7922 * The attribute must have been declared; the value must be of the type
7923 * declared for it.
7924 *
7925 * [25] Eq ::= S? '=' S?
7926 *
7927 * With namespace:
7928 *
7929 * [NS 11] Attribute ::= QName Eq AttValue
7930 *
7931 * Also the case QName == xmlns:??? is handled independently as a namespace
7932 * definition.
7933 *
7934 * Returns the attribute name, and the value in *value.
7935 */
7936
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007937const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007938xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007939 const xmlChar *name;
7940 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007941
7942 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007943 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007944 name = xmlParseName(ctxt);
7945 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007946 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007947 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007948 return(NULL);
7949 }
7950
7951 /*
7952 * read the value
7953 */
7954 SKIP_BLANKS;
7955 if (RAW == '=') {
7956 NEXT;
7957 SKIP_BLANKS;
7958 val = xmlParseAttValue(ctxt);
7959 ctxt->instate = XML_PARSER_CONTENT;
7960 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007961 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007962 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007963 return(NULL);
7964 }
7965
7966 /*
7967 * Check that xml:lang conforms to the specification
7968 * No more registered as an error, just generate a warning now
7969 * since this was deprecated in XML second edition
7970 */
7971 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7972 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007973 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7974 "Malformed value for xml:lang : %s\n",
7975 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007976 }
7977 }
7978
7979 /*
7980 * Check that xml:space conforms to the specification
7981 */
7982 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7983 if (xmlStrEqual(val, BAD_CAST "default"))
7984 *(ctxt->space) = 0;
7985 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7986 *(ctxt->space) = 1;
7987 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007988 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007989"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007990 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007991 }
7992 }
7993
7994 *value = val;
7995 return(name);
7996}
7997
7998/**
7999 * xmlParseStartTag:
8000 * @ctxt: an XML parser context
8001 *
8002 * parse a start of tag either for rule element or
8003 * EmptyElement. In both case we don't parse the tag closing chars.
8004 *
8005 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8006 *
8007 * [ WFC: Unique Att Spec ]
8008 * No attribute name may appear more than once in the same start-tag or
8009 * empty-element tag.
8010 *
8011 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8012 *
8013 * [ WFC: Unique Att Spec ]
8014 * No attribute name may appear more than once in the same start-tag or
8015 * empty-element tag.
8016 *
8017 * With namespace:
8018 *
8019 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8020 *
8021 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8022 *
8023 * Returns the element name parsed
8024 */
8025
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008026const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008027xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008028 const xmlChar *name;
8029 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008030 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008031 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008032 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008033 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008034 int i;
8035
8036 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008037 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008038
8039 name = xmlParseName(ctxt);
8040 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008041 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008042 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008043 return(NULL);
8044 }
8045
8046 /*
8047 * Now parse the attributes, it ends up with the ending
8048 *
8049 * (S Attribute)* S?
8050 */
8051 SKIP_BLANKS;
8052 GROW;
8053
Daniel Veillard21a0f912001-02-25 19:54:14 +00008054 while ((RAW != '>') &&
8055 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008056 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008057 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008058 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008059
8060 attname = xmlParseAttribute(ctxt, &attvalue);
8061 if ((attname != NULL) && (attvalue != NULL)) {
8062 /*
8063 * [ WFC: Unique Att Spec ]
8064 * No attribute name may appear more than once in the same
8065 * start-tag or empty-element tag.
8066 */
8067 for (i = 0; i < nbatts;i += 2) {
8068 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008069 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008070 xmlFree(attvalue);
8071 goto failed;
8072 }
8073 }
Owen Taylor3473f882001-02-23 17:55:21 +00008074 /*
8075 * Add the pair to atts
8076 */
8077 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008078 maxatts = 22; /* allow for 10 attrs by default */
8079 atts = (const xmlChar **)
8080 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008081 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008082 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008083 if (attvalue != NULL)
8084 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008085 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008086 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008087 ctxt->atts = atts;
8088 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008089 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008090 const xmlChar **n;
8091
Owen Taylor3473f882001-02-23 17:55:21 +00008092 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008093 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008094 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008095 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008096 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008097 if (attvalue != NULL)
8098 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008099 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008100 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008101 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008102 ctxt->atts = atts;
8103 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008104 }
8105 atts[nbatts++] = attname;
8106 atts[nbatts++] = attvalue;
8107 atts[nbatts] = NULL;
8108 atts[nbatts + 1] = NULL;
8109 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008110 if (attvalue != NULL)
8111 xmlFree(attvalue);
8112 }
8113
8114failed:
8115
Daniel Veillard3772de32002-12-17 10:31:45 +00008116 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008117 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8118 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008119 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008120 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8121 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008122 }
8123 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008124 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8125 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008126 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8127 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008128 break;
8129 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008130 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008131 GROW;
8132 }
8133
8134 /*
8135 * SAX: Start of Element !
8136 */
8137 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008138 (!ctxt->disableSAX)) {
8139 if (nbatts > 0)
8140 ctxt->sax->startElement(ctxt->userData, name, atts);
8141 else
8142 ctxt->sax->startElement(ctxt->userData, name, NULL);
8143 }
Owen Taylor3473f882001-02-23 17:55:21 +00008144
8145 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008146 /* Free only the content strings */
8147 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008148 if (atts[i] != NULL)
8149 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008150 }
8151 return(name);
8152}
8153
8154/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008155 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008156 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008157 * @line: line of the start tag
8158 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008159 *
8160 * parse an end of tag
8161 *
8162 * [42] ETag ::= '</' Name S? '>'
8163 *
8164 * With namespace
8165 *
8166 * [NS 9] ETag ::= '</' QName S? '>'
8167 */
8168
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008169static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008170xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008171 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008172
8173 GROW;
8174 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008175 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008176 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008177 return;
8178 }
8179 SKIP(2);
8180
Daniel Veillard46de64e2002-05-29 08:21:33 +00008181 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008182
8183 /*
8184 * We should definitely be at the ending "S? '>'" part
8185 */
8186 GROW;
8187 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008188 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008189 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008190 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008191 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008192
8193 /*
8194 * [ WFC: Element Type Match ]
8195 * The Name in an element's end-tag must match the element type in the
8196 * start-tag.
8197 *
8198 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008199 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008200 if (name == NULL) name = BAD_CAST "unparseable";
8201 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008202 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008203 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008204 }
8205
8206 /*
8207 * SAX: End of Tag
8208 */
8209 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8210 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008211 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008212
Daniel Veillarde57ec792003-09-10 10:50:59 +00008213 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008214 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008215 return;
8216}
8217
8218/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008219 * xmlParseEndTag:
8220 * @ctxt: an XML parser context
8221 *
8222 * parse an end of tag
8223 *
8224 * [42] ETag ::= '</' Name S? '>'
8225 *
8226 * With namespace
8227 *
8228 * [NS 9] ETag ::= '</' QName S? '>'
8229 */
8230
8231void
8232xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008233 xmlParseEndTag1(ctxt, 0);
8234}
Daniel Veillard81273902003-09-30 00:43:48 +00008235#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008236
8237/************************************************************************
8238 * *
8239 * SAX 2 specific operations *
8240 * *
8241 ************************************************************************/
8242
Daniel Veillard0fb18932003-09-07 09:14:37 +00008243/*
8244 * xmlGetNamespace:
8245 * @ctxt: an XML parser context
8246 * @prefix: the prefix to lookup
8247 *
8248 * Lookup the namespace name for the @prefix (which ca be NULL)
8249 * The prefix must come from the @ctxt->dict dictionnary
8250 *
8251 * Returns the namespace name or NULL if not bound
8252 */
8253static const xmlChar *
8254xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8255 int i;
8256
Daniel Veillarde57ec792003-09-10 10:50:59 +00008257 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008258 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008259 if (ctxt->nsTab[i] == prefix) {
8260 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8261 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008262 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008263 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008264 return(NULL);
8265}
8266
8267/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008268 * xmlParseQName:
8269 * @ctxt: an XML parser context
8270 * @prefix: pointer to store the prefix part
8271 *
8272 * parse an XML Namespace QName
8273 *
8274 * [6] QName ::= (Prefix ':')? LocalPart
8275 * [7] Prefix ::= NCName
8276 * [8] LocalPart ::= NCName
8277 *
8278 * Returns the Name parsed or NULL
8279 */
8280
8281static const xmlChar *
8282xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8283 const xmlChar *l, *p;
8284
8285 GROW;
8286
8287 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008288 if (l == NULL) {
8289 if (CUR == ':') {
8290 l = xmlParseName(ctxt);
8291 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008292 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8293 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008294 *prefix = NULL;
8295 return(l);
8296 }
8297 }
8298 return(NULL);
8299 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008300 if (CUR == ':') {
8301 NEXT;
8302 p = l;
8303 l = xmlParseNCName(ctxt);
8304 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008305 xmlChar *tmp;
8306
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008307 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8308 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008309 l = xmlParseNmtoken(ctxt);
8310 if (l == NULL)
8311 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8312 else {
8313 tmp = xmlBuildQName(l, p, NULL, 0);
8314 xmlFree((char *)l);
8315 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008316 p = xmlDictLookup(ctxt->dict, tmp, -1);
8317 if (tmp != NULL) xmlFree(tmp);
8318 *prefix = NULL;
8319 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008320 }
8321 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008322 xmlChar *tmp;
8323
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008324 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8325 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008326 NEXT;
8327 tmp = (xmlChar *) xmlParseName(ctxt);
8328 if (tmp != NULL) {
8329 tmp = xmlBuildQName(tmp, l, NULL, 0);
8330 l = xmlDictLookup(ctxt->dict, tmp, -1);
8331 if (tmp != NULL) xmlFree(tmp);
8332 *prefix = p;
8333 return(l);
8334 }
8335 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8336 l = xmlDictLookup(ctxt->dict, tmp, -1);
8337 if (tmp != NULL) xmlFree(tmp);
8338 *prefix = p;
8339 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008340 }
8341 *prefix = p;
8342 } else
8343 *prefix = NULL;
8344 return(l);
8345}
8346
8347/**
8348 * xmlParseQNameAndCompare:
8349 * @ctxt: an XML parser context
8350 * @name: the localname
8351 * @prefix: the prefix, if any.
8352 *
8353 * parse an XML name and compares for match
8354 * (specialized for endtag parsing)
8355 *
8356 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8357 * and the name for mismatch
8358 */
8359
8360static const xmlChar *
8361xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8362 xmlChar const *prefix) {
8363 const xmlChar *cmp = name;
8364 const xmlChar *in;
8365 const xmlChar *ret;
8366 const xmlChar *prefix2;
8367
8368 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8369
8370 GROW;
8371 in = ctxt->input->cur;
8372
8373 cmp = prefix;
8374 while (*in != 0 && *in == *cmp) {
8375 ++in;
8376 ++cmp;
8377 }
8378 if ((*cmp == 0) && (*in == ':')) {
8379 in++;
8380 cmp = name;
8381 while (*in != 0 && *in == *cmp) {
8382 ++in;
8383 ++cmp;
8384 }
William M. Brack76e95df2003-10-18 16:20:14 +00008385 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008386 /* success */
8387 ctxt->input->cur = in;
8388 return((const xmlChar*) 1);
8389 }
8390 }
8391 /*
8392 * all strings coms from the dictionary, equality can be done directly
8393 */
8394 ret = xmlParseQName (ctxt, &prefix2);
8395 if ((ret == name) && (prefix == prefix2))
8396 return((const xmlChar*) 1);
8397 return ret;
8398}
8399
8400/**
8401 * xmlParseAttValueInternal:
8402 * @ctxt: an XML parser context
8403 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008404 * @alloc: whether the attribute was reallocated as a new string
8405 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008406 *
8407 * parse a value for an attribute.
8408 * NOTE: if no normalization is needed, the routine will return pointers
8409 * directly from the data buffer.
8410 *
8411 * 3.3.3 Attribute-Value Normalization:
8412 * Before the value of an attribute is passed to the application or
8413 * checked for validity, the XML processor must normalize it as follows:
8414 * - a character reference is processed by appending the referenced
8415 * character to the attribute value
8416 * - an entity reference is processed by recursively processing the
8417 * replacement text of the entity
8418 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8419 * appending #x20 to the normalized value, except that only a single
8420 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8421 * parsed entity or the literal entity value of an internal parsed entity
8422 * - other characters are processed by appending them to the normalized value
8423 * If the declared value is not CDATA, then the XML processor must further
8424 * process the normalized attribute value by discarding any leading and
8425 * trailing space (#x20) characters, and by replacing sequences of space
8426 * (#x20) characters by a single space (#x20) character.
8427 * All attributes for which no declaration has been read should be treated
8428 * by a non-validating parser as if declared CDATA.
8429 *
8430 * Returns the AttValue parsed or NULL. The value has to be freed by the
8431 * caller if it was copied, this can be detected by val[*len] == 0.
8432 */
8433
8434static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008435xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8436 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008437{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008438 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008439 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008440 xmlChar *ret = NULL;
8441
8442 GROW;
8443 in = (xmlChar *) CUR_PTR;
8444 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008445 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008446 return (NULL);
8447 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008448 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008449
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008450 /*
8451 * try to handle in this routine the most common case where no
8452 * allocation of a new string is required and where content is
8453 * pure ASCII.
8454 */
8455 limit = *in++;
8456 end = ctxt->input->end;
8457 start = in;
8458 if (in >= end) {
8459 const xmlChar *oldbase = ctxt->input->base;
8460 GROW;
8461 if (oldbase != ctxt->input->base) {
8462 long delta = ctxt->input->base - oldbase;
8463 start = start + delta;
8464 in = in + delta;
8465 }
8466 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008467 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008468 if (normalize) {
8469 /*
8470 * Skip any leading spaces
8471 */
8472 while ((in < end) && (*in != limit) &&
8473 ((*in == 0x20) || (*in == 0x9) ||
8474 (*in == 0xA) || (*in == 0xD))) {
8475 in++;
8476 start = in;
8477 if (in >= end) {
8478 const xmlChar *oldbase = ctxt->input->base;
8479 GROW;
8480 if (oldbase != ctxt->input->base) {
8481 long delta = ctxt->input->base - oldbase;
8482 start = start + delta;
8483 in = in + delta;
8484 }
8485 end = ctxt->input->end;
8486 }
8487 }
8488 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8489 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8490 if ((*in++ == 0x20) && (*in == 0x20)) break;
8491 if (in >= end) {
8492 const xmlChar *oldbase = ctxt->input->base;
8493 GROW;
8494 if (oldbase != ctxt->input->base) {
8495 long delta = ctxt->input->base - oldbase;
8496 start = start + delta;
8497 in = in + delta;
8498 }
8499 end = ctxt->input->end;
8500 }
8501 }
8502 last = in;
8503 /*
8504 * skip the trailing blanks
8505 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008506 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008507 while ((in < end) && (*in != limit) &&
8508 ((*in == 0x20) || (*in == 0x9) ||
8509 (*in == 0xA) || (*in == 0xD))) {
8510 in++;
8511 if (in >= end) {
8512 const xmlChar *oldbase = ctxt->input->base;
8513 GROW;
8514 if (oldbase != ctxt->input->base) {
8515 long delta = ctxt->input->base - oldbase;
8516 start = start + delta;
8517 in = in + delta;
8518 last = last + delta;
8519 }
8520 end = ctxt->input->end;
8521 }
8522 }
8523 if (*in != limit) goto need_complex;
8524 } else {
8525 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8526 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8527 in++;
8528 if (in >= end) {
8529 const xmlChar *oldbase = ctxt->input->base;
8530 GROW;
8531 if (oldbase != ctxt->input->base) {
8532 long delta = ctxt->input->base - oldbase;
8533 start = start + delta;
8534 in = in + delta;
8535 }
8536 end = ctxt->input->end;
8537 }
8538 }
8539 last = in;
8540 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008541 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008542 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008543 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008544 *len = last - start;
8545 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008546 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008547 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008548 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008549 }
8550 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008551 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008552 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008553need_complex:
8554 if (alloc) *alloc = 1;
8555 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008556}
8557
8558/**
8559 * xmlParseAttribute2:
8560 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008561 * @pref: the element prefix
8562 * @elem: the element name
8563 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008564 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008565 * @len: an int * to save the length of the attribute
8566 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008567 *
8568 * parse an attribute in the new SAX2 framework.
8569 *
8570 * Returns the attribute name, and the value in *value, .
8571 */
8572
8573static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008574xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008575 const xmlChar * pref, const xmlChar * elem,
8576 const xmlChar ** prefix, xmlChar ** value,
8577 int *len, int *alloc)
8578{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008579 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008580 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008581 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008582
8583 *value = NULL;
8584 GROW;
8585 name = xmlParseQName(ctxt, prefix);
8586 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008587 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8588 "error parsing attribute name\n");
8589 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008590 }
8591
8592 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008593 * get the type if needed
8594 */
8595 if (ctxt->attsSpecial != NULL) {
8596 int type;
8597
8598 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008599 pref, elem, *prefix, name);
8600 if (type != 0)
8601 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008602 }
8603
8604 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008605 * read the value
8606 */
8607 SKIP_BLANKS;
8608 if (RAW == '=') {
8609 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008610 SKIP_BLANKS;
8611 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8612 if (normalize) {
8613 /*
8614 * Sometimes a second normalisation pass for spaces is needed
8615 * but that only happens if charrefs or entities refernces
8616 * have been used in the attribute value, i.e. the attribute
8617 * value have been extracted in an allocated string already.
8618 */
8619 if (*alloc) {
8620 const xmlChar *val2;
8621
8622 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008623 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008624 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008625 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008626 }
8627 }
8628 }
8629 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008630 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008631 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8632 "Specification mandate value for attribute %s\n",
8633 name);
8634 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008635 }
8636
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008637 if (*prefix == ctxt->str_xml) {
8638 /*
8639 * Check that xml:lang conforms to the specification
8640 * No more registered as an error, just generate a warning now
8641 * since this was deprecated in XML second edition
8642 */
8643 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8644 internal_val = xmlStrndup(val, *len);
8645 if (!xmlCheckLanguageID(internal_val)) {
8646 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8647 "Malformed value for xml:lang : %s\n",
8648 internal_val, NULL);
8649 }
8650 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008651
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008652 /*
8653 * Check that xml:space conforms to the specification
8654 */
8655 if (xmlStrEqual(name, BAD_CAST "space")) {
8656 internal_val = xmlStrndup(val, *len);
8657 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8658 *(ctxt->space) = 0;
8659 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8660 *(ctxt->space) = 1;
8661 else {
8662 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8663 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8664 internal_val, NULL);
8665 }
8666 }
8667 if (internal_val) {
8668 xmlFree(internal_val);
8669 }
8670 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008671
8672 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008673 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008674}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008675/**
8676 * xmlParseStartTag2:
8677 * @ctxt: an XML parser context
8678 *
8679 * parse a start of tag either for rule element or
8680 * EmptyElement. In both case we don't parse the tag closing chars.
8681 * This routine is called when running SAX2 parsing
8682 *
8683 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8684 *
8685 * [ WFC: Unique Att Spec ]
8686 * No attribute name may appear more than once in the same start-tag or
8687 * empty-element tag.
8688 *
8689 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8690 *
8691 * [ WFC: Unique Att Spec ]
8692 * No attribute name may appear more than once in the same start-tag or
8693 * empty-element tag.
8694 *
8695 * With namespace:
8696 *
8697 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8698 *
8699 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8700 *
8701 * Returns the element name parsed
8702 */
8703
8704static const xmlChar *
8705xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008706 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008707 const xmlChar *localname;
8708 const xmlChar *prefix;
8709 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008710 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008711 const xmlChar *nsname;
8712 xmlChar *attvalue;
8713 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008714 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008715 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008716 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008717 const xmlChar *base;
8718 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008719 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008720
8721 if (RAW != '<') return(NULL);
8722 NEXT1;
8723
8724 /*
8725 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8726 * point since the attribute values may be stored as pointers to
8727 * the buffer and calling SHRINK would destroy them !
8728 * The Shrinking is only possible once the full set of attribute
8729 * callbacks have been done.
8730 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008731reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008732 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008733 base = ctxt->input->base;
8734 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008735 oldline = ctxt->input->line;
8736 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008737 nbatts = 0;
8738 nratts = 0;
8739 nbdef = 0;
8740 nbNs = 0;
8741 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008742 /* Forget any namespaces added during an earlier parse of this element. */
8743 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008744
8745 localname = xmlParseQName(ctxt, &prefix);
8746 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008747 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8748 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008749 return(NULL);
8750 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008751 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008752
8753 /*
8754 * Now parse the attributes, it ends up with the ending
8755 *
8756 * (S Attribute)* S?
8757 */
8758 SKIP_BLANKS;
8759 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008760 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008761
8762 while ((RAW != '>') &&
8763 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008764 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008765 const xmlChar *q = CUR_PTR;
8766 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008767 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008768
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008769 attname = xmlParseAttribute2(ctxt, prefix, localname,
8770 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008771 if (ctxt->input->base != base) {
8772 if ((attvalue != NULL) && (alloc != 0))
8773 xmlFree(attvalue);
8774 attvalue = NULL;
8775 goto base_changed;
8776 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008777 if ((attname != NULL) && (attvalue != NULL)) {
8778 if (len < 0) len = xmlStrlen(attvalue);
8779 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008780 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8781 xmlURIPtr uri;
8782
8783 if (*URL != 0) {
8784 uri = xmlParseURI((const char *) URL);
8785 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008786 xmlNsErr(ctxt, XML_WAR_NS_URI,
8787 "xmlns: '%s' is not a valid URI\n",
8788 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008789 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008790 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008791 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8792 "xmlns: URI %s is not absolute\n",
8793 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008794 }
8795 xmlFreeURI(uri);
8796 }
Daniel Veillard37334572008-07-31 08:20:02 +00008797 if (URL == ctxt->str_xml_ns) {
8798 if (attname != ctxt->str_xml) {
8799 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8800 "xml namespace URI cannot be the default namespace\n",
8801 NULL, NULL, NULL);
8802 }
8803 goto skip_default_ns;
8804 }
8805 if ((len == 29) &&
8806 (xmlStrEqual(URL,
8807 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8808 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8809 "reuse of the xmlns namespace name is forbidden\n",
8810 NULL, NULL, NULL);
8811 goto skip_default_ns;
8812 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008813 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008814 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008815 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008816 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008817 for (j = 1;j <= nbNs;j++)
8818 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8819 break;
8820 if (j <= nbNs)
8821 xmlErrAttributeDup(ctxt, NULL, attname);
8822 else
8823 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008824skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008825 if (alloc != 0) xmlFree(attvalue);
8826 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008827 continue;
8828 }
8829 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008830 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8831 xmlURIPtr uri;
8832
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008833 if (attname == ctxt->str_xml) {
8834 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008835 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8836 "xml namespace prefix mapped to wrong URI\n",
8837 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008838 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008839 /*
8840 * Do not keep a namespace definition node
8841 */
Daniel Veillard37334572008-07-31 08:20:02 +00008842 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008843 }
Daniel Veillard37334572008-07-31 08:20:02 +00008844 if (URL == ctxt->str_xml_ns) {
8845 if (attname != ctxt->str_xml) {
8846 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8847 "xml namespace URI mapped to wrong prefix\n",
8848 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008849 }
Daniel Veillard37334572008-07-31 08:20:02 +00008850 goto skip_ns;
8851 }
8852 if (attname == ctxt->str_xmlns) {
8853 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8854 "redefinition of the xmlns prefix is forbidden\n",
8855 NULL, NULL, NULL);
8856 goto skip_ns;
8857 }
8858 if ((len == 29) &&
8859 (xmlStrEqual(URL,
8860 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8861 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8862 "reuse of the xmlns namespace name is forbidden\n",
8863 NULL, NULL, NULL);
8864 goto skip_ns;
8865 }
8866 if ((URL == NULL) || (URL[0] == 0)) {
8867 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8868 "xmlns:%s: Empty XML namespace is not allowed\n",
8869 attname, NULL, NULL);
8870 goto skip_ns;
8871 } else {
8872 uri = xmlParseURI((const char *) URL);
8873 if (uri == NULL) {
8874 xmlNsErr(ctxt, XML_WAR_NS_URI,
8875 "xmlns:%s: '%s' is not a valid URI\n",
8876 attname, URL, NULL);
8877 } else {
8878 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8879 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8880 "xmlns:%s: URI %s is not absolute\n",
8881 attname, URL, NULL);
8882 }
8883 xmlFreeURI(uri);
8884 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008885 }
8886
Daniel Veillard0fb18932003-09-07 09:14:37 +00008887 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008888 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008889 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008890 for (j = 1;j <= nbNs;j++)
8891 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8892 break;
8893 if (j <= nbNs)
8894 xmlErrAttributeDup(ctxt, aprefix, attname);
8895 else
8896 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008897skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008898 if (alloc != 0) xmlFree(attvalue);
8899 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008900 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008901 continue;
8902 }
8903
8904 /*
8905 * Add the pair to atts
8906 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008907 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8908 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008909 if (attvalue[len] == 0)
8910 xmlFree(attvalue);
8911 goto failed;
8912 }
8913 maxatts = ctxt->maxatts;
8914 atts = ctxt->atts;
8915 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008916 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008917 atts[nbatts++] = attname;
8918 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008919 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008920 atts[nbatts++] = attvalue;
8921 attvalue += len;
8922 atts[nbatts++] = attvalue;
8923 /*
8924 * tag if some deallocation is needed
8925 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008926 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008927 } else {
8928 if ((attvalue != NULL) && (attvalue[len] == 0))
8929 xmlFree(attvalue);
8930 }
8931
Daniel Veillard37334572008-07-31 08:20:02 +00008932failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008933
8934 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008935 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008936 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8937 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008938 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008939 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8940 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008941 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008942 }
8943 SKIP_BLANKS;
8944 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8945 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008946 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008947 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008948 break;
8949 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008950 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008951 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008952 }
8953
Daniel Veillard0fb18932003-09-07 09:14:37 +00008954 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008955 * The attributes defaulting
8956 */
8957 if (ctxt->attsDefault != NULL) {
8958 xmlDefAttrsPtr defaults;
8959
8960 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8961 if (defaults != NULL) {
8962 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008963 attname = defaults->values[5 * i];
8964 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008965
8966 /*
8967 * special work for namespaces defaulted defs
8968 */
8969 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8970 /*
8971 * check that it's not a defined namespace
8972 */
8973 for (j = 1;j <= nbNs;j++)
8974 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8975 break;
8976 if (j <= nbNs) continue;
8977
8978 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008979 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008980 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008981 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008982 nbNs++;
8983 }
8984 } else if (aprefix == ctxt->str_xmlns) {
8985 /*
8986 * check that it's not a defined namespace
8987 */
8988 for (j = 1;j <= nbNs;j++)
8989 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8990 break;
8991 if (j <= nbNs) continue;
8992
8993 nsname = xmlGetNamespace(ctxt, attname);
8994 if (nsname != defaults->values[2]) {
8995 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008996 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008997 nbNs++;
8998 }
8999 } else {
9000 /*
9001 * check that it's not a defined attribute
9002 */
9003 for (j = 0;j < nbatts;j+=5) {
9004 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9005 break;
9006 }
9007 if (j < nbatts) continue;
9008
9009 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9010 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009011 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009012 }
9013 maxatts = ctxt->maxatts;
9014 atts = ctxt->atts;
9015 }
9016 atts[nbatts++] = attname;
9017 atts[nbatts++] = aprefix;
9018 if (aprefix == NULL)
9019 atts[nbatts++] = NULL;
9020 else
9021 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009022 atts[nbatts++] = defaults->values[5 * i + 2];
9023 atts[nbatts++] = defaults->values[5 * i + 3];
9024 if ((ctxt->standalone == 1) &&
9025 (defaults->values[5 * i + 4] != NULL)) {
9026 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9027 "standalone: attribute %s on %s defaulted from external subset\n",
9028 attname, localname);
9029 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009030 nbdef++;
9031 }
9032 }
9033 }
9034 }
9035
Daniel Veillarde70c8772003-11-25 07:21:18 +00009036 /*
9037 * The attributes checkings
9038 */
9039 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009040 /*
9041 * The default namespace does not apply to attribute names.
9042 */
9043 if (atts[i + 1] != NULL) {
9044 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9045 if (nsname == NULL) {
9046 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9047 "Namespace prefix %s for %s on %s is not defined\n",
9048 atts[i + 1], atts[i], localname);
9049 }
9050 atts[i + 2] = nsname;
9051 } else
9052 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009053 /*
9054 * [ WFC: Unique Att Spec ]
9055 * No attribute name may appear more than once in the same
9056 * start-tag or empty-element tag.
9057 * As extended by the Namespace in XML REC.
9058 */
9059 for (j = 0; j < i;j += 5) {
9060 if (atts[i] == atts[j]) {
9061 if (atts[i+1] == atts[j+1]) {
9062 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9063 break;
9064 }
9065 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9066 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9067 "Namespaced Attribute %s in '%s' redefined\n",
9068 atts[i], nsname, NULL);
9069 break;
9070 }
9071 }
9072 }
9073 }
9074
Daniel Veillarde57ec792003-09-10 10:50:59 +00009075 nsname = xmlGetNamespace(ctxt, prefix);
9076 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009077 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9078 "Namespace prefix %s on %s is not defined\n",
9079 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009080 }
9081 *pref = prefix;
9082 *URI = nsname;
9083
9084 /*
9085 * SAX: Start of Element !
9086 */
9087 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9088 (!ctxt->disableSAX)) {
9089 if (nbNs > 0)
9090 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9091 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9092 nbatts / 5, nbdef, atts);
9093 else
9094 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9095 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9096 }
9097
9098 /*
9099 * Free up attribute allocated strings if needed
9100 */
9101 if (attval != 0) {
9102 for (i = 3,j = 0; j < nratts;i += 5,j++)
9103 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9104 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009105 }
9106
9107 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009108
9109base_changed:
9110 /*
9111 * the attribute strings are valid iif the base didn't changed
9112 */
9113 if (attval != 0) {
9114 for (i = 3,j = 0; j < nratts;i += 5,j++)
9115 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9116 xmlFree((xmlChar *) atts[i]);
9117 }
9118 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009119 ctxt->input->line = oldline;
9120 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009121 if (ctxt->wellFormed == 1) {
9122 goto reparse;
9123 }
9124 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009125}
9126
9127/**
9128 * xmlParseEndTag2:
9129 * @ctxt: an XML parser context
9130 * @line: line of the start tag
9131 * @nsNr: number of namespaces on the start tag
9132 *
9133 * parse an end of tag
9134 *
9135 * [42] ETag ::= '</' Name S? '>'
9136 *
9137 * With namespace
9138 *
9139 * [NS 9] ETag ::= '</' QName S? '>'
9140 */
9141
9142static void
9143xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009144 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009145 const xmlChar *name;
9146
9147 GROW;
9148 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009149 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009150 return;
9151 }
9152 SKIP(2);
9153
William M. Brack13dfa872004-09-18 04:52:08 +00009154 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009155 if (ctxt->input->cur[tlen] == '>') {
9156 ctxt->input->cur += tlen + 1;
9157 goto done;
9158 }
9159 ctxt->input->cur += tlen;
9160 name = (xmlChar*)1;
9161 } else {
9162 if (prefix == NULL)
9163 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9164 else
9165 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9166 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009167
9168 /*
9169 * We should definitely be at the ending "S? '>'" part
9170 */
9171 GROW;
9172 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009173 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009174 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009175 } else
9176 NEXT1;
9177
9178 /*
9179 * [ WFC: Element Type Match ]
9180 * The Name in an element's end-tag must match the element type in the
9181 * start-tag.
9182 *
9183 */
9184 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009185 if (name == NULL) name = BAD_CAST "unparseable";
9186 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009187 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009188 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009189 }
9190
9191 /*
9192 * SAX: End of Tag
9193 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009194done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009195 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9196 (!ctxt->disableSAX))
9197 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9198
Daniel Veillard0fb18932003-09-07 09:14:37 +00009199 spacePop(ctxt);
9200 if (nsNr != 0)
9201 nsPop(ctxt, nsNr);
9202 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009203}
9204
9205/**
Owen Taylor3473f882001-02-23 17:55:21 +00009206 * xmlParseCDSect:
9207 * @ctxt: an XML parser context
9208 *
9209 * Parse escaped pure raw content.
9210 *
9211 * [18] CDSect ::= CDStart CData CDEnd
9212 *
9213 * [19] CDStart ::= '<![CDATA['
9214 *
9215 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9216 *
9217 * [21] CDEnd ::= ']]>'
9218 */
9219void
9220xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9221 xmlChar *buf = NULL;
9222 int len = 0;
9223 int size = XML_PARSER_BUFFER_SIZE;
9224 int r, rl;
9225 int s, sl;
9226 int cur, l;
9227 int count = 0;
9228
Daniel Veillard8f597c32003-10-06 08:19:27 +00009229 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009230 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009231 SKIP(9);
9232 } else
9233 return;
9234
9235 ctxt->instate = XML_PARSER_CDATA_SECTION;
9236 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009237 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009238 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009239 ctxt->instate = XML_PARSER_CONTENT;
9240 return;
9241 }
9242 NEXTL(rl);
9243 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009244 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009245 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009246 ctxt->instate = XML_PARSER_CONTENT;
9247 return;
9248 }
9249 NEXTL(sl);
9250 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009251 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009252 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009253 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009254 return;
9255 }
William M. Brack871611b2003-10-18 04:53:14 +00009256 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009257 ((r != ']') || (s != ']') || (cur != '>'))) {
9258 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009259 xmlChar *tmp;
9260
Owen Taylor3473f882001-02-23 17:55:21 +00009261 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009262 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9263 if (tmp == NULL) {
9264 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009265 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009266 return;
9267 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009268 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009269 }
9270 COPY_BUF(rl,buf,len,r);
9271 r = s;
9272 rl = sl;
9273 s = cur;
9274 sl = l;
9275 count++;
9276 if (count > 50) {
9277 GROW;
9278 count = 0;
9279 }
9280 NEXTL(l);
9281 cur = CUR_CHAR(l);
9282 }
9283 buf[len] = 0;
9284 ctxt->instate = XML_PARSER_CONTENT;
9285 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009286 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009287 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009288 xmlFree(buf);
9289 return;
9290 }
9291 NEXTL(l);
9292
9293 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009294 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009295 */
9296 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9297 if (ctxt->sax->cdataBlock != NULL)
9298 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009299 else if (ctxt->sax->characters != NULL)
9300 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009301 }
9302 xmlFree(buf);
9303}
9304
9305/**
9306 * xmlParseContent:
9307 * @ctxt: an XML parser context
9308 *
9309 * Parse a content:
9310 *
9311 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9312 */
9313
9314void
9315xmlParseContent(xmlParserCtxtPtr ctxt) {
9316 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009317 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009318 ((RAW != '<') || (NXT(1) != '/')) &&
9319 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009320 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009321 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009322 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009323
9324 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009325 * First case : a Processing Instruction.
9326 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009327 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009328 xmlParsePI(ctxt);
9329 }
9330
9331 /*
9332 * Second case : a CDSection
9333 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009334 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009335 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009336 xmlParseCDSect(ctxt);
9337 }
9338
9339 /*
9340 * Third case : a comment
9341 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009342 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009343 (NXT(2) == '-') && (NXT(3) == '-')) {
9344 xmlParseComment(ctxt);
9345 ctxt->instate = XML_PARSER_CONTENT;
9346 }
9347
9348 /*
9349 * Fourth case : a sub-element.
9350 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009351 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009352 xmlParseElement(ctxt);
9353 }
9354
9355 /*
9356 * Fifth case : a reference. If if has not been resolved,
9357 * parsing returns it's Name, create the node
9358 */
9359
Daniel Veillard21a0f912001-02-25 19:54:14 +00009360 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009361 xmlParseReference(ctxt);
9362 }
9363
9364 /*
9365 * Last case, text. Note that References are handled directly.
9366 */
9367 else {
9368 xmlParseCharData(ctxt, 0);
9369 }
9370
9371 GROW;
9372 /*
9373 * Pop-up of finished entities.
9374 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009375 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009376 xmlPopInput(ctxt);
9377 SHRINK;
9378
Daniel Veillardfdc91562002-07-01 21:52:03 +00009379 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009380 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9381 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009382 ctxt->instate = XML_PARSER_EOF;
9383 break;
9384 }
9385 }
9386}
9387
9388/**
9389 * xmlParseElement:
9390 * @ctxt: an XML parser context
9391 *
9392 * parse an XML element, this is highly recursive
9393 *
9394 * [39] element ::= EmptyElemTag | STag content ETag
9395 *
9396 * [ WFC: Element Type Match ]
9397 * The Name in an element's end-tag must match the element type in the
9398 * start-tag.
9399 *
Owen Taylor3473f882001-02-23 17:55:21 +00009400 */
9401
9402void
9403xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009404 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009405 const xmlChar *prefix;
9406 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00009407 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009408 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009409 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009410 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009411
Daniel Veillard8915c152008-08-26 13:05:34 +00009412 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9413 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9414 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9415 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9416 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009417 ctxt->instate = XML_PARSER_EOF;
9418 return;
9419 }
9420
Owen Taylor3473f882001-02-23 17:55:21 +00009421 /* Capture start position */
9422 if (ctxt->record_info) {
9423 node_info.begin_pos = ctxt->input->consumed +
9424 (CUR_PTR - ctxt->input->base);
9425 node_info.begin_line = ctxt->input->line;
9426 }
9427
9428 if (ctxt->spaceNr == 0)
9429 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009430 else if (*ctxt->space == -2)
9431 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009432 else
9433 spacePush(ctxt, *ctxt->space);
9434
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009435 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009436#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009437 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009438#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009439 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009440#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009441 else
9442 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009443#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009444 if (name == NULL) {
9445 spacePop(ctxt);
9446 return;
9447 }
9448 namePush(ctxt, name);
9449 ret = ctxt->node;
9450
Daniel Veillard4432df22003-09-28 18:58:27 +00009451#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009452 /*
9453 * [ VC: Root Element Type ]
9454 * The Name in the document type declaration must match the element
9455 * type of the root element.
9456 */
9457 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9458 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9459 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009460#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009461
9462 /*
9463 * Check for an Empty Element.
9464 */
9465 if ((RAW == '/') && (NXT(1) == '>')) {
9466 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009467 if (ctxt->sax2) {
9468 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9469 (!ctxt->disableSAX))
9470 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009471#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009472 } else {
9473 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9474 (!ctxt->disableSAX))
9475 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009476#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009477 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009478 namePop(ctxt);
9479 spacePop(ctxt);
9480 if (nsNr != ctxt->nsNr)
9481 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009482 if ( ret != NULL && ctxt->record_info ) {
9483 node_info.end_pos = ctxt->input->consumed +
9484 (CUR_PTR - ctxt->input->base);
9485 node_info.end_line = ctxt->input->line;
9486 node_info.node = ret;
9487 xmlParserAddNodeInfo(ctxt, &node_info);
9488 }
9489 return;
9490 }
9491 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009492 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009493 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009494 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9495 "Couldn't find end of Start Tag %s line %d\n",
9496 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009497
9498 /*
9499 * end of parsing of this node.
9500 */
9501 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009502 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009503 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009504 if (nsNr != ctxt->nsNr)
9505 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009506
9507 /*
9508 * Capture end position and add node
9509 */
9510 if ( ret != NULL && ctxt->record_info ) {
9511 node_info.end_pos = ctxt->input->consumed +
9512 (CUR_PTR - ctxt->input->base);
9513 node_info.end_line = ctxt->input->line;
9514 node_info.node = ret;
9515 xmlParserAddNodeInfo(ctxt, &node_info);
9516 }
9517 return;
9518 }
9519
9520 /*
9521 * Parse the content of the element:
9522 */
9523 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009524 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009525 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009526 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009527 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009528
9529 /*
9530 * end of parsing of this node.
9531 */
9532 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009533 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009534 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009535 if (nsNr != ctxt->nsNr)
9536 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009537 return;
9538 }
9539
9540 /*
9541 * parse the end of tag: '</' should be here.
9542 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009543 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009544 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009545 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009546 }
9547#ifdef LIBXML_SAX1_ENABLED
9548 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009549 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009550#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009551
9552 /*
9553 * Capture end position and add node
9554 */
9555 if ( ret != NULL && ctxt->record_info ) {
9556 node_info.end_pos = ctxt->input->consumed +
9557 (CUR_PTR - ctxt->input->base);
9558 node_info.end_line = ctxt->input->line;
9559 node_info.node = ret;
9560 xmlParserAddNodeInfo(ctxt, &node_info);
9561 }
9562}
9563
9564/**
9565 * xmlParseVersionNum:
9566 * @ctxt: an XML parser context
9567 *
9568 * parse the XML version value.
9569 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009570 * [26] VersionNum ::= '1.' [0-9]+
9571 *
9572 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009573 *
9574 * Returns the string giving the XML version number, or NULL
9575 */
9576xmlChar *
9577xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9578 xmlChar *buf = NULL;
9579 int len = 0;
9580 int size = 10;
9581 xmlChar cur;
9582
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009583 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009584 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009585 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009586 return(NULL);
9587 }
9588 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009589 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009590 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009591 return(NULL);
9592 }
9593 buf[len++] = cur;
9594 NEXT;
9595 cur=CUR;
9596 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009597 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009598 return(NULL);
9599 }
9600 buf[len++] = cur;
9601 NEXT;
9602 cur=CUR;
9603 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009604 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009605 xmlChar *tmp;
9606
Owen Taylor3473f882001-02-23 17:55:21 +00009607 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009608 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9609 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009610 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009611 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009612 return(NULL);
9613 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009614 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009615 }
9616 buf[len++] = cur;
9617 NEXT;
9618 cur=CUR;
9619 }
9620 buf[len] = 0;
9621 return(buf);
9622}
9623
9624/**
9625 * xmlParseVersionInfo:
9626 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009627 *
Owen Taylor3473f882001-02-23 17:55:21 +00009628 * parse the XML version.
9629 *
9630 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009631 *
Owen Taylor3473f882001-02-23 17:55:21 +00009632 * [25] Eq ::= S? '=' S?
9633 *
9634 * Returns the version string, e.g. "1.0"
9635 */
9636
9637xmlChar *
9638xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9639 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009640
Daniel Veillarda07050d2003-10-19 14:46:32 +00009641 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009642 SKIP(7);
9643 SKIP_BLANKS;
9644 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009645 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009646 return(NULL);
9647 }
9648 NEXT;
9649 SKIP_BLANKS;
9650 if (RAW == '"') {
9651 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009652 version = xmlParseVersionNum(ctxt);
9653 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009654 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009655 } else
9656 NEXT;
9657 } else if (RAW == '\''){
9658 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009659 version = xmlParseVersionNum(ctxt);
9660 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009661 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009662 } else
9663 NEXT;
9664 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009665 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009666 }
9667 }
9668 return(version);
9669}
9670
9671/**
9672 * xmlParseEncName:
9673 * @ctxt: an XML parser context
9674 *
9675 * parse the XML encoding name
9676 *
9677 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9678 *
9679 * Returns the encoding name value or NULL
9680 */
9681xmlChar *
9682xmlParseEncName(xmlParserCtxtPtr ctxt) {
9683 xmlChar *buf = NULL;
9684 int len = 0;
9685 int size = 10;
9686 xmlChar cur;
9687
9688 cur = CUR;
9689 if (((cur >= 'a') && (cur <= 'z')) ||
9690 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009691 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009692 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009693 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009694 return(NULL);
9695 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009696
Owen Taylor3473f882001-02-23 17:55:21 +00009697 buf[len++] = cur;
9698 NEXT;
9699 cur = CUR;
9700 while (((cur >= 'a') && (cur <= 'z')) ||
9701 ((cur >= 'A') && (cur <= 'Z')) ||
9702 ((cur >= '0') && (cur <= '9')) ||
9703 (cur == '.') || (cur == '_') ||
9704 (cur == '-')) {
9705 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009706 xmlChar *tmp;
9707
Owen Taylor3473f882001-02-23 17:55:21 +00009708 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009709 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9710 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009711 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009712 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009713 return(NULL);
9714 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009715 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009716 }
9717 buf[len++] = cur;
9718 NEXT;
9719 cur = CUR;
9720 if (cur == 0) {
9721 SHRINK;
9722 GROW;
9723 cur = CUR;
9724 }
9725 }
9726 buf[len] = 0;
9727 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009728 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009729 }
9730 return(buf);
9731}
9732
9733/**
9734 * xmlParseEncodingDecl:
9735 * @ctxt: an XML parser context
9736 *
9737 * parse the XML encoding declaration
9738 *
9739 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9740 *
9741 * this setups the conversion filters.
9742 *
9743 * Returns the encoding value or NULL
9744 */
9745
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009746const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009747xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9748 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009749
9750 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009751 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009752 SKIP(8);
9753 SKIP_BLANKS;
9754 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009755 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009756 return(NULL);
9757 }
9758 NEXT;
9759 SKIP_BLANKS;
9760 if (RAW == '"') {
9761 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009762 encoding = xmlParseEncName(ctxt);
9763 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009764 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009765 } else
9766 NEXT;
9767 } else if (RAW == '\''){
9768 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009769 encoding = xmlParseEncName(ctxt);
9770 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009771 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009772 } else
9773 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009774 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009775 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009776 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009777 /*
9778 * UTF-16 encoding stwich has already taken place at this stage,
9779 * more over the little-endian/big-endian selection is already done
9780 */
9781 if ((encoding != NULL) &&
9782 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9783 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009784 /*
9785 * If no encoding was passed to the parser, that we are
9786 * using UTF-16 and no decoder is present i.e. the
9787 * document is apparently UTF-8 compatible, then raise an
9788 * encoding mismatch fatal error
9789 */
9790 if ((ctxt->encoding == NULL) &&
9791 (ctxt->input->buf != NULL) &&
9792 (ctxt->input->buf->encoder == NULL)) {
9793 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9794 "Document labelled UTF-16 but has UTF-8 content\n");
9795 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009796 if (ctxt->encoding != NULL)
9797 xmlFree((xmlChar *) ctxt->encoding);
9798 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009799 }
9800 /*
9801 * UTF-8 encoding is handled natively
9802 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009803 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009804 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9805 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009806 if (ctxt->encoding != NULL)
9807 xmlFree((xmlChar *) ctxt->encoding);
9808 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009809 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009810 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009811 xmlCharEncodingHandlerPtr handler;
9812
9813 if (ctxt->input->encoding != NULL)
9814 xmlFree((xmlChar *) ctxt->input->encoding);
9815 ctxt->input->encoding = encoding;
9816
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009817 handler = xmlFindCharEncodingHandler((const char *) encoding);
9818 if (handler != NULL) {
9819 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009820 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009821 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009822 "Unsupported encoding %s\n", encoding);
9823 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009824 }
9825 }
9826 }
9827 return(encoding);
9828}
9829
9830/**
9831 * xmlParseSDDecl:
9832 * @ctxt: an XML parser context
9833 *
9834 * parse the XML standalone declaration
9835 *
9836 * [32] SDDecl ::= S 'standalone' Eq
9837 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9838 *
9839 * [ VC: Standalone Document Declaration ]
9840 * TODO The standalone document declaration must have the value "no"
9841 * if any external markup declarations contain declarations of:
9842 * - attributes with default values, if elements to which these
9843 * attributes apply appear in the document without specifications
9844 * of values for these attributes, or
9845 * - entities (other than amp, lt, gt, apos, quot), if references
9846 * to those entities appear in the document, or
9847 * - attributes with values subject to normalization, where the
9848 * attribute appears in the document with a value which will change
9849 * as a result of normalization, or
9850 * - element types with element content, if white space occurs directly
9851 * within any instance of those types.
9852 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009853 * Returns:
9854 * 1 if standalone="yes"
9855 * 0 if standalone="no"
9856 * -2 if standalone attribute is missing or invalid
9857 * (A standalone value of -2 means that the XML declaration was found,
9858 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009859 */
9860
9861int
9862xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009863 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009864
9865 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009866 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009867 SKIP(10);
9868 SKIP_BLANKS;
9869 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009870 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009871 return(standalone);
9872 }
9873 NEXT;
9874 SKIP_BLANKS;
9875 if (RAW == '\''){
9876 NEXT;
9877 if ((RAW == 'n') && (NXT(1) == 'o')) {
9878 standalone = 0;
9879 SKIP(2);
9880 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9881 (NXT(2) == 's')) {
9882 standalone = 1;
9883 SKIP(3);
9884 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009885 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009886 }
9887 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009888 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009889 } else
9890 NEXT;
9891 } else if (RAW == '"'){
9892 NEXT;
9893 if ((RAW == 'n') && (NXT(1) == 'o')) {
9894 standalone = 0;
9895 SKIP(2);
9896 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9897 (NXT(2) == 's')) {
9898 standalone = 1;
9899 SKIP(3);
9900 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009901 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009902 }
9903 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009904 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009905 } else
9906 NEXT;
9907 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009908 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009909 }
9910 }
9911 return(standalone);
9912}
9913
9914/**
9915 * xmlParseXMLDecl:
9916 * @ctxt: an XML parser context
9917 *
9918 * parse an XML declaration header
9919 *
9920 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9921 */
9922
9923void
9924xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9925 xmlChar *version;
9926
9927 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009928 * This value for standalone indicates that the document has an
9929 * XML declaration but it does not have a standalone attribute.
9930 * It will be overwritten later if a standalone attribute is found.
9931 */
9932 ctxt->input->standalone = -2;
9933
9934 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009935 * We know that '<?xml' is here.
9936 */
9937 SKIP(5);
9938
William M. Brack76e95df2003-10-18 16:20:14 +00009939 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009940 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9941 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009942 }
9943 SKIP_BLANKS;
9944
9945 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009946 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009947 */
9948 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009949 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009950 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009951 } else {
9952 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9953 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009954 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009955 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009956 if (ctxt->options & XML_PARSE_OLD10) {
9957 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9958 "Unsupported version '%s'\n",
9959 version);
9960 } else {
9961 if ((version[0] == '1') && ((version[1] == '.'))) {
9962 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9963 "Unsupported version '%s'\n",
9964 version, NULL);
9965 } else {
9966 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9967 "Unsupported version '%s'\n",
9968 version);
9969 }
9970 }
Daniel Veillard19840942001-11-29 16:11:38 +00009971 }
9972 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009973 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009974 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009975 }
Owen Taylor3473f882001-02-23 17:55:21 +00009976
9977 /*
9978 * We may have the encoding declaration
9979 */
William M. Brack76e95df2003-10-18 16:20:14 +00009980 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009981 if ((RAW == '?') && (NXT(1) == '>')) {
9982 SKIP(2);
9983 return;
9984 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009985 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009986 }
9987 xmlParseEncodingDecl(ctxt);
9988 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9989 /*
9990 * The XML REC instructs us to stop parsing right here
9991 */
9992 return;
9993 }
9994
9995 /*
9996 * We may have the standalone status.
9997 */
William M. Brack76e95df2003-10-18 16:20:14 +00009998 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009999 if ((RAW == '?') && (NXT(1) == '>')) {
10000 SKIP(2);
10001 return;
10002 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010003 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010004 }
10005 SKIP_BLANKS;
10006 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10007
10008 SKIP_BLANKS;
10009 if ((RAW == '?') && (NXT(1) == '>')) {
10010 SKIP(2);
10011 } else if (RAW == '>') {
10012 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010013 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010014 NEXT;
10015 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010016 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010017 MOVETO_ENDTAG(CUR_PTR);
10018 NEXT;
10019 }
10020}
10021
10022/**
10023 * xmlParseMisc:
10024 * @ctxt: an XML parser context
10025 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010026 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010027 *
10028 * [27] Misc ::= Comment | PI | S
10029 */
10030
10031void
10032xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010033 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010034 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010035 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010036 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010037 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010038 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010039 NEXT;
10040 } else
10041 xmlParseComment(ctxt);
10042 }
10043}
10044
10045/**
10046 * xmlParseDocument:
10047 * @ctxt: an XML parser context
10048 *
10049 * parse an XML document (and build a tree if using the standard SAX
10050 * interface).
10051 *
10052 * [1] document ::= prolog element Misc*
10053 *
10054 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10055 *
10056 * Returns 0, -1 in case of error. the parser context is augmented
10057 * as a result of the parsing.
10058 */
10059
10060int
10061xmlParseDocument(xmlParserCtxtPtr ctxt) {
10062 xmlChar start[4];
10063 xmlCharEncoding enc;
10064
10065 xmlInitParser();
10066
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010067 if ((ctxt == NULL) || (ctxt->input == NULL))
10068 return(-1);
10069
Owen Taylor3473f882001-02-23 17:55:21 +000010070 GROW;
10071
10072 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010073 * SAX: detecting the level.
10074 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010075 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010076
10077 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010078 * SAX: beginning of the document processing.
10079 */
10080 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10081 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10082
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010083 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10084 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010085 /*
10086 * Get the 4 first bytes and decode the charset
10087 * if enc != XML_CHAR_ENCODING_NONE
10088 * plug some encoding conversion routines.
10089 */
10090 start[0] = RAW;
10091 start[1] = NXT(1);
10092 start[2] = NXT(2);
10093 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010094 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010095 if (enc != XML_CHAR_ENCODING_NONE) {
10096 xmlSwitchEncoding(ctxt, enc);
10097 }
Owen Taylor3473f882001-02-23 17:55:21 +000010098 }
10099
10100
10101 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010102 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010103 }
10104
10105 /*
10106 * Check for the XMLDecl in the Prolog.
10107 */
10108 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010109 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010110
10111 /*
10112 * Note that we will switch encoding on the fly.
10113 */
10114 xmlParseXMLDecl(ctxt);
10115 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10116 /*
10117 * The XML REC instructs us to stop parsing right here
10118 */
10119 return(-1);
10120 }
10121 ctxt->standalone = ctxt->input->standalone;
10122 SKIP_BLANKS;
10123 } else {
10124 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10125 }
10126 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10127 ctxt->sax->startDocument(ctxt->userData);
10128
10129 /*
10130 * The Misc part of the Prolog
10131 */
10132 GROW;
10133 xmlParseMisc(ctxt);
10134
10135 /*
10136 * Then possibly doc type declaration(s) and more Misc
10137 * (doctypedecl Misc*)?
10138 */
10139 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010140 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010141
10142 ctxt->inSubset = 1;
10143 xmlParseDocTypeDecl(ctxt);
10144 if (RAW == '[') {
10145 ctxt->instate = XML_PARSER_DTD;
10146 xmlParseInternalSubset(ctxt);
10147 }
10148
10149 /*
10150 * Create and update the external subset.
10151 */
10152 ctxt->inSubset = 2;
10153 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10154 (!ctxt->disableSAX))
10155 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10156 ctxt->extSubSystem, ctxt->extSubURI);
10157 ctxt->inSubset = 0;
10158
Daniel Veillardac4118d2008-01-11 05:27:32 +000010159 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010160
10161 ctxt->instate = XML_PARSER_PROLOG;
10162 xmlParseMisc(ctxt);
10163 }
10164
10165 /*
10166 * Time to start parsing the tree itself
10167 */
10168 GROW;
10169 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010170 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10171 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010172 } else {
10173 ctxt->instate = XML_PARSER_CONTENT;
10174 xmlParseElement(ctxt);
10175 ctxt->instate = XML_PARSER_EPILOG;
10176
10177
10178 /*
10179 * The Misc part at the end
10180 */
10181 xmlParseMisc(ctxt);
10182
Daniel Veillard561b7f82002-03-20 21:55:57 +000010183 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010184 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010185 }
10186 ctxt->instate = XML_PARSER_EOF;
10187 }
10188
10189 /*
10190 * SAX: end of the document processing.
10191 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010192 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010193 ctxt->sax->endDocument(ctxt->userData);
10194
Daniel Veillard5997aca2002-03-18 18:36:20 +000010195 /*
10196 * Remove locally kept entity definitions if the tree was not built
10197 */
10198 if ((ctxt->myDoc != NULL) &&
10199 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10200 xmlFreeDoc(ctxt->myDoc);
10201 ctxt->myDoc = NULL;
10202 }
10203
Daniel Veillardae0765b2008-07-31 19:54:59 +000010204 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10205 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10206 if (ctxt->valid)
10207 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10208 if (ctxt->nsWellFormed)
10209 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10210 if (ctxt->options & XML_PARSE_OLD10)
10211 ctxt->myDoc->properties |= XML_DOC_OLD10;
10212 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010213 if (! ctxt->wellFormed) {
10214 ctxt->valid = 0;
10215 return(-1);
10216 }
Owen Taylor3473f882001-02-23 17:55:21 +000010217 return(0);
10218}
10219
10220/**
10221 * xmlParseExtParsedEnt:
10222 * @ctxt: an XML parser context
10223 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010224 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010225 * An external general parsed entity is well-formed if it matches the
10226 * production labeled extParsedEnt.
10227 *
10228 * [78] extParsedEnt ::= TextDecl? content
10229 *
10230 * Returns 0, -1 in case of error. the parser context is augmented
10231 * as a result of the parsing.
10232 */
10233
10234int
10235xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10236 xmlChar start[4];
10237 xmlCharEncoding enc;
10238
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010239 if ((ctxt == NULL) || (ctxt->input == NULL))
10240 return(-1);
10241
Owen Taylor3473f882001-02-23 17:55:21 +000010242 xmlDefaultSAXHandlerInit();
10243
Daniel Veillard309f81d2003-09-23 09:02:53 +000010244 xmlDetectSAX2(ctxt);
10245
Owen Taylor3473f882001-02-23 17:55:21 +000010246 GROW;
10247
10248 /*
10249 * SAX: beginning of the document processing.
10250 */
10251 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10252 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10253
10254 /*
10255 * Get the 4 first bytes and decode the charset
10256 * if enc != XML_CHAR_ENCODING_NONE
10257 * plug some encoding conversion routines.
10258 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010259 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10260 start[0] = RAW;
10261 start[1] = NXT(1);
10262 start[2] = NXT(2);
10263 start[3] = NXT(3);
10264 enc = xmlDetectCharEncoding(start, 4);
10265 if (enc != XML_CHAR_ENCODING_NONE) {
10266 xmlSwitchEncoding(ctxt, enc);
10267 }
Owen Taylor3473f882001-02-23 17:55:21 +000010268 }
10269
10270
10271 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010272 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010273 }
10274
10275 /*
10276 * Check for the XMLDecl in the Prolog.
10277 */
10278 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010279 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010280
10281 /*
10282 * Note that we will switch encoding on the fly.
10283 */
10284 xmlParseXMLDecl(ctxt);
10285 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10286 /*
10287 * The XML REC instructs us to stop parsing right here
10288 */
10289 return(-1);
10290 }
10291 SKIP_BLANKS;
10292 } else {
10293 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10294 }
10295 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10296 ctxt->sax->startDocument(ctxt->userData);
10297
10298 /*
10299 * Doing validity checking on chunk doesn't make sense
10300 */
10301 ctxt->instate = XML_PARSER_CONTENT;
10302 ctxt->validate = 0;
10303 ctxt->loadsubset = 0;
10304 ctxt->depth = 0;
10305
10306 xmlParseContent(ctxt);
10307
10308 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010309 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010310 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010311 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010312 }
10313
10314 /*
10315 * SAX: end of the document processing.
10316 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010317 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010318 ctxt->sax->endDocument(ctxt->userData);
10319
10320 if (! ctxt->wellFormed) return(-1);
10321 return(0);
10322}
10323
Daniel Veillard73b013f2003-09-30 12:36:01 +000010324#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010325/************************************************************************
10326 * *
10327 * Progressive parsing interfaces *
10328 * *
10329 ************************************************************************/
10330
10331/**
10332 * xmlParseLookupSequence:
10333 * @ctxt: an XML parser context
10334 * @first: the first char to lookup
10335 * @next: the next char to lookup or zero
10336 * @third: the next char to lookup or zero
10337 *
10338 * Try to find if a sequence (first, next, third) or just (first next) or
10339 * (first) is available in the input stream.
10340 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10341 * to avoid rescanning sequences of bytes, it DOES change the state of the
10342 * parser, do not use liberally.
10343 *
10344 * Returns the index to the current parsing point if the full sequence
10345 * is available, -1 otherwise.
10346 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010347static int
Owen Taylor3473f882001-02-23 17:55:21 +000010348xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10349 xmlChar next, xmlChar third) {
10350 int base, len;
10351 xmlParserInputPtr in;
10352 const xmlChar *buf;
10353
10354 in = ctxt->input;
10355 if (in == NULL) return(-1);
10356 base = in->cur - in->base;
10357 if (base < 0) return(-1);
10358 if (ctxt->checkIndex > base)
10359 base = ctxt->checkIndex;
10360 if (in->buf == NULL) {
10361 buf = in->base;
10362 len = in->length;
10363 } else {
10364 buf = in->buf->buffer->content;
10365 len = in->buf->buffer->use;
10366 }
10367 /* take into account the sequence length */
10368 if (third) len -= 2;
10369 else if (next) len --;
10370 for (;base < len;base++) {
10371 if (buf[base] == first) {
10372 if (third != 0) {
10373 if ((buf[base + 1] != next) ||
10374 (buf[base + 2] != third)) continue;
10375 } else if (next != 0) {
10376 if (buf[base + 1] != next) continue;
10377 }
10378 ctxt->checkIndex = 0;
10379#ifdef DEBUG_PUSH
10380 if (next == 0)
10381 xmlGenericError(xmlGenericErrorContext,
10382 "PP: lookup '%c' found at %d\n",
10383 first, base);
10384 else if (third == 0)
10385 xmlGenericError(xmlGenericErrorContext,
10386 "PP: lookup '%c%c' found at %d\n",
10387 first, next, base);
10388 else
10389 xmlGenericError(xmlGenericErrorContext,
10390 "PP: lookup '%c%c%c' found at %d\n",
10391 first, next, third, base);
10392#endif
10393 return(base - (in->cur - in->base));
10394 }
10395 }
10396 ctxt->checkIndex = base;
10397#ifdef DEBUG_PUSH
10398 if (next == 0)
10399 xmlGenericError(xmlGenericErrorContext,
10400 "PP: lookup '%c' failed\n", first);
10401 else if (third == 0)
10402 xmlGenericError(xmlGenericErrorContext,
10403 "PP: lookup '%c%c' failed\n", first, next);
10404 else
10405 xmlGenericError(xmlGenericErrorContext,
10406 "PP: lookup '%c%c%c' failed\n", first, next, third);
10407#endif
10408 return(-1);
10409}
10410
10411/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010412 * xmlParseGetLasts:
10413 * @ctxt: an XML parser context
10414 * @lastlt: pointer to store the last '<' from the input
10415 * @lastgt: pointer to store the last '>' from the input
10416 *
10417 * Lookup the last < and > in the current chunk
10418 */
10419static void
10420xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10421 const xmlChar **lastgt) {
10422 const xmlChar *tmp;
10423
10424 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10425 xmlGenericError(xmlGenericErrorContext,
10426 "Internal error: xmlParseGetLasts\n");
10427 return;
10428 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010429 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010430 tmp = ctxt->input->end;
10431 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010432 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010433 if (tmp < ctxt->input->base) {
10434 *lastlt = NULL;
10435 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010436 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010437 *lastlt = tmp;
10438 tmp++;
10439 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10440 if (*tmp == '\'') {
10441 tmp++;
10442 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10443 if (tmp < ctxt->input->end) tmp++;
10444 } else if (*tmp == '"') {
10445 tmp++;
10446 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10447 if (tmp < ctxt->input->end) tmp++;
10448 } else
10449 tmp++;
10450 }
10451 if (tmp < ctxt->input->end)
10452 *lastgt = tmp;
10453 else {
10454 tmp = *lastlt;
10455 tmp--;
10456 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10457 if (tmp >= ctxt->input->base)
10458 *lastgt = tmp;
10459 else
10460 *lastgt = NULL;
10461 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010462 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010463 } else {
10464 *lastlt = NULL;
10465 *lastgt = NULL;
10466 }
10467}
10468/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010469 * xmlCheckCdataPush:
10470 * @cur: pointer to the bock of characters
10471 * @len: length of the block in bytes
10472 *
10473 * Check that the block of characters is okay as SCdata content [20]
10474 *
10475 * Returns the number of bytes to pass if okay, a negative index where an
10476 * UTF-8 error occured otherwise
10477 */
10478static int
10479xmlCheckCdataPush(const xmlChar *utf, int len) {
10480 int ix;
10481 unsigned char c;
10482 int codepoint;
10483
10484 if ((utf == NULL) || (len <= 0))
10485 return(0);
10486
10487 for (ix = 0; ix < len;) { /* string is 0-terminated */
10488 c = utf[ix];
10489 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10490 if (c >= 0x20)
10491 ix++;
10492 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10493 ix++;
10494 else
10495 return(-ix);
10496 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10497 if (ix + 2 > len) return(ix);
10498 if ((utf[ix+1] & 0xc0 ) != 0x80)
10499 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010500 codepoint = (utf[ix] & 0x1f) << 6;
10501 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010502 if (!xmlIsCharQ(codepoint))
10503 return(-ix);
10504 ix += 2;
10505 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10506 if (ix + 3 > len) return(ix);
10507 if (((utf[ix+1] & 0xc0) != 0x80) ||
10508 ((utf[ix+2] & 0xc0) != 0x80))
10509 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010510 codepoint = (utf[ix] & 0xf) << 12;
10511 codepoint |= (utf[ix+1] & 0x3f) << 6;
10512 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010513 if (!xmlIsCharQ(codepoint))
10514 return(-ix);
10515 ix += 3;
10516 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10517 if (ix + 4 > len) return(ix);
10518 if (((utf[ix+1] & 0xc0) != 0x80) ||
10519 ((utf[ix+2] & 0xc0) != 0x80) ||
10520 ((utf[ix+3] & 0xc0) != 0x80))
10521 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010522 codepoint = (utf[ix] & 0x7) << 18;
10523 codepoint |= (utf[ix+1] & 0x3f) << 12;
10524 codepoint |= (utf[ix+2] & 0x3f) << 6;
10525 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010526 if (!xmlIsCharQ(codepoint))
10527 return(-ix);
10528 ix += 4;
10529 } else /* unknown encoding */
10530 return(-ix);
10531 }
10532 return(ix);
10533}
10534
10535/**
Owen Taylor3473f882001-02-23 17:55:21 +000010536 * xmlParseTryOrFinish:
10537 * @ctxt: an XML parser context
10538 * @terminate: last chunk indicator
10539 *
10540 * Try to progress on parsing
10541 *
10542 * Returns zero if no parsing was possible
10543 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010544static int
Owen Taylor3473f882001-02-23 17:55:21 +000010545xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10546 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010547 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010548 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010549 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010550
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010551 if (ctxt->input == NULL)
10552 return(0);
10553
Owen Taylor3473f882001-02-23 17:55:21 +000010554#ifdef DEBUG_PUSH
10555 switch (ctxt->instate) {
10556 case XML_PARSER_EOF:
10557 xmlGenericError(xmlGenericErrorContext,
10558 "PP: try EOF\n"); break;
10559 case XML_PARSER_START:
10560 xmlGenericError(xmlGenericErrorContext,
10561 "PP: try START\n"); break;
10562 case XML_PARSER_MISC:
10563 xmlGenericError(xmlGenericErrorContext,
10564 "PP: try MISC\n");break;
10565 case XML_PARSER_COMMENT:
10566 xmlGenericError(xmlGenericErrorContext,
10567 "PP: try COMMENT\n");break;
10568 case XML_PARSER_PROLOG:
10569 xmlGenericError(xmlGenericErrorContext,
10570 "PP: try PROLOG\n");break;
10571 case XML_PARSER_START_TAG:
10572 xmlGenericError(xmlGenericErrorContext,
10573 "PP: try START_TAG\n");break;
10574 case XML_PARSER_CONTENT:
10575 xmlGenericError(xmlGenericErrorContext,
10576 "PP: try CONTENT\n");break;
10577 case XML_PARSER_CDATA_SECTION:
10578 xmlGenericError(xmlGenericErrorContext,
10579 "PP: try CDATA_SECTION\n");break;
10580 case XML_PARSER_END_TAG:
10581 xmlGenericError(xmlGenericErrorContext,
10582 "PP: try END_TAG\n");break;
10583 case XML_PARSER_ENTITY_DECL:
10584 xmlGenericError(xmlGenericErrorContext,
10585 "PP: try ENTITY_DECL\n");break;
10586 case XML_PARSER_ENTITY_VALUE:
10587 xmlGenericError(xmlGenericErrorContext,
10588 "PP: try ENTITY_VALUE\n");break;
10589 case XML_PARSER_ATTRIBUTE_VALUE:
10590 xmlGenericError(xmlGenericErrorContext,
10591 "PP: try ATTRIBUTE_VALUE\n");break;
10592 case XML_PARSER_DTD:
10593 xmlGenericError(xmlGenericErrorContext,
10594 "PP: try DTD\n");break;
10595 case XML_PARSER_EPILOG:
10596 xmlGenericError(xmlGenericErrorContext,
10597 "PP: try EPILOG\n");break;
10598 case XML_PARSER_PI:
10599 xmlGenericError(xmlGenericErrorContext,
10600 "PP: try PI\n");break;
10601 case XML_PARSER_IGNORE:
10602 xmlGenericError(xmlGenericErrorContext,
10603 "PP: try IGNORE\n");break;
10604 }
10605#endif
10606
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010607 if ((ctxt->input != NULL) &&
10608 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010609 xmlSHRINK(ctxt);
10610 ctxt->checkIndex = 0;
10611 }
10612 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010613
Daniel Veillarda880b122003-04-21 21:36:41 +000010614 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010615 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010616 return(0);
10617
10618
Owen Taylor3473f882001-02-23 17:55:21 +000010619 /*
10620 * Pop-up of finished entities.
10621 */
10622 while ((RAW == 0) && (ctxt->inputNr > 1))
10623 xmlPopInput(ctxt);
10624
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010625 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010626 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010627 avail = ctxt->input->length -
10628 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010629 else {
10630 /*
10631 * If we are operating on converted input, try to flush
10632 * remainng chars to avoid them stalling in the non-converted
10633 * buffer.
10634 */
10635 if ((ctxt->input->buf->raw != NULL) &&
10636 (ctxt->input->buf->raw->use > 0)) {
10637 int base = ctxt->input->base -
10638 ctxt->input->buf->buffer->content;
10639 int current = ctxt->input->cur - ctxt->input->base;
10640
10641 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10642 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10643 ctxt->input->cur = ctxt->input->base + current;
10644 ctxt->input->end =
10645 &ctxt->input->buf->buffer->content[
10646 ctxt->input->buf->buffer->use];
10647 }
10648 avail = ctxt->input->buf->buffer->use -
10649 (ctxt->input->cur - ctxt->input->base);
10650 }
Owen Taylor3473f882001-02-23 17:55:21 +000010651 if (avail < 1)
10652 goto done;
10653 switch (ctxt->instate) {
10654 case XML_PARSER_EOF:
10655 /*
10656 * Document parsing is done !
10657 */
10658 goto done;
10659 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010660 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10661 xmlChar start[4];
10662 xmlCharEncoding enc;
10663
10664 /*
10665 * Very first chars read from the document flow.
10666 */
10667 if (avail < 4)
10668 goto done;
10669
10670 /*
10671 * Get the 4 first bytes and decode the charset
10672 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010673 * plug some encoding conversion routines,
10674 * else xmlSwitchEncoding will set to (default)
10675 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010676 */
10677 start[0] = RAW;
10678 start[1] = NXT(1);
10679 start[2] = NXT(2);
10680 start[3] = NXT(3);
10681 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010682 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010683 break;
10684 }
Owen Taylor3473f882001-02-23 17:55:21 +000010685
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010686 if (avail < 2)
10687 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010688 cur = ctxt->input->cur[0];
10689 next = ctxt->input->cur[1];
10690 if (cur == 0) {
10691 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10692 ctxt->sax->setDocumentLocator(ctxt->userData,
10693 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010694 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010695 ctxt->instate = XML_PARSER_EOF;
10696#ifdef DEBUG_PUSH
10697 xmlGenericError(xmlGenericErrorContext,
10698 "PP: entering EOF\n");
10699#endif
10700 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10701 ctxt->sax->endDocument(ctxt->userData);
10702 goto done;
10703 }
10704 if ((cur == '<') && (next == '?')) {
10705 /* PI or XML decl */
10706 if (avail < 5) return(ret);
10707 if ((!terminate) &&
10708 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10709 return(ret);
10710 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10711 ctxt->sax->setDocumentLocator(ctxt->userData,
10712 &xmlDefaultSAXLocator);
10713 if ((ctxt->input->cur[2] == 'x') &&
10714 (ctxt->input->cur[3] == 'm') &&
10715 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010716 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010717 ret += 5;
10718#ifdef DEBUG_PUSH
10719 xmlGenericError(xmlGenericErrorContext,
10720 "PP: Parsing XML Decl\n");
10721#endif
10722 xmlParseXMLDecl(ctxt);
10723 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10724 /*
10725 * The XML REC instructs us to stop parsing right
10726 * here
10727 */
10728 ctxt->instate = XML_PARSER_EOF;
10729 return(0);
10730 }
10731 ctxt->standalone = ctxt->input->standalone;
10732 if ((ctxt->encoding == NULL) &&
10733 (ctxt->input->encoding != NULL))
10734 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10735 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10736 (!ctxt->disableSAX))
10737 ctxt->sax->startDocument(ctxt->userData);
10738 ctxt->instate = XML_PARSER_MISC;
10739#ifdef DEBUG_PUSH
10740 xmlGenericError(xmlGenericErrorContext,
10741 "PP: entering MISC\n");
10742#endif
10743 } else {
10744 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10745 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10746 (!ctxt->disableSAX))
10747 ctxt->sax->startDocument(ctxt->userData);
10748 ctxt->instate = XML_PARSER_MISC;
10749#ifdef DEBUG_PUSH
10750 xmlGenericError(xmlGenericErrorContext,
10751 "PP: entering MISC\n");
10752#endif
10753 }
10754 } else {
10755 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10756 ctxt->sax->setDocumentLocator(ctxt->userData,
10757 &xmlDefaultSAXLocator);
10758 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010759 if (ctxt->version == NULL) {
10760 xmlErrMemory(ctxt, NULL);
10761 break;
10762 }
Owen Taylor3473f882001-02-23 17:55:21 +000010763 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10764 (!ctxt->disableSAX))
10765 ctxt->sax->startDocument(ctxt->userData);
10766 ctxt->instate = XML_PARSER_MISC;
10767#ifdef DEBUG_PUSH
10768 xmlGenericError(xmlGenericErrorContext,
10769 "PP: entering MISC\n");
10770#endif
10771 }
10772 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010773 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010774 const xmlChar *name;
10775 const xmlChar *prefix;
10776 const xmlChar *URI;
10777 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010778
10779 if ((avail < 2) && (ctxt->inputNr == 1))
10780 goto done;
10781 cur = ctxt->input->cur[0];
10782 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010783 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010784 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010785 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10786 ctxt->sax->endDocument(ctxt->userData);
10787 goto done;
10788 }
10789 if (!terminate) {
10790 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010791 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010792 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010793 goto done;
10794 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10795 goto done;
10796 }
10797 }
10798 if (ctxt->spaceNr == 0)
10799 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010800 else if (*ctxt->space == -2)
10801 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010802 else
10803 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010804#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010805 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010806#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010807 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010808#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010809 else
10810 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010811#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010812 if (name == NULL) {
10813 spacePop(ctxt);
10814 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010815 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10816 ctxt->sax->endDocument(ctxt->userData);
10817 goto done;
10818 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010819#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010820 /*
10821 * [ VC: Root Element Type ]
10822 * The Name in the document type declaration must match
10823 * the element type of the root element.
10824 */
10825 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10826 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10827 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010828#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010829
10830 /*
10831 * Check for an Empty Element.
10832 */
10833 if ((RAW == '/') && (NXT(1) == '>')) {
10834 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010835
10836 if (ctxt->sax2) {
10837 if ((ctxt->sax != NULL) &&
10838 (ctxt->sax->endElementNs != NULL) &&
10839 (!ctxt->disableSAX))
10840 ctxt->sax->endElementNs(ctxt->userData, name,
10841 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010842 if (ctxt->nsNr - nsNr > 0)
10843 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010844#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010845 } else {
10846 if ((ctxt->sax != NULL) &&
10847 (ctxt->sax->endElement != NULL) &&
10848 (!ctxt->disableSAX))
10849 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010850#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010851 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010852 spacePop(ctxt);
10853 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010854 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010855 } else {
10856 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010857 }
10858 break;
10859 }
10860 if (RAW == '>') {
10861 NEXT;
10862 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010863 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010864 "Couldn't find end of Start Tag %s\n",
10865 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010866 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010867 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010868 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010869 if (ctxt->sax2)
10870 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010871#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010872 else
10873 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010874#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010875
Daniel Veillarda880b122003-04-21 21:36:41 +000010876 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010877 break;
10878 }
10879 case XML_PARSER_CONTENT: {
10880 const xmlChar *test;
10881 unsigned int cons;
10882 if ((avail < 2) && (ctxt->inputNr == 1))
10883 goto done;
10884 cur = ctxt->input->cur[0];
10885 next = ctxt->input->cur[1];
10886
10887 test = CUR_PTR;
10888 cons = ctxt->input->consumed;
10889 if ((cur == '<') && (next == '/')) {
10890 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010891 break;
10892 } else if ((cur == '<') && (next == '?')) {
10893 if ((!terminate) &&
10894 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10895 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010896 xmlParsePI(ctxt);
10897 } else if ((cur == '<') && (next != '!')) {
10898 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010899 break;
10900 } else if ((cur == '<') && (next == '!') &&
10901 (ctxt->input->cur[2] == '-') &&
10902 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010903 int term;
10904
10905 if (avail < 4)
10906 goto done;
10907 ctxt->input->cur += 4;
10908 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10909 ctxt->input->cur -= 4;
10910 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010911 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010912 xmlParseComment(ctxt);
10913 ctxt->instate = XML_PARSER_CONTENT;
10914 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10915 (ctxt->input->cur[2] == '[') &&
10916 (ctxt->input->cur[3] == 'C') &&
10917 (ctxt->input->cur[4] == 'D') &&
10918 (ctxt->input->cur[5] == 'A') &&
10919 (ctxt->input->cur[6] == 'T') &&
10920 (ctxt->input->cur[7] == 'A') &&
10921 (ctxt->input->cur[8] == '[')) {
10922 SKIP(9);
10923 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010924 break;
10925 } else if ((cur == '<') && (next == '!') &&
10926 (avail < 9)) {
10927 goto done;
10928 } else if (cur == '&') {
10929 if ((!terminate) &&
10930 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10931 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010932 xmlParseReference(ctxt);
10933 } else {
10934 /* TODO Avoid the extra copy, handle directly !!! */
10935 /*
10936 * Goal of the following test is:
10937 * - minimize calls to the SAX 'character' callback
10938 * when they are mergeable
10939 * - handle an problem for isBlank when we only parse
10940 * a sequence of blank chars and the next one is
10941 * not available to check against '<' presence.
10942 * - tries to homogenize the differences in SAX
10943 * callbacks between the push and pull versions
10944 * of the parser.
10945 */
10946 if ((ctxt->inputNr == 1) &&
10947 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10948 if (!terminate) {
10949 if (ctxt->progressive) {
10950 if ((lastlt == NULL) ||
10951 (ctxt->input->cur > lastlt))
10952 goto done;
10953 } else if (xmlParseLookupSequence(ctxt,
10954 '<', 0, 0) < 0) {
10955 goto done;
10956 }
10957 }
10958 }
10959 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010960 xmlParseCharData(ctxt, 0);
10961 }
10962 /*
10963 * Pop-up of finished entities.
10964 */
10965 while ((RAW == 0) && (ctxt->inputNr > 1))
10966 xmlPopInput(ctxt);
10967 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010968 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10969 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010970 ctxt->instate = XML_PARSER_EOF;
10971 break;
10972 }
10973 break;
10974 }
10975 case XML_PARSER_END_TAG:
10976 if (avail < 2)
10977 goto done;
10978 if (!terminate) {
10979 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010980 /* > can be found unescaped in attribute values */
10981 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010982 goto done;
10983 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10984 goto done;
10985 }
10986 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010987 if (ctxt->sax2) {
10988 xmlParseEndTag2(ctxt,
10989 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10990 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010991 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010992 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010993 }
10994#ifdef LIBXML_SAX1_ENABLED
10995 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010996 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010997#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010998 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010999 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011000 } else {
11001 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011002 }
11003 break;
11004 case XML_PARSER_CDATA_SECTION: {
11005 /*
11006 * The Push mode need to have the SAX callback for
11007 * cdataBlock merge back contiguous callbacks.
11008 */
11009 int base;
11010
11011 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11012 if (base < 0) {
11013 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011014 int tmp;
11015
11016 tmp = xmlCheckCdataPush(ctxt->input->cur,
11017 XML_PARSER_BIG_BUFFER_SIZE);
11018 if (tmp < 0) {
11019 tmp = -tmp;
11020 ctxt->input->cur += tmp;
11021 goto encoding_error;
11022 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011023 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11024 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011025 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011026 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011027 else if (ctxt->sax->characters != NULL)
11028 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011029 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011030 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011031 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011032 ctxt->checkIndex = 0;
11033 }
11034 goto done;
11035 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011036 int tmp;
11037
11038 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11039 if ((tmp < 0) || (tmp != base)) {
11040 tmp = -tmp;
11041 ctxt->input->cur += tmp;
11042 goto encoding_error;
11043 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011044 if ((ctxt->sax != NULL) && (base == 0) &&
11045 (ctxt->sax->cdataBlock != NULL) &&
11046 (!ctxt->disableSAX)) {
11047 /*
11048 * Special case to provide identical behaviour
11049 * between pull and push parsers on enpty CDATA
11050 * sections
11051 */
11052 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11053 (!strncmp((const char *)&ctxt->input->cur[-9],
11054 "<![CDATA[", 9)))
11055 ctxt->sax->cdataBlock(ctxt->userData,
11056 BAD_CAST "", 0);
11057 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011058 (!ctxt->disableSAX)) {
11059 if (ctxt->sax->cdataBlock != NULL)
11060 ctxt->sax->cdataBlock(ctxt->userData,
11061 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011062 else if (ctxt->sax->characters != NULL)
11063 ctxt->sax->characters(ctxt->userData,
11064 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011065 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011066 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011067 ctxt->checkIndex = 0;
11068 ctxt->instate = XML_PARSER_CONTENT;
11069#ifdef DEBUG_PUSH
11070 xmlGenericError(xmlGenericErrorContext,
11071 "PP: entering CONTENT\n");
11072#endif
11073 }
11074 break;
11075 }
Owen Taylor3473f882001-02-23 17:55:21 +000011076 case XML_PARSER_MISC:
11077 SKIP_BLANKS;
11078 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011079 avail = ctxt->input->length -
11080 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011081 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011082 avail = ctxt->input->buf->buffer->use -
11083 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011084 if (avail < 2)
11085 goto done;
11086 cur = ctxt->input->cur[0];
11087 next = ctxt->input->cur[1];
11088 if ((cur == '<') && (next == '?')) {
11089 if ((!terminate) &&
11090 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11091 goto done;
11092#ifdef DEBUG_PUSH
11093 xmlGenericError(xmlGenericErrorContext,
11094 "PP: Parsing PI\n");
11095#endif
11096 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011097 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011098 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011099 (ctxt->input->cur[2] == '-') &&
11100 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011101 if ((!terminate) &&
11102 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11103 goto done;
11104#ifdef DEBUG_PUSH
11105 xmlGenericError(xmlGenericErrorContext,
11106 "PP: Parsing Comment\n");
11107#endif
11108 xmlParseComment(ctxt);
11109 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011110 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011111 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011112 (ctxt->input->cur[2] == 'D') &&
11113 (ctxt->input->cur[3] == 'O') &&
11114 (ctxt->input->cur[4] == 'C') &&
11115 (ctxt->input->cur[5] == 'T') &&
11116 (ctxt->input->cur[6] == 'Y') &&
11117 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011118 (ctxt->input->cur[8] == 'E')) {
11119 if ((!terminate) &&
11120 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11121 goto done;
11122#ifdef DEBUG_PUSH
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: Parsing internal subset\n");
11125#endif
11126 ctxt->inSubset = 1;
11127 xmlParseDocTypeDecl(ctxt);
11128 if (RAW == '[') {
11129 ctxt->instate = XML_PARSER_DTD;
11130#ifdef DEBUG_PUSH
11131 xmlGenericError(xmlGenericErrorContext,
11132 "PP: entering DTD\n");
11133#endif
11134 } else {
11135 /*
11136 * Create and update the external subset.
11137 */
11138 ctxt->inSubset = 2;
11139 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11140 (ctxt->sax->externalSubset != NULL))
11141 ctxt->sax->externalSubset(ctxt->userData,
11142 ctxt->intSubName, ctxt->extSubSystem,
11143 ctxt->extSubURI);
11144 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011145 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011146 ctxt->instate = XML_PARSER_PROLOG;
11147#ifdef DEBUG_PUSH
11148 xmlGenericError(xmlGenericErrorContext,
11149 "PP: entering PROLOG\n");
11150#endif
11151 }
11152 } else if ((cur == '<') && (next == '!') &&
11153 (avail < 9)) {
11154 goto done;
11155 } else {
11156 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011157 ctxt->progressive = 1;
11158 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011159#ifdef DEBUG_PUSH
11160 xmlGenericError(xmlGenericErrorContext,
11161 "PP: entering START_TAG\n");
11162#endif
11163 }
11164 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011165 case XML_PARSER_PROLOG:
11166 SKIP_BLANKS;
11167 if (ctxt->input->buf == NULL)
11168 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11169 else
11170 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11171 if (avail < 2)
11172 goto done;
11173 cur = ctxt->input->cur[0];
11174 next = ctxt->input->cur[1];
11175 if ((cur == '<') && (next == '?')) {
11176 if ((!terminate) &&
11177 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11178 goto done;
11179#ifdef DEBUG_PUSH
11180 xmlGenericError(xmlGenericErrorContext,
11181 "PP: Parsing PI\n");
11182#endif
11183 xmlParsePI(ctxt);
11184 } else if ((cur == '<') && (next == '!') &&
11185 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11186 if ((!terminate) &&
11187 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11188 goto done;
11189#ifdef DEBUG_PUSH
11190 xmlGenericError(xmlGenericErrorContext,
11191 "PP: Parsing Comment\n");
11192#endif
11193 xmlParseComment(ctxt);
11194 ctxt->instate = XML_PARSER_PROLOG;
11195 } else if ((cur == '<') && (next == '!') &&
11196 (avail < 4)) {
11197 goto done;
11198 } else {
11199 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011200 if (ctxt->progressive == 0)
11201 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011202 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011203#ifdef DEBUG_PUSH
11204 xmlGenericError(xmlGenericErrorContext,
11205 "PP: entering START_TAG\n");
11206#endif
11207 }
11208 break;
11209 case XML_PARSER_EPILOG:
11210 SKIP_BLANKS;
11211 if (ctxt->input->buf == NULL)
11212 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11213 else
11214 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11215 if (avail < 2)
11216 goto done;
11217 cur = ctxt->input->cur[0];
11218 next = ctxt->input->cur[1];
11219 if ((cur == '<') && (next == '?')) {
11220 if ((!terminate) &&
11221 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11222 goto done;
11223#ifdef DEBUG_PUSH
11224 xmlGenericError(xmlGenericErrorContext,
11225 "PP: Parsing PI\n");
11226#endif
11227 xmlParsePI(ctxt);
11228 ctxt->instate = XML_PARSER_EPILOG;
11229 } else if ((cur == '<') && (next == '!') &&
11230 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11231 if ((!terminate) &&
11232 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11233 goto done;
11234#ifdef DEBUG_PUSH
11235 xmlGenericError(xmlGenericErrorContext,
11236 "PP: Parsing Comment\n");
11237#endif
11238 xmlParseComment(ctxt);
11239 ctxt->instate = XML_PARSER_EPILOG;
11240 } else if ((cur == '<') && (next == '!') &&
11241 (avail < 4)) {
11242 goto done;
11243 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011244 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011245 ctxt->instate = XML_PARSER_EOF;
11246#ifdef DEBUG_PUSH
11247 xmlGenericError(xmlGenericErrorContext,
11248 "PP: entering EOF\n");
11249#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011250 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011251 ctxt->sax->endDocument(ctxt->userData);
11252 goto done;
11253 }
11254 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011255 case XML_PARSER_DTD: {
11256 /*
11257 * Sorry but progressive parsing of the internal subset
11258 * is not expected to be supported. We first check that
11259 * the full content of the internal subset is available and
11260 * the parsing is launched only at that point.
11261 * Internal subset ends up with "']' S? '>'" in an unescaped
11262 * section and not in a ']]>' sequence which are conditional
11263 * sections (whoever argued to keep that crap in XML deserve
11264 * a place in hell !).
11265 */
11266 int base, i;
11267 xmlChar *buf;
11268 xmlChar quote = 0;
11269
11270 base = ctxt->input->cur - ctxt->input->base;
11271 if (base < 0) return(0);
11272 if (ctxt->checkIndex > base)
11273 base = ctxt->checkIndex;
11274 buf = ctxt->input->buf->buffer->content;
11275 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11276 base++) {
11277 if (quote != 0) {
11278 if (buf[base] == quote)
11279 quote = 0;
11280 continue;
11281 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011282 if ((quote == 0) && (buf[base] == '<')) {
11283 int found = 0;
11284 /* special handling of comments */
11285 if (((unsigned int) base + 4 <
11286 ctxt->input->buf->buffer->use) &&
11287 (buf[base + 1] == '!') &&
11288 (buf[base + 2] == '-') &&
11289 (buf[base + 3] == '-')) {
11290 for (;(unsigned int) base + 3 <
11291 ctxt->input->buf->buffer->use; base++) {
11292 if ((buf[base] == '-') &&
11293 (buf[base + 1] == '-') &&
11294 (buf[base + 2] == '>')) {
11295 found = 1;
11296 base += 2;
11297 break;
11298 }
11299 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011300 if (!found) {
11301#if 0
11302 fprintf(stderr, "unfinished comment\n");
11303#endif
11304 break; /* for */
11305 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011306 continue;
11307 }
11308 }
Owen Taylor3473f882001-02-23 17:55:21 +000011309 if (buf[base] == '"') {
11310 quote = '"';
11311 continue;
11312 }
11313 if (buf[base] == '\'') {
11314 quote = '\'';
11315 continue;
11316 }
11317 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011318#if 0
11319 fprintf(stderr, "%c%c%c%c: ", buf[base],
11320 buf[base + 1], buf[base + 2], buf[base + 3]);
11321#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011322 if ((unsigned int) base +1 >=
11323 ctxt->input->buf->buffer->use)
11324 break;
11325 if (buf[base + 1] == ']') {
11326 /* conditional crap, skip both ']' ! */
11327 base++;
11328 continue;
11329 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011330 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011331 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11332 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011333 if (buf[base + i] == '>') {
11334#if 0
11335 fprintf(stderr, "found\n");
11336#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011337 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011338 }
11339 if (!IS_BLANK_CH(buf[base + i])) {
11340#if 0
11341 fprintf(stderr, "not found\n");
11342#endif
11343 goto not_end_of_int_subset;
11344 }
Owen Taylor3473f882001-02-23 17:55:21 +000011345 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011346#if 0
11347 fprintf(stderr, "end of stream\n");
11348#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011349 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011350
Owen Taylor3473f882001-02-23 17:55:21 +000011351 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011352not_end_of_int_subset:
11353 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011354 }
11355 /*
11356 * We didn't found the end of the Internal subset
11357 */
Owen Taylor3473f882001-02-23 17:55:21 +000011358#ifdef DEBUG_PUSH
11359 if (next == 0)
11360 xmlGenericError(xmlGenericErrorContext,
11361 "PP: lookup of int subset end filed\n");
11362#endif
11363 goto done;
11364
11365found_end_int_subset:
11366 xmlParseInternalSubset(ctxt);
11367 ctxt->inSubset = 2;
11368 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11369 (ctxt->sax->externalSubset != NULL))
11370 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11371 ctxt->extSubSystem, ctxt->extSubURI);
11372 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011373 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011374 ctxt->instate = XML_PARSER_PROLOG;
11375 ctxt->checkIndex = 0;
11376#ifdef DEBUG_PUSH
11377 xmlGenericError(xmlGenericErrorContext,
11378 "PP: entering PROLOG\n");
11379#endif
11380 break;
11381 }
11382 case XML_PARSER_COMMENT:
11383 xmlGenericError(xmlGenericErrorContext,
11384 "PP: internal error, state == COMMENT\n");
11385 ctxt->instate = XML_PARSER_CONTENT;
11386#ifdef DEBUG_PUSH
11387 xmlGenericError(xmlGenericErrorContext,
11388 "PP: entering CONTENT\n");
11389#endif
11390 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011391 case XML_PARSER_IGNORE:
11392 xmlGenericError(xmlGenericErrorContext,
11393 "PP: internal error, state == IGNORE");
11394 ctxt->instate = XML_PARSER_DTD;
11395#ifdef DEBUG_PUSH
11396 xmlGenericError(xmlGenericErrorContext,
11397 "PP: entering DTD\n");
11398#endif
11399 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011400 case XML_PARSER_PI:
11401 xmlGenericError(xmlGenericErrorContext,
11402 "PP: internal error, state == PI\n");
11403 ctxt->instate = XML_PARSER_CONTENT;
11404#ifdef DEBUG_PUSH
11405 xmlGenericError(xmlGenericErrorContext,
11406 "PP: entering CONTENT\n");
11407#endif
11408 break;
11409 case XML_PARSER_ENTITY_DECL:
11410 xmlGenericError(xmlGenericErrorContext,
11411 "PP: internal error, state == ENTITY_DECL\n");
11412 ctxt->instate = XML_PARSER_DTD;
11413#ifdef DEBUG_PUSH
11414 xmlGenericError(xmlGenericErrorContext,
11415 "PP: entering DTD\n");
11416#endif
11417 break;
11418 case XML_PARSER_ENTITY_VALUE:
11419 xmlGenericError(xmlGenericErrorContext,
11420 "PP: internal error, state == ENTITY_VALUE\n");
11421 ctxt->instate = XML_PARSER_CONTENT;
11422#ifdef DEBUG_PUSH
11423 xmlGenericError(xmlGenericErrorContext,
11424 "PP: entering DTD\n");
11425#endif
11426 break;
11427 case XML_PARSER_ATTRIBUTE_VALUE:
11428 xmlGenericError(xmlGenericErrorContext,
11429 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11430 ctxt->instate = XML_PARSER_START_TAG;
11431#ifdef DEBUG_PUSH
11432 xmlGenericError(xmlGenericErrorContext,
11433 "PP: entering START_TAG\n");
11434#endif
11435 break;
11436 case XML_PARSER_SYSTEM_LITERAL:
11437 xmlGenericError(xmlGenericErrorContext,
11438 "PP: internal error, state == SYSTEM_LITERAL\n");
11439 ctxt->instate = XML_PARSER_START_TAG;
11440#ifdef DEBUG_PUSH
11441 xmlGenericError(xmlGenericErrorContext,
11442 "PP: entering START_TAG\n");
11443#endif
11444 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011445 case XML_PARSER_PUBLIC_LITERAL:
11446 xmlGenericError(xmlGenericErrorContext,
11447 "PP: internal error, state == PUBLIC_LITERAL\n");
11448 ctxt->instate = XML_PARSER_START_TAG;
11449#ifdef DEBUG_PUSH
11450 xmlGenericError(xmlGenericErrorContext,
11451 "PP: entering START_TAG\n");
11452#endif
11453 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011454 }
11455 }
11456done:
11457#ifdef DEBUG_PUSH
11458 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11459#endif
11460 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011461encoding_error:
11462 {
11463 char buffer[150];
11464
11465 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11466 ctxt->input->cur[0], ctxt->input->cur[1],
11467 ctxt->input->cur[2], ctxt->input->cur[3]);
11468 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11469 "Input is not proper UTF-8, indicate encoding !\n%s",
11470 BAD_CAST buffer, NULL);
11471 }
11472 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011473}
11474
11475/**
Owen Taylor3473f882001-02-23 17:55:21 +000011476 * xmlParseChunk:
11477 * @ctxt: an XML parser context
11478 * @chunk: an char array
11479 * @size: the size in byte of the chunk
11480 * @terminate: last chunk indicator
11481 *
11482 * Parse a Chunk of memory
11483 *
11484 * Returns zero if no error, the xmlParserErrors otherwise.
11485 */
11486int
11487xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11488 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011489 int end_in_lf = 0;
11490
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011491 if (ctxt == NULL)
11492 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011493 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011494 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011495 if (ctxt->instate == XML_PARSER_START)
11496 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011497 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11498 (chunk[size - 1] == '\r')) {
11499 end_in_lf = 1;
11500 size--;
11501 }
Owen Taylor3473f882001-02-23 17:55:21 +000011502 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11503 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11504 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11505 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011506 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000011507
William M. Bracka3215c72004-07-31 16:24:01 +000011508 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11509 if (res < 0) {
11510 ctxt->errNo = XML_PARSER_EOF;
11511 ctxt->disableSAX = 1;
11512 return (XML_PARSER_EOF);
11513 }
Owen Taylor3473f882001-02-23 17:55:21 +000011514 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11515 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011516 ctxt->input->end =
11517 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011518#ifdef DEBUG_PUSH
11519 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11520#endif
11521
Owen Taylor3473f882001-02-23 17:55:21 +000011522 } else if (ctxt->instate != XML_PARSER_EOF) {
11523 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11524 xmlParserInputBufferPtr in = ctxt->input->buf;
11525 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11526 (in->raw != NULL)) {
11527 int nbchars;
11528
11529 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11530 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011531 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011532 xmlGenericError(xmlGenericErrorContext,
11533 "xmlParseChunk: encoder error\n");
11534 return(XML_ERR_INVALID_ENCODING);
11535 }
11536 }
11537 }
11538 }
11539 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000011540 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11541 (ctxt->input->buf != NULL)) {
11542 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11543 }
Daniel Veillard14412512005-01-21 23:53:26 +000011544 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011545 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011546 if (terminate) {
11547 /*
11548 * Check for termination
11549 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011550 int avail = 0;
11551
11552 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011553 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011554 avail = ctxt->input->length -
11555 (ctxt->input->cur - ctxt->input->base);
11556 else
11557 avail = ctxt->input->buf->buffer->use -
11558 (ctxt->input->cur - ctxt->input->base);
11559 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011560
Owen Taylor3473f882001-02-23 17:55:21 +000011561 if ((ctxt->instate != XML_PARSER_EOF) &&
11562 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011563 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011564 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011565 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011566 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011567 }
Owen Taylor3473f882001-02-23 17:55:21 +000011568 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011569 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011570 ctxt->sax->endDocument(ctxt->userData);
11571 }
11572 ctxt->instate = XML_PARSER_EOF;
11573 }
11574 return((xmlParserErrors) ctxt->errNo);
11575}
11576
11577/************************************************************************
11578 * *
11579 * I/O front end functions to the parser *
11580 * *
11581 ************************************************************************/
11582
11583/**
Owen Taylor3473f882001-02-23 17:55:21 +000011584 * xmlCreatePushParserCtxt:
11585 * @sax: a SAX handler
11586 * @user_data: The user data returned on SAX callbacks
11587 * @chunk: a pointer to an array of chars
11588 * @size: number of chars in the array
11589 * @filename: an optional file name or URI
11590 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011591 * Create a parser context for using the XML parser in push mode.
11592 * If @buffer and @size are non-NULL, the data is used to detect
11593 * the encoding. The remaining characters will be parsed so they
11594 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011595 * To allow content encoding detection, @size should be >= 4
11596 * The value of @filename is used for fetching external entities
11597 * and error/warning reports.
11598 *
11599 * Returns the new parser context or NULL
11600 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011601
Owen Taylor3473f882001-02-23 17:55:21 +000011602xmlParserCtxtPtr
11603xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11604 const char *chunk, int size, const char *filename) {
11605 xmlParserCtxtPtr ctxt;
11606 xmlParserInputPtr inputStream;
11607 xmlParserInputBufferPtr buf;
11608 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11609
11610 /*
11611 * plug some encoding conversion routines
11612 */
11613 if ((chunk != NULL) && (size >= 4))
11614 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11615
11616 buf = xmlAllocParserInputBuffer(enc);
11617 if (buf == NULL) return(NULL);
11618
11619 ctxt = xmlNewParserCtxt();
11620 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011621 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011622 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011623 return(NULL);
11624 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011625 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011626 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11627 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011628 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011629 xmlFreeParserInputBuffer(buf);
11630 xmlFreeParserCtxt(ctxt);
11631 return(NULL);
11632 }
Owen Taylor3473f882001-02-23 17:55:21 +000011633 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011634#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011635 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011636#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011637 xmlFree(ctxt->sax);
11638 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11639 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011640 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011641 xmlFreeParserInputBuffer(buf);
11642 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011643 return(NULL);
11644 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011645 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11646 if (sax->initialized == XML_SAX2_MAGIC)
11647 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11648 else
11649 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011650 if (user_data != NULL)
11651 ctxt->userData = user_data;
11652 }
11653 if (filename == NULL) {
11654 ctxt->directory = NULL;
11655 } else {
11656 ctxt->directory = xmlParserGetDirectory(filename);
11657 }
11658
11659 inputStream = xmlNewInputStream(ctxt);
11660 if (inputStream == NULL) {
11661 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011662 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011663 return(NULL);
11664 }
11665
11666 if (filename == NULL)
11667 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011668 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011669 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011670 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011671 if (inputStream->filename == NULL) {
11672 xmlFreeParserCtxt(ctxt);
11673 xmlFreeParserInputBuffer(buf);
11674 return(NULL);
11675 }
11676 }
Owen Taylor3473f882001-02-23 17:55:21 +000011677 inputStream->buf = buf;
11678 inputStream->base = inputStream->buf->buffer->content;
11679 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011680 inputStream->end =
11681 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011682
11683 inputPush(ctxt, inputStream);
11684
William M. Brack3a1cd212005-02-11 14:35:54 +000011685 /*
11686 * If the caller didn't provide an initial 'chunk' for determining
11687 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11688 * that it can be automatically determined later
11689 */
11690 if ((size == 0) || (chunk == NULL)) {
11691 ctxt->charset = XML_CHAR_ENCODING_NONE;
11692 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011693 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11694 int cur = ctxt->input->cur - ctxt->input->base;
11695
Owen Taylor3473f882001-02-23 17:55:21 +000011696 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011697
11698 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11699 ctxt->input->cur = ctxt->input->base + cur;
11700 ctxt->input->end =
11701 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011702#ifdef DEBUG_PUSH
11703 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11704#endif
11705 }
11706
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011707 if (enc != XML_CHAR_ENCODING_NONE) {
11708 xmlSwitchEncoding(ctxt, enc);
11709 }
11710
Owen Taylor3473f882001-02-23 17:55:21 +000011711 return(ctxt);
11712}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011713#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011714
11715/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011716 * xmlStopParser:
11717 * @ctxt: an XML parser context
11718 *
11719 * Blocks further parser processing
11720 */
11721void
11722xmlStopParser(xmlParserCtxtPtr ctxt) {
11723 if (ctxt == NULL)
11724 return;
11725 ctxt->instate = XML_PARSER_EOF;
11726 ctxt->disableSAX = 1;
11727 if (ctxt->input != NULL) {
11728 ctxt->input->cur = BAD_CAST"";
11729 ctxt->input->base = ctxt->input->cur;
11730 }
11731}
11732
11733/**
Owen Taylor3473f882001-02-23 17:55:21 +000011734 * xmlCreateIOParserCtxt:
11735 * @sax: a SAX handler
11736 * @user_data: The user data returned on SAX callbacks
11737 * @ioread: an I/O read function
11738 * @ioclose: an I/O close function
11739 * @ioctx: an I/O handler
11740 * @enc: the charset encoding if known
11741 *
11742 * Create a parser context for using the XML parser with an existing
11743 * I/O stream
11744 *
11745 * Returns the new parser context or NULL
11746 */
11747xmlParserCtxtPtr
11748xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11749 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11750 void *ioctx, xmlCharEncoding enc) {
11751 xmlParserCtxtPtr ctxt;
11752 xmlParserInputPtr inputStream;
11753 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011754
11755 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011756
11757 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11758 if (buf == NULL) return(NULL);
11759
11760 ctxt = xmlNewParserCtxt();
11761 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011762 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011763 return(NULL);
11764 }
11765 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011766#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011767 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011768#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011769 xmlFree(ctxt->sax);
11770 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11771 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011772 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011773 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011774 return(NULL);
11775 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011776 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11777 if (sax->initialized == XML_SAX2_MAGIC)
11778 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11779 else
11780 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011781 if (user_data != NULL)
11782 ctxt->userData = user_data;
11783 }
11784
11785 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11786 if (inputStream == NULL) {
11787 xmlFreeParserCtxt(ctxt);
11788 return(NULL);
11789 }
11790 inputPush(ctxt, inputStream);
11791
11792 return(ctxt);
11793}
11794
Daniel Veillard4432df22003-09-28 18:58:27 +000011795#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011796/************************************************************************
11797 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011798 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011799 * *
11800 ************************************************************************/
11801
11802/**
11803 * xmlIOParseDTD:
11804 * @sax: the SAX handler block or NULL
11805 * @input: an Input Buffer
11806 * @enc: the charset encoding if known
11807 *
11808 * Load and parse a DTD
11809 *
11810 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011811 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011812 */
11813
11814xmlDtdPtr
11815xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11816 xmlCharEncoding enc) {
11817 xmlDtdPtr ret = NULL;
11818 xmlParserCtxtPtr ctxt;
11819 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011820 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011821
11822 if (input == NULL)
11823 return(NULL);
11824
11825 ctxt = xmlNewParserCtxt();
11826 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011827 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011828 return(NULL);
11829 }
11830
11831 /*
11832 * Set-up the SAX context
11833 */
11834 if (sax != NULL) {
11835 if (ctxt->sax != NULL)
11836 xmlFree(ctxt->sax);
11837 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011838 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011839 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011840 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011841
11842 /*
11843 * generate a parser input from the I/O handler
11844 */
11845
Daniel Veillard43caefb2003-12-07 19:32:22 +000011846 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011847 if (pinput == NULL) {
11848 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011849 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011850 xmlFreeParserCtxt(ctxt);
11851 return(NULL);
11852 }
11853
11854 /*
11855 * plug some encoding conversion routines here.
11856 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011857 if (xmlPushInput(ctxt, pinput) < 0) {
11858 if (sax != NULL) ctxt->sax = NULL;
11859 xmlFreeParserCtxt(ctxt);
11860 return(NULL);
11861 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000011862 if (enc != XML_CHAR_ENCODING_NONE) {
11863 xmlSwitchEncoding(ctxt, enc);
11864 }
Owen Taylor3473f882001-02-23 17:55:21 +000011865
11866 pinput->filename = NULL;
11867 pinput->line = 1;
11868 pinput->col = 1;
11869 pinput->base = ctxt->input->cur;
11870 pinput->cur = ctxt->input->cur;
11871 pinput->free = NULL;
11872
11873 /*
11874 * let's parse that entity knowing it's an external subset.
11875 */
11876 ctxt->inSubset = 2;
11877 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011878 if (ctxt->myDoc == NULL) {
11879 xmlErrMemory(ctxt, "New Doc failed");
11880 return(NULL);
11881 }
11882 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011883 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11884 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011885
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011886 if ((enc == XML_CHAR_ENCODING_NONE) &&
11887 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011888 /*
11889 * Get the 4 first bytes and decode the charset
11890 * if enc != XML_CHAR_ENCODING_NONE
11891 * plug some encoding conversion routines.
11892 */
11893 start[0] = RAW;
11894 start[1] = NXT(1);
11895 start[2] = NXT(2);
11896 start[3] = NXT(3);
11897 enc = xmlDetectCharEncoding(start, 4);
11898 if (enc != XML_CHAR_ENCODING_NONE) {
11899 xmlSwitchEncoding(ctxt, enc);
11900 }
11901 }
11902
Owen Taylor3473f882001-02-23 17:55:21 +000011903 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11904
11905 if (ctxt->myDoc != NULL) {
11906 if (ctxt->wellFormed) {
11907 ret = ctxt->myDoc->extSubset;
11908 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011909 if (ret != NULL) {
11910 xmlNodePtr tmp;
11911
11912 ret->doc = NULL;
11913 tmp = ret->children;
11914 while (tmp != NULL) {
11915 tmp->doc = NULL;
11916 tmp = tmp->next;
11917 }
11918 }
Owen Taylor3473f882001-02-23 17:55:21 +000011919 } else {
11920 ret = NULL;
11921 }
11922 xmlFreeDoc(ctxt->myDoc);
11923 ctxt->myDoc = NULL;
11924 }
11925 if (sax != NULL) ctxt->sax = NULL;
11926 xmlFreeParserCtxt(ctxt);
11927
11928 return(ret);
11929}
11930
11931/**
11932 * xmlSAXParseDTD:
11933 * @sax: the SAX handler block
11934 * @ExternalID: a NAME* containing the External ID of the DTD
11935 * @SystemID: a NAME* containing the URL to the DTD
11936 *
11937 * Load and parse an external subset.
11938 *
11939 * Returns the resulting xmlDtdPtr or NULL in case of error.
11940 */
11941
11942xmlDtdPtr
11943xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11944 const xmlChar *SystemID) {
11945 xmlDtdPtr ret = NULL;
11946 xmlParserCtxtPtr ctxt;
11947 xmlParserInputPtr input = NULL;
11948 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011949 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011950
11951 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11952
11953 ctxt = xmlNewParserCtxt();
11954 if (ctxt == NULL) {
11955 return(NULL);
11956 }
11957
11958 /*
11959 * Set-up the SAX context
11960 */
11961 if (sax != NULL) {
11962 if (ctxt->sax != NULL)
11963 xmlFree(ctxt->sax);
11964 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011965 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011966 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011967
11968 /*
11969 * Canonicalise the system ID
11970 */
11971 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011972 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011973 xmlFreeParserCtxt(ctxt);
11974 return(NULL);
11975 }
Owen Taylor3473f882001-02-23 17:55:21 +000011976
11977 /*
11978 * Ask the Entity resolver to load the damn thing
11979 */
11980
11981 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011982 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11983 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011984 if (input == NULL) {
11985 if (sax != NULL) ctxt->sax = NULL;
11986 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011987 if (systemIdCanonic != NULL)
11988 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011989 return(NULL);
11990 }
11991
11992 /*
11993 * plug some encoding conversion routines here.
11994 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011995 if (xmlPushInput(ctxt, input) < 0) {
11996 if (sax != NULL) ctxt->sax = NULL;
11997 xmlFreeParserCtxt(ctxt);
11998 if (systemIdCanonic != NULL)
11999 xmlFree(systemIdCanonic);
12000 return(NULL);
12001 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012002 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12003 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12004 xmlSwitchEncoding(ctxt, enc);
12005 }
Owen Taylor3473f882001-02-23 17:55:21 +000012006
12007 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012008 input->filename = (char *) systemIdCanonic;
12009 else
12010 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012011 input->line = 1;
12012 input->col = 1;
12013 input->base = ctxt->input->cur;
12014 input->cur = ctxt->input->cur;
12015 input->free = NULL;
12016
12017 /*
12018 * let's parse that entity knowing it's an external subset.
12019 */
12020 ctxt->inSubset = 2;
12021 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012022 if (ctxt->myDoc == NULL) {
12023 xmlErrMemory(ctxt, "New Doc failed");
12024 if (sax != NULL) ctxt->sax = NULL;
12025 xmlFreeParserCtxt(ctxt);
12026 return(NULL);
12027 }
12028 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012029 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12030 ExternalID, SystemID);
12031 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12032
12033 if (ctxt->myDoc != NULL) {
12034 if (ctxt->wellFormed) {
12035 ret = ctxt->myDoc->extSubset;
12036 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012037 if (ret != NULL) {
12038 xmlNodePtr tmp;
12039
12040 ret->doc = NULL;
12041 tmp = ret->children;
12042 while (tmp != NULL) {
12043 tmp->doc = NULL;
12044 tmp = tmp->next;
12045 }
12046 }
Owen Taylor3473f882001-02-23 17:55:21 +000012047 } else {
12048 ret = NULL;
12049 }
12050 xmlFreeDoc(ctxt->myDoc);
12051 ctxt->myDoc = NULL;
12052 }
12053 if (sax != NULL) ctxt->sax = NULL;
12054 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012055
Owen Taylor3473f882001-02-23 17:55:21 +000012056 return(ret);
12057}
12058
Daniel Veillard4432df22003-09-28 18:58:27 +000012059
Owen Taylor3473f882001-02-23 17:55:21 +000012060/**
12061 * xmlParseDTD:
12062 * @ExternalID: a NAME* containing the External ID of the DTD
12063 * @SystemID: a NAME* containing the URL to the DTD
12064 *
12065 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012066 *
Owen Taylor3473f882001-02-23 17:55:21 +000012067 * Returns the resulting xmlDtdPtr or NULL in case of error.
12068 */
12069
12070xmlDtdPtr
12071xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12072 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12073}
Daniel Veillard4432df22003-09-28 18:58:27 +000012074#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012075
12076/************************************************************************
12077 * *
12078 * Front ends when parsing an Entity *
12079 * *
12080 ************************************************************************/
12081
12082/**
Owen Taylor3473f882001-02-23 17:55:21 +000012083 * xmlParseCtxtExternalEntity:
12084 * @ctx: the existing parsing context
12085 * @URL: the URL for the entity to load
12086 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012087 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012088 *
12089 * Parse an external general entity within an existing parsing context
12090 * An external general parsed entity is well-formed if it matches the
12091 * production labeled extParsedEnt.
12092 *
12093 * [78] extParsedEnt ::= TextDecl? content
12094 *
12095 * Returns 0 if the entity is well formed, -1 in case of args problem and
12096 * the parser error code otherwise
12097 */
12098
12099int
12100xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012101 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012102 xmlParserCtxtPtr ctxt;
12103 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012104 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012105 xmlSAXHandlerPtr oldsax = NULL;
12106 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012107 xmlChar start[4];
12108 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012109 xmlParserInputPtr inputStream;
12110 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012111
Daniel Veillardce682bc2004-11-05 17:22:25 +000012112 if (ctx == NULL) return(-1);
12113
Daniel Veillard0161e632008-08-28 15:36:32 +000012114 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12115 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012116 return(XML_ERR_ENTITY_LOOP);
12117 }
12118
Daniel Veillardcda96922001-08-21 10:56:31 +000012119 if (lst != NULL)
12120 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012121 if ((URL == NULL) && (ID == NULL))
12122 return(-1);
12123 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12124 return(-1);
12125
Rob Richards798743a2009-06-19 13:54:25 -040012126 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012127 if (ctxt == NULL) {
12128 return(-1);
12129 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012130
Owen Taylor3473f882001-02-23 17:55:21 +000012131 oldsax = ctxt->sax;
12132 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012133 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012134 newDoc = xmlNewDoc(BAD_CAST "1.0");
12135 if (newDoc == NULL) {
12136 xmlFreeParserCtxt(ctxt);
12137 return(-1);
12138 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012139 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012140 if (ctx->myDoc->dict) {
12141 newDoc->dict = ctx->myDoc->dict;
12142 xmlDictReference(newDoc->dict);
12143 }
Owen Taylor3473f882001-02-23 17:55:21 +000012144 if (ctx->myDoc != NULL) {
12145 newDoc->intSubset = ctx->myDoc->intSubset;
12146 newDoc->extSubset = ctx->myDoc->extSubset;
12147 }
12148 if (ctx->myDoc->URL != NULL) {
12149 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12150 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012151 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12152 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012153 ctxt->sax = oldsax;
12154 xmlFreeParserCtxt(ctxt);
12155 newDoc->intSubset = NULL;
12156 newDoc->extSubset = NULL;
12157 xmlFreeDoc(newDoc);
12158 return(-1);
12159 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012160 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012161 nodePush(ctxt, newDoc->children);
12162 if (ctx->myDoc == NULL) {
12163 ctxt->myDoc = newDoc;
12164 } else {
12165 ctxt->myDoc = ctx->myDoc;
12166 newDoc->children->doc = ctx->myDoc;
12167 }
12168
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012169 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012170 * Get the 4 first bytes and decode the charset
12171 * if enc != XML_CHAR_ENCODING_NONE
12172 * plug some encoding conversion routines.
12173 */
12174 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012175 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12176 start[0] = RAW;
12177 start[1] = NXT(1);
12178 start[2] = NXT(2);
12179 start[3] = NXT(3);
12180 enc = xmlDetectCharEncoding(start, 4);
12181 if (enc != XML_CHAR_ENCODING_NONE) {
12182 xmlSwitchEncoding(ctxt, enc);
12183 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012184 }
12185
Owen Taylor3473f882001-02-23 17:55:21 +000012186 /*
12187 * Parse a possible text declaration first
12188 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012189 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012190 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012191 /*
12192 * An XML-1.0 document can't reference an entity not XML-1.0
12193 */
12194 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12195 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12196 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12197 "Version mismatch between document and entity\n");
12198 }
Owen Taylor3473f882001-02-23 17:55:21 +000012199 }
12200
12201 /*
12202 * Doing validity checking on chunk doesn't make sense
12203 */
12204 ctxt->instate = XML_PARSER_CONTENT;
12205 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012206 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012207 ctxt->loadsubset = ctx->loadsubset;
12208 ctxt->depth = ctx->depth + 1;
12209 ctxt->replaceEntities = ctx->replaceEntities;
12210 if (ctxt->validate) {
12211 ctxt->vctxt.error = ctx->vctxt.error;
12212 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012213 } else {
12214 ctxt->vctxt.error = NULL;
12215 ctxt->vctxt.warning = NULL;
12216 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012217 ctxt->vctxt.nodeTab = NULL;
12218 ctxt->vctxt.nodeNr = 0;
12219 ctxt->vctxt.nodeMax = 0;
12220 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012221 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12222 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012223 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12224 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12225 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012226 ctxt->dictNames = ctx->dictNames;
12227 ctxt->attsDefault = ctx->attsDefault;
12228 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012229 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012230
12231 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012232
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012233 ctx->validate = ctxt->validate;
12234 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012235 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012236 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012237 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012238 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012239 }
12240 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012241 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012242 }
12243
12244 if (!ctxt->wellFormed) {
12245 if (ctxt->errNo == 0)
12246 ret = 1;
12247 else
12248 ret = ctxt->errNo;
12249 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012250 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012251 xmlNodePtr cur;
12252
12253 /*
12254 * Return the newly created nodeset after unlinking it from
12255 * they pseudo parent.
12256 */
12257 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012258 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012259 while (cur != NULL) {
12260 cur->parent = NULL;
12261 cur = cur->next;
12262 }
12263 newDoc->children->children = NULL;
12264 }
12265 ret = 0;
12266 }
12267 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012268 ctxt->dict = NULL;
12269 ctxt->attsDefault = NULL;
12270 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012271 xmlFreeParserCtxt(ctxt);
12272 newDoc->intSubset = NULL;
12273 newDoc->extSubset = NULL;
12274 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012275
Owen Taylor3473f882001-02-23 17:55:21 +000012276 return(ret);
12277}
12278
12279/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012280 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012281 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012282 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012283 * @sax: the SAX handler bloc (possibly NULL)
12284 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12285 * @depth: Used for loop detection, use 0
12286 * @URL: the URL for the entity to load
12287 * @ID: the System ID for the entity to load
12288 * @list: the return value for the set of parsed nodes
12289 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012290 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012291 *
12292 * Returns 0 if the entity is well formed, -1 in case of args problem and
12293 * the parser error code otherwise
12294 */
12295
Daniel Veillard7d515752003-09-26 19:12:37 +000012296static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012297xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12298 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012299 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012300 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012301 xmlParserCtxtPtr ctxt;
12302 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012303 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012304 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012305 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012306 xmlChar start[4];
12307 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012308
Daniel Veillard0161e632008-08-28 15:36:32 +000012309 if (((depth > 40) &&
12310 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12311 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012312 return(XML_ERR_ENTITY_LOOP);
12313 }
12314
Owen Taylor3473f882001-02-23 17:55:21 +000012315 if (list != NULL)
12316 *list = NULL;
12317 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012318 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012319 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012320 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012321
12322
Rob Richards9c0aa472009-03-26 18:10:19 +000012323 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012324 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012325 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012326 if (oldctxt != NULL) {
12327 ctxt->_private = oldctxt->_private;
12328 ctxt->loadsubset = oldctxt->loadsubset;
12329 ctxt->validate = oldctxt->validate;
12330 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012331 ctxt->record_info = oldctxt->record_info;
12332 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12333 ctxt->node_seq.length = oldctxt->node_seq.length;
12334 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012335 } else {
12336 /*
12337 * Doing validity checking on chunk without context
12338 * doesn't make sense
12339 */
12340 ctxt->_private = NULL;
12341 ctxt->validate = 0;
12342 ctxt->external = 2;
12343 ctxt->loadsubset = 0;
12344 }
Owen Taylor3473f882001-02-23 17:55:21 +000012345 if (sax != NULL) {
12346 oldsax = ctxt->sax;
12347 ctxt->sax = sax;
12348 if (user_data != NULL)
12349 ctxt->userData = user_data;
12350 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012351 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012352 newDoc = xmlNewDoc(BAD_CAST "1.0");
12353 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012354 ctxt->node_seq.maximum = 0;
12355 ctxt->node_seq.length = 0;
12356 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012357 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012358 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012359 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012360 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012361 newDoc->intSubset = doc->intSubset;
12362 newDoc->extSubset = doc->extSubset;
12363 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012364 xmlDictReference(newDoc->dict);
12365
Owen Taylor3473f882001-02-23 17:55:21 +000012366 if (doc->URL != NULL) {
12367 newDoc->URL = xmlStrdup(doc->URL);
12368 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012369 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12370 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012371 if (sax != NULL)
12372 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012373 ctxt->node_seq.maximum = 0;
12374 ctxt->node_seq.length = 0;
12375 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012376 xmlFreeParserCtxt(ctxt);
12377 newDoc->intSubset = NULL;
12378 newDoc->extSubset = NULL;
12379 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012380 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012381 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012382 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012383 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012384 ctxt->myDoc = doc;
12385 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012386
Daniel Veillard0161e632008-08-28 15:36:32 +000012387 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012388 * Get the 4 first bytes and decode the charset
12389 * if enc != XML_CHAR_ENCODING_NONE
12390 * plug some encoding conversion routines.
12391 */
12392 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012393 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12394 start[0] = RAW;
12395 start[1] = NXT(1);
12396 start[2] = NXT(2);
12397 start[3] = NXT(3);
12398 enc = xmlDetectCharEncoding(start, 4);
12399 if (enc != XML_CHAR_ENCODING_NONE) {
12400 xmlSwitchEncoding(ctxt, enc);
12401 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012402 }
12403
Owen Taylor3473f882001-02-23 17:55:21 +000012404 /*
12405 * Parse a possible text declaration first
12406 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012407 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012408 xmlParseTextDecl(ctxt);
12409 }
12410
Owen Taylor3473f882001-02-23 17:55:21 +000012411 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012412 ctxt->depth = depth;
12413
12414 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012415
Daniel Veillard561b7f82002-03-20 21:55:57 +000012416 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012417 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012418 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012419 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012420 }
12421 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012422 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012423 }
12424
12425 if (!ctxt->wellFormed) {
12426 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012427 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012428 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012429 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012430 } else {
12431 if (list != NULL) {
12432 xmlNodePtr cur;
12433
12434 /*
12435 * Return the newly created nodeset after unlinking it from
12436 * they pseudo parent.
12437 */
12438 cur = newDoc->children->children;
12439 *list = cur;
12440 while (cur != NULL) {
12441 cur->parent = NULL;
12442 cur = cur->next;
12443 }
12444 newDoc->children->children = NULL;
12445 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012446 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012447 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012448
12449 /*
12450 * Record in the parent context the number of entities replacement
12451 * done when parsing that reference.
12452 */
12453 oldctxt->nbentities += ctxt->nbentities;
12454 /*
12455 * Also record the size of the entity parsed
12456 */
12457 if (ctxt->input != NULL) {
12458 oldctxt->sizeentities += ctxt->input->consumed;
12459 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12460 }
12461 /*
12462 * And record the last error if any
12463 */
12464 if (ctxt->lastError.code != XML_ERR_OK)
12465 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12466
Owen Taylor3473f882001-02-23 17:55:21 +000012467 if (sax != NULL)
12468 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012469 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12470 oldctxt->node_seq.length = ctxt->node_seq.length;
12471 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012472 ctxt->node_seq.maximum = 0;
12473 ctxt->node_seq.length = 0;
12474 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012475 xmlFreeParserCtxt(ctxt);
12476 newDoc->intSubset = NULL;
12477 newDoc->extSubset = NULL;
12478 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012479
Owen Taylor3473f882001-02-23 17:55:21 +000012480 return(ret);
12481}
12482
Daniel Veillard81273902003-09-30 00:43:48 +000012483#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012484/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012485 * xmlParseExternalEntity:
12486 * @doc: the document the chunk pertains to
12487 * @sax: the SAX handler bloc (possibly NULL)
12488 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12489 * @depth: Used for loop detection, use 0
12490 * @URL: the URL for the entity to load
12491 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012492 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012493 *
12494 * Parse an external general entity
12495 * An external general parsed entity is well-formed if it matches the
12496 * production labeled extParsedEnt.
12497 *
12498 * [78] extParsedEnt ::= TextDecl? content
12499 *
12500 * Returns 0 if the entity is well formed, -1 in case of args problem and
12501 * the parser error code otherwise
12502 */
12503
12504int
12505xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012506 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012507 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012508 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012509}
12510
12511/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012512 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012513 * @doc: the document the chunk pertains to
12514 * @sax: the SAX handler bloc (possibly NULL)
12515 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12516 * @depth: Used for loop detection, use 0
12517 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012518 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012519 *
12520 * Parse a well-balanced chunk of an XML document
12521 * called by the parser
12522 * The allowed sequence for the Well Balanced Chunk is the one defined by
12523 * the content production in the XML grammar:
12524 *
12525 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12526 *
12527 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12528 * the parser error code otherwise
12529 */
12530
12531int
12532xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012533 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012534 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12535 depth, string, lst, 0 );
12536}
Daniel Veillard81273902003-09-30 00:43:48 +000012537#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012538
12539/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012540 * xmlParseBalancedChunkMemoryInternal:
12541 * @oldctxt: the existing parsing context
12542 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12543 * @user_data: the user data field for the parser context
12544 * @lst: the return value for the set of parsed nodes
12545 *
12546 *
12547 * Parse a well-balanced chunk of an XML document
12548 * called by the parser
12549 * The allowed sequence for the Well Balanced Chunk is the one defined by
12550 * the content production in the XML grammar:
12551 *
12552 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12553 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012554 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12555 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012556 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012557 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012558 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012559 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012560static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012561xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12562 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12563 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012564 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012565 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012566 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012567 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012568 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012569 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012570 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012571
Daniel Veillard0161e632008-08-28 15:36:32 +000012572 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12573 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012574 return(XML_ERR_ENTITY_LOOP);
12575 }
12576
12577
12578 if (lst != NULL)
12579 *lst = NULL;
12580 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012581 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012582
12583 size = xmlStrlen(string);
12584
12585 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012586 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012587 if (user_data != NULL)
12588 ctxt->userData = user_data;
12589 else
12590 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012591 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12592 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012593 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12594 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12595 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012596
12597 oldsax = ctxt->sax;
12598 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012599 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012600 ctxt->replaceEntities = oldctxt->replaceEntities;
12601 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012602
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012603 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012604 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012605 newDoc = xmlNewDoc(BAD_CAST "1.0");
12606 if (newDoc == NULL) {
12607 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012608 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012609 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012610 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012611 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012612 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012613 newDoc->dict = ctxt->dict;
12614 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012615 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012616 } else {
12617 ctxt->myDoc = oldctxt->myDoc;
12618 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012619 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012620 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012621 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12622 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012623 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012624 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012625 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012626 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012627 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012628 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012629 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012630 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012631 ctxt->myDoc->children = NULL;
12632 ctxt->myDoc->last = NULL;
12633 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012634 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012635 ctxt->instate = XML_PARSER_CONTENT;
12636 ctxt->depth = oldctxt->depth + 1;
12637
Daniel Veillard328f48c2002-11-15 15:24:34 +000012638 ctxt->validate = 0;
12639 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012640 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12641 /*
12642 * ID/IDREF registration will be done in xmlValidateElement below
12643 */
12644 ctxt->loadsubset |= XML_SKIP_IDS;
12645 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012646 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012647 ctxt->attsDefault = oldctxt->attsDefault;
12648 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012649
Daniel Veillard68e9e742002-11-16 15:35:11 +000012650 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012651 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012652 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012653 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012654 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012655 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012656 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012657 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012658 }
12659
12660 if (!ctxt->wellFormed) {
12661 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012662 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012663 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012664 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012665 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012666 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012667 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012668
William M. Brack7b9154b2003-09-27 19:23:50 +000012669 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012670 xmlNodePtr cur;
12671
12672 /*
12673 * Return the newly created nodeset after unlinking it from
12674 * they pseudo parent.
12675 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012676 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012677 *lst = cur;
12678 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012679#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012680 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12681 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12682 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012683 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12684 oldctxt->myDoc, cur);
12685 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012686#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012687 cur->parent = NULL;
12688 cur = cur->next;
12689 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012690 ctxt->myDoc->children->children = NULL;
12691 }
12692 if (ctxt->myDoc != NULL) {
12693 xmlFreeNode(ctxt->myDoc->children);
12694 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012695 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012696 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012697
12698 /*
12699 * Record in the parent context the number of entities replacement
12700 * done when parsing that reference.
12701 */
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012702 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard0161e632008-08-28 15:36:32 +000012703 /*
12704 * Also record the last error if any
12705 */
12706 if (ctxt->lastError.code != XML_ERR_OK)
12707 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12708
Daniel Veillard328f48c2002-11-15 15:24:34 +000012709 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012710 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012711 ctxt->attsDefault = NULL;
12712 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012713 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012714 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012715 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012716 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012717
Daniel Veillard328f48c2002-11-15 15:24:34 +000012718 return(ret);
12719}
12720
Daniel Veillard29b17482004-08-16 00:39:03 +000012721/**
12722 * xmlParseInNodeContext:
12723 * @node: the context node
12724 * @data: the input string
12725 * @datalen: the input string length in bytes
12726 * @options: a combination of xmlParserOption
12727 * @lst: the return value for the set of parsed nodes
12728 *
12729 * Parse a well-balanced chunk of an XML document
12730 * within the context (DTD, namespaces, etc ...) of the given node.
12731 *
12732 * The allowed sequence for the data is a Well Balanced Chunk defined by
12733 * the content production in the XML grammar:
12734 *
12735 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12736 *
12737 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12738 * error code otherwise
12739 */
12740xmlParserErrors
12741xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12742 int options, xmlNodePtr *lst) {
12743#ifdef SAX2
12744 xmlParserCtxtPtr ctxt;
12745 xmlDocPtr doc = NULL;
12746 xmlNodePtr fake, cur;
12747 int nsnr = 0;
12748
12749 xmlParserErrors ret = XML_ERR_OK;
12750
12751 /*
12752 * check all input parameters, grab the document
12753 */
12754 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12755 return(XML_ERR_INTERNAL_ERROR);
12756 switch (node->type) {
12757 case XML_ELEMENT_NODE:
12758 case XML_ATTRIBUTE_NODE:
12759 case XML_TEXT_NODE:
12760 case XML_CDATA_SECTION_NODE:
12761 case XML_ENTITY_REF_NODE:
12762 case XML_PI_NODE:
12763 case XML_COMMENT_NODE:
12764 case XML_DOCUMENT_NODE:
12765 case XML_HTML_DOCUMENT_NODE:
12766 break;
12767 default:
12768 return(XML_ERR_INTERNAL_ERROR);
12769
12770 }
12771 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12772 (node->type != XML_DOCUMENT_NODE) &&
12773 (node->type != XML_HTML_DOCUMENT_NODE))
12774 node = node->parent;
12775 if (node == NULL)
12776 return(XML_ERR_INTERNAL_ERROR);
12777 if (node->type == XML_ELEMENT_NODE)
12778 doc = node->doc;
12779 else
12780 doc = (xmlDocPtr) node;
12781 if (doc == NULL)
12782 return(XML_ERR_INTERNAL_ERROR);
12783
12784 /*
12785 * allocate a context and set-up everything not related to the
12786 * node position in the tree
12787 */
12788 if (doc->type == XML_DOCUMENT_NODE)
12789 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12790#ifdef LIBXML_HTML_ENABLED
12791 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12792 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12793#endif
12794 else
12795 return(XML_ERR_INTERNAL_ERROR);
12796
12797 if (ctxt == NULL)
12798 return(XML_ERR_NO_MEMORY);
12799 fake = xmlNewComment(NULL);
12800 if (fake == NULL) {
12801 xmlFreeParserCtxt(ctxt);
12802 return(XML_ERR_NO_MEMORY);
12803 }
12804 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012805
12806 /*
12807 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12808 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12809 * we must wait until the last moment to free the original one.
12810 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012811 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012812 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012813 xmlDictFree(ctxt->dict);
12814 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012815 } else
12816 options |= XML_PARSE_NODICT;
12817
Daniel Veillard37334572008-07-31 08:20:02 +000012818 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012819 xmlDetectSAX2(ctxt);
12820 ctxt->myDoc = doc;
12821
12822 if (node->type == XML_ELEMENT_NODE) {
12823 nodePush(ctxt, node);
12824 /*
12825 * initialize the SAX2 namespaces stack
12826 */
12827 cur = node;
12828 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12829 xmlNsPtr ns = cur->nsDef;
12830 const xmlChar *iprefix, *ihref;
12831
12832 while (ns != NULL) {
12833 if (ctxt->dict) {
12834 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12835 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12836 } else {
12837 iprefix = ns->prefix;
12838 ihref = ns->href;
12839 }
12840
12841 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12842 nsPush(ctxt, iprefix, ihref);
12843 nsnr++;
12844 }
12845 ns = ns->next;
12846 }
12847 cur = cur->parent;
12848 }
12849 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000012850 }
Daniel Veillard29b17482004-08-16 00:39:03 +000012851
12852 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12853 /*
12854 * ID/IDREF registration will be done in xmlValidateElement below
12855 */
12856 ctxt->loadsubset |= XML_SKIP_IDS;
12857 }
12858
Daniel Veillard499cc922006-01-18 17:22:35 +000012859#ifdef LIBXML_HTML_ENABLED
12860 if (doc->type == XML_HTML_DOCUMENT_NODE)
12861 __htmlParseContent(ctxt);
12862 else
12863#endif
12864 xmlParseContent(ctxt);
12865
Daniel Veillard29b17482004-08-16 00:39:03 +000012866 nsPop(ctxt, nsnr);
12867 if ((RAW == '<') && (NXT(1) == '/')) {
12868 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12869 } else if (RAW != 0) {
12870 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12871 }
12872 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12873 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12874 ctxt->wellFormed = 0;
12875 }
12876
12877 if (!ctxt->wellFormed) {
12878 if (ctxt->errNo == 0)
12879 ret = XML_ERR_INTERNAL_ERROR;
12880 else
12881 ret = (xmlParserErrors)ctxt->errNo;
12882 } else {
12883 ret = XML_ERR_OK;
12884 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012885
Daniel Veillard29b17482004-08-16 00:39:03 +000012886 /*
12887 * Return the newly created nodeset after unlinking it from
12888 * the pseudo sibling.
12889 */
Daniel Veillard0161e632008-08-28 15:36:32 +000012890
Daniel Veillard29b17482004-08-16 00:39:03 +000012891 cur = fake->next;
12892 fake->next = NULL;
12893 node->last = fake;
12894
12895 if (cur != NULL) {
12896 cur->prev = NULL;
12897 }
12898
12899 *lst = cur;
12900
12901 while (cur != NULL) {
12902 cur->parent = NULL;
12903 cur = cur->next;
12904 }
12905
12906 xmlUnlinkNode(fake);
12907 xmlFreeNode(fake);
12908
12909
12910 if (ret != XML_ERR_OK) {
12911 xmlFreeNodeList(*lst);
12912 *lst = NULL;
12913 }
William M. Brackc3f81342004-10-03 01:22:44 +000012914
William M. Brackb7b54de2004-10-06 16:38:01 +000012915 if (doc->dict != NULL)
12916 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012917 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012918
Daniel Veillard29b17482004-08-16 00:39:03 +000012919 return(ret);
12920#else /* !SAX2 */
12921 return(XML_ERR_INTERNAL_ERROR);
12922#endif
12923}
12924
Daniel Veillard81273902003-09-30 00:43:48 +000012925#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012926/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012927 * xmlParseBalancedChunkMemoryRecover:
12928 * @doc: the document the chunk pertains to
12929 * @sax: the SAX handler bloc (possibly NULL)
12930 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12931 * @depth: Used for loop detection, use 0
12932 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12933 * @lst: the return value for the set of parsed nodes
12934 * @recover: return nodes even if the data is broken (use 0)
12935 *
12936 *
12937 * Parse a well-balanced chunk of an XML document
12938 * called by the parser
12939 * The allowed sequence for the Well Balanced Chunk is the one defined by
12940 * the content production in the XML grammar:
12941 *
12942 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12943 *
12944 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12945 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000012946 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000012947 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000012948 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12949 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000012950 */
12951int
12952xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000012953 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000012954 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012955 xmlParserCtxtPtr ctxt;
12956 xmlDocPtr newDoc;
12957 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012958 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012959 int size;
12960 int ret = 0;
12961
Daniel Veillard0161e632008-08-28 15:36:32 +000012962 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000012963 return(XML_ERR_ENTITY_LOOP);
12964 }
12965
12966
Daniel Veillardcda96922001-08-21 10:56:31 +000012967 if (lst != NULL)
12968 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012969 if (string == NULL)
12970 return(-1);
12971
12972 size = xmlStrlen(string);
12973
12974 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12975 if (ctxt == NULL) return(-1);
12976 ctxt->userData = ctxt;
12977 if (sax != NULL) {
12978 oldsax = ctxt->sax;
12979 ctxt->sax = sax;
12980 if (user_data != NULL)
12981 ctxt->userData = user_data;
12982 }
12983 newDoc = xmlNewDoc(BAD_CAST "1.0");
12984 if (newDoc == NULL) {
12985 xmlFreeParserCtxt(ctxt);
12986 return(-1);
12987 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012988 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012989 if ((doc != NULL) && (doc->dict != NULL)) {
12990 xmlDictFree(ctxt->dict);
12991 ctxt->dict = doc->dict;
12992 xmlDictReference(ctxt->dict);
12993 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12994 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12995 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12996 ctxt->dictNames = 1;
12997 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000012998 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012999 }
Owen Taylor3473f882001-02-23 17:55:21 +000013000 if (doc != NULL) {
13001 newDoc->intSubset = doc->intSubset;
13002 newDoc->extSubset = doc->extSubset;
13003 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013004 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13005 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013006 if (sax != NULL)
13007 ctxt->sax = oldsax;
13008 xmlFreeParserCtxt(ctxt);
13009 newDoc->intSubset = NULL;
13010 newDoc->extSubset = NULL;
13011 xmlFreeDoc(newDoc);
13012 return(-1);
13013 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013014 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13015 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013016 if (doc == NULL) {
13017 ctxt->myDoc = newDoc;
13018 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013019 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013020 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013021 /* Ensure that doc has XML spec namespace */
13022 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13023 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013024 }
13025 ctxt->instate = XML_PARSER_CONTENT;
13026 ctxt->depth = depth;
13027
13028 /*
13029 * Doing validity checking on chunk doesn't make sense
13030 */
13031 ctxt->validate = 0;
13032 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013033 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013034
Daniel Veillardb39bc392002-10-26 19:29:51 +000013035 if ( doc != NULL ){
13036 content = doc->children;
13037 doc->children = NULL;
13038 xmlParseContent(ctxt);
13039 doc->children = content;
13040 }
13041 else {
13042 xmlParseContent(ctxt);
13043 }
Owen Taylor3473f882001-02-23 17:55:21 +000013044 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013045 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013046 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013047 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013048 }
13049 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013050 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013051 }
13052
13053 if (!ctxt->wellFormed) {
13054 if (ctxt->errNo == 0)
13055 ret = 1;
13056 else
13057 ret = ctxt->errNo;
13058 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013059 ret = 0;
13060 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013061
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013062 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13063 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013064
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013065 /*
13066 * Return the newly created nodeset after unlinking it from
13067 * they pseudo parent.
13068 */
13069 cur = newDoc->children->children;
13070 *lst = cur;
13071 while (cur != NULL) {
13072 xmlSetTreeDoc(cur, doc);
13073 cur->parent = NULL;
13074 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013075 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013076 newDoc->children->children = NULL;
13077 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013078
13079 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013080 ctxt->sax = oldsax;
13081 xmlFreeParserCtxt(ctxt);
13082 newDoc->intSubset = NULL;
13083 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013084 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013085 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013086
Owen Taylor3473f882001-02-23 17:55:21 +000013087 return(ret);
13088}
13089
13090/**
13091 * xmlSAXParseEntity:
13092 * @sax: the SAX handler block
13093 * @filename: the filename
13094 *
13095 * parse an XML external entity out of context and build a tree.
13096 * It use the given SAX function block to handle the parsing callback.
13097 * If sax is NULL, fallback to the default DOM tree building routines.
13098 *
13099 * [78] extParsedEnt ::= TextDecl? content
13100 *
13101 * This correspond to a "Well Balanced" chunk
13102 *
13103 * Returns the resulting document tree
13104 */
13105
13106xmlDocPtr
13107xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13108 xmlDocPtr ret;
13109 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013110
13111 ctxt = xmlCreateFileParserCtxt(filename);
13112 if (ctxt == NULL) {
13113 return(NULL);
13114 }
13115 if (sax != NULL) {
13116 if (ctxt->sax != NULL)
13117 xmlFree(ctxt->sax);
13118 ctxt->sax = sax;
13119 ctxt->userData = NULL;
13120 }
13121
Owen Taylor3473f882001-02-23 17:55:21 +000013122 xmlParseExtParsedEnt(ctxt);
13123
13124 if (ctxt->wellFormed)
13125 ret = ctxt->myDoc;
13126 else {
13127 ret = NULL;
13128 xmlFreeDoc(ctxt->myDoc);
13129 ctxt->myDoc = NULL;
13130 }
13131 if (sax != NULL)
13132 ctxt->sax = NULL;
13133 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013134
Owen Taylor3473f882001-02-23 17:55:21 +000013135 return(ret);
13136}
13137
13138/**
13139 * xmlParseEntity:
13140 * @filename: the filename
13141 *
13142 * parse an XML external entity out of context and build a tree.
13143 *
13144 * [78] extParsedEnt ::= TextDecl? content
13145 *
13146 * This correspond to a "Well Balanced" chunk
13147 *
13148 * Returns the resulting document tree
13149 */
13150
13151xmlDocPtr
13152xmlParseEntity(const char *filename) {
13153 return(xmlSAXParseEntity(NULL, filename));
13154}
Daniel Veillard81273902003-09-30 00:43:48 +000013155#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013156
13157/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013158 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013159 * @URL: the entity URL
13160 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013161 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013162 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013163 *
13164 * Create a parser context for an external entity
13165 * Automatic support for ZLIB/Compress compressed document is provided
13166 * by default if found at compile-time.
13167 *
13168 * Returns the new parser context or NULL
13169 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013170static xmlParserCtxtPtr
13171xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13172 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013173 xmlParserCtxtPtr ctxt;
13174 xmlParserInputPtr inputStream;
13175 char *directory = NULL;
13176 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013177
Owen Taylor3473f882001-02-23 17:55:21 +000013178 ctxt = xmlNewParserCtxt();
13179 if (ctxt == NULL) {
13180 return(NULL);
13181 }
13182
Daniel Veillard48247b42009-07-10 16:12:46 +020013183 if (pctx != NULL) {
13184 ctxt->options = pctx->options;
13185 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013186 }
13187
Owen Taylor3473f882001-02-23 17:55:21 +000013188 uri = xmlBuildURI(URL, base);
13189
13190 if (uri == NULL) {
13191 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13192 if (inputStream == NULL) {
13193 xmlFreeParserCtxt(ctxt);
13194 return(NULL);
13195 }
13196
13197 inputPush(ctxt, inputStream);
13198
13199 if ((ctxt->directory == NULL) && (directory == NULL))
13200 directory = xmlParserGetDirectory((char *)URL);
13201 if ((ctxt->directory == NULL) && (directory != NULL))
13202 ctxt->directory = directory;
13203 } else {
13204 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13205 if (inputStream == NULL) {
13206 xmlFree(uri);
13207 xmlFreeParserCtxt(ctxt);
13208 return(NULL);
13209 }
13210
13211 inputPush(ctxt, inputStream);
13212
13213 if ((ctxt->directory == NULL) && (directory == NULL))
13214 directory = xmlParserGetDirectory((char *)uri);
13215 if ((ctxt->directory == NULL) && (directory != NULL))
13216 ctxt->directory = directory;
13217 xmlFree(uri);
13218 }
Owen Taylor3473f882001-02-23 17:55:21 +000013219 return(ctxt);
13220}
13221
Rob Richards9c0aa472009-03-26 18:10:19 +000013222/**
13223 * xmlCreateEntityParserCtxt:
13224 * @URL: the entity URL
13225 * @ID: the entity PUBLIC ID
13226 * @base: a possible base for the target URI
13227 *
13228 * Create a parser context for an external entity
13229 * Automatic support for ZLIB/Compress compressed document is provided
13230 * by default if found at compile-time.
13231 *
13232 * Returns the new parser context or NULL
13233 */
13234xmlParserCtxtPtr
13235xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13236 const xmlChar *base) {
13237 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13238
13239}
13240
Owen Taylor3473f882001-02-23 17:55:21 +000013241/************************************************************************
13242 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013243 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013244 * *
13245 ************************************************************************/
13246
13247/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013248 * xmlCreateURLParserCtxt:
13249 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013250 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013251 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013252 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013253 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013254 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013255 *
13256 * Returns the new parser context or NULL
13257 */
13258xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013259xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013260{
13261 xmlParserCtxtPtr ctxt;
13262 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013263 char *directory = NULL;
13264
Owen Taylor3473f882001-02-23 17:55:21 +000013265 ctxt = xmlNewParserCtxt();
13266 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013267 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013268 return(NULL);
13269 }
13270
Daniel Veillarddf292f72005-01-16 19:00:15 +000013271 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013272 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013273 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013274
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013275 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013276 if (inputStream == NULL) {
13277 xmlFreeParserCtxt(ctxt);
13278 return(NULL);
13279 }
13280
Owen Taylor3473f882001-02-23 17:55:21 +000013281 inputPush(ctxt, inputStream);
13282 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013283 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013284 if ((ctxt->directory == NULL) && (directory != NULL))
13285 ctxt->directory = directory;
13286
13287 return(ctxt);
13288}
13289
Daniel Veillard61b93382003-11-03 14:28:31 +000013290/**
13291 * xmlCreateFileParserCtxt:
13292 * @filename: the filename
13293 *
13294 * Create a parser context for a file content.
13295 * Automatic support for ZLIB/Compress compressed document is provided
13296 * by default if found at compile-time.
13297 *
13298 * Returns the new parser context or NULL
13299 */
13300xmlParserCtxtPtr
13301xmlCreateFileParserCtxt(const char *filename)
13302{
13303 return(xmlCreateURLParserCtxt(filename, 0));
13304}
13305
Daniel Veillard81273902003-09-30 00:43:48 +000013306#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013307/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013308 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013309 * @sax: the SAX handler block
13310 * @filename: the filename
13311 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13312 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013313 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013314 *
13315 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13316 * compressed document is provided by default if found at compile-time.
13317 * It use the given SAX function block to handle the parsing callback.
13318 * If sax is NULL, fallback to the default DOM tree building routines.
13319 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013320 * User data (void *) is stored within the parser context in the
13321 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013322 *
Owen Taylor3473f882001-02-23 17:55:21 +000013323 * Returns the resulting document tree
13324 */
13325
13326xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013327xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13328 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013329 xmlDocPtr ret;
13330 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013331
Daniel Veillard635ef722001-10-29 11:48:19 +000013332 xmlInitParser();
13333
Owen Taylor3473f882001-02-23 17:55:21 +000013334 ctxt = xmlCreateFileParserCtxt(filename);
13335 if (ctxt == NULL) {
13336 return(NULL);
13337 }
13338 if (sax != NULL) {
13339 if (ctxt->sax != NULL)
13340 xmlFree(ctxt->sax);
13341 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013342 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013343 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013344 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013345 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013346 }
Owen Taylor3473f882001-02-23 17:55:21 +000013347
Daniel Veillard37d2d162008-03-14 10:54:00 +000013348 if (ctxt->directory == NULL)
13349 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013350
Daniel Veillarddad3f682002-11-17 16:47:27 +000013351 ctxt->recovery = recovery;
13352
Owen Taylor3473f882001-02-23 17:55:21 +000013353 xmlParseDocument(ctxt);
13354
William M. Brackc07329e2003-09-08 01:57:30 +000013355 if ((ctxt->wellFormed) || recovery) {
13356 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013357 if (ret != NULL) {
13358 if (ctxt->input->buf->compressed > 0)
13359 ret->compression = 9;
13360 else
13361 ret->compression = ctxt->input->buf->compressed;
13362 }
William M. Brackc07329e2003-09-08 01:57:30 +000013363 }
Owen Taylor3473f882001-02-23 17:55:21 +000013364 else {
13365 ret = NULL;
13366 xmlFreeDoc(ctxt->myDoc);
13367 ctxt->myDoc = NULL;
13368 }
13369 if (sax != NULL)
13370 ctxt->sax = NULL;
13371 xmlFreeParserCtxt(ctxt);
13372
13373 return(ret);
13374}
13375
13376/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013377 * xmlSAXParseFile:
13378 * @sax: the SAX handler block
13379 * @filename: the filename
13380 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13381 * documents
13382 *
13383 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13384 * compressed document is provided by default if found at compile-time.
13385 * It use the given SAX function block to handle the parsing callback.
13386 * If sax is NULL, fallback to the default DOM tree building routines.
13387 *
13388 * Returns the resulting document tree
13389 */
13390
13391xmlDocPtr
13392xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13393 int recovery) {
13394 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13395}
13396
13397/**
Owen Taylor3473f882001-02-23 17:55:21 +000013398 * xmlRecoverDoc:
13399 * @cur: a pointer to an array of xmlChar
13400 *
13401 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013402 * In the case the document is not Well Formed, a attempt to build a
13403 * tree is tried anyway
13404 *
13405 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013406 */
13407
13408xmlDocPtr
13409xmlRecoverDoc(xmlChar *cur) {
13410 return(xmlSAXParseDoc(NULL, cur, 1));
13411}
13412
13413/**
13414 * xmlParseFile:
13415 * @filename: the filename
13416 *
13417 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13418 * compressed document is provided by default if found at compile-time.
13419 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013420 * Returns the resulting document tree if the file was wellformed,
13421 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013422 */
13423
13424xmlDocPtr
13425xmlParseFile(const char *filename) {
13426 return(xmlSAXParseFile(NULL, filename, 0));
13427}
13428
13429/**
13430 * xmlRecoverFile:
13431 * @filename: the filename
13432 *
13433 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13434 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013435 * In the case the document is not Well Formed, it attempts to build
13436 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013437 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013438 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013439 */
13440
13441xmlDocPtr
13442xmlRecoverFile(const char *filename) {
13443 return(xmlSAXParseFile(NULL, filename, 1));
13444}
13445
13446
13447/**
13448 * xmlSetupParserForBuffer:
13449 * @ctxt: an XML parser context
13450 * @buffer: a xmlChar * buffer
13451 * @filename: a file name
13452 *
13453 * Setup the parser context to parse a new buffer; Clears any prior
13454 * contents from the parser context. The buffer parameter must not be
13455 * NULL, but the filename parameter can be
13456 */
13457void
13458xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13459 const char* filename)
13460{
13461 xmlParserInputPtr input;
13462
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013463 if ((ctxt == NULL) || (buffer == NULL))
13464 return;
13465
Owen Taylor3473f882001-02-23 17:55:21 +000013466 input = xmlNewInputStream(ctxt);
13467 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013468 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013469 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013470 return;
13471 }
13472
13473 xmlClearParserCtxt(ctxt);
13474 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013475 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013476 input->base = buffer;
13477 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013478 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013479 inputPush(ctxt, input);
13480}
13481
13482/**
13483 * xmlSAXUserParseFile:
13484 * @sax: a SAX handler
13485 * @user_data: The user data returned on SAX callbacks
13486 * @filename: a file name
13487 *
13488 * parse an XML file and call the given SAX handler routines.
13489 * Automatic support for ZLIB/Compress compressed document is provided
13490 *
13491 * Returns 0 in case of success or a error number otherwise
13492 */
13493int
13494xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13495 const char *filename) {
13496 int ret = 0;
13497 xmlParserCtxtPtr ctxt;
13498
13499 ctxt = xmlCreateFileParserCtxt(filename);
13500 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013501 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013502 xmlFree(ctxt->sax);
13503 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013504 xmlDetectSAX2(ctxt);
13505
Owen Taylor3473f882001-02-23 17:55:21 +000013506 if (user_data != NULL)
13507 ctxt->userData = user_data;
13508
13509 xmlParseDocument(ctxt);
13510
13511 if (ctxt->wellFormed)
13512 ret = 0;
13513 else {
13514 if (ctxt->errNo != 0)
13515 ret = ctxt->errNo;
13516 else
13517 ret = -1;
13518 }
13519 if (sax != NULL)
13520 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013521 if (ctxt->myDoc != NULL) {
13522 xmlFreeDoc(ctxt->myDoc);
13523 ctxt->myDoc = NULL;
13524 }
Owen Taylor3473f882001-02-23 17:55:21 +000013525 xmlFreeParserCtxt(ctxt);
13526
13527 return ret;
13528}
Daniel Veillard81273902003-09-30 00:43:48 +000013529#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013530
13531/************************************************************************
13532 * *
13533 * Front ends when parsing from memory *
13534 * *
13535 ************************************************************************/
13536
13537/**
13538 * xmlCreateMemoryParserCtxt:
13539 * @buffer: a pointer to a char array
13540 * @size: the size of the array
13541 *
13542 * Create a parser context for an XML in-memory document.
13543 *
13544 * Returns the new parser context or NULL
13545 */
13546xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013547xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013548 xmlParserCtxtPtr ctxt;
13549 xmlParserInputPtr input;
13550 xmlParserInputBufferPtr buf;
13551
13552 if (buffer == NULL)
13553 return(NULL);
13554 if (size <= 0)
13555 return(NULL);
13556
13557 ctxt = xmlNewParserCtxt();
13558 if (ctxt == NULL)
13559 return(NULL);
13560
Daniel Veillard53350552003-09-18 13:35:51 +000013561 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013562 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013563 if (buf == NULL) {
13564 xmlFreeParserCtxt(ctxt);
13565 return(NULL);
13566 }
Owen Taylor3473f882001-02-23 17:55:21 +000013567
13568 input = xmlNewInputStream(ctxt);
13569 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013570 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013571 xmlFreeParserCtxt(ctxt);
13572 return(NULL);
13573 }
13574
13575 input->filename = NULL;
13576 input->buf = buf;
13577 input->base = input->buf->buffer->content;
13578 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013579 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013580
13581 inputPush(ctxt, input);
13582 return(ctxt);
13583}
13584
Daniel Veillard81273902003-09-30 00:43:48 +000013585#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013586/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013587 * xmlSAXParseMemoryWithData:
13588 * @sax: the SAX handler block
13589 * @buffer: an pointer to a char array
13590 * @size: the size of the array
13591 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13592 * documents
13593 * @data: the userdata
13594 *
13595 * parse an XML in-memory block and use the given SAX function block
13596 * to handle the parsing callback. If sax is NULL, fallback to the default
13597 * DOM tree building routines.
13598 *
13599 * User data (void *) is stored within the parser context in the
13600 * context's _private member, so it is available nearly everywhere in libxml
13601 *
13602 * Returns the resulting document tree
13603 */
13604
13605xmlDocPtr
13606xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13607 int size, int recovery, void *data) {
13608 xmlDocPtr ret;
13609 xmlParserCtxtPtr ctxt;
13610
Daniel Veillardab2a7632009-07-09 08:45:03 +020013611 xmlInitParser();
13612
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013613 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13614 if (ctxt == NULL) return(NULL);
13615 if (sax != NULL) {
13616 if (ctxt->sax != NULL)
13617 xmlFree(ctxt->sax);
13618 ctxt->sax = sax;
13619 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013620 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013621 if (data!=NULL) {
13622 ctxt->_private=data;
13623 }
13624
Daniel Veillardadba5f12003-04-04 16:09:01 +000013625 ctxt->recovery = recovery;
13626
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013627 xmlParseDocument(ctxt);
13628
13629 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13630 else {
13631 ret = NULL;
13632 xmlFreeDoc(ctxt->myDoc);
13633 ctxt->myDoc = NULL;
13634 }
13635 if (sax != NULL)
13636 ctxt->sax = NULL;
13637 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013638
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013639 return(ret);
13640}
13641
13642/**
Owen Taylor3473f882001-02-23 17:55:21 +000013643 * xmlSAXParseMemory:
13644 * @sax: the SAX handler block
13645 * @buffer: an pointer to a char array
13646 * @size: the size of the array
13647 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13648 * documents
13649 *
13650 * parse an XML in-memory block and use the given SAX function block
13651 * to handle the parsing callback. If sax is NULL, fallback to the default
13652 * DOM tree building routines.
13653 *
13654 * Returns the resulting document tree
13655 */
13656xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013657xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13658 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013659 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013660}
13661
13662/**
13663 * xmlParseMemory:
13664 * @buffer: an pointer to a char array
13665 * @size: the size of the array
13666 *
13667 * parse an XML in-memory block and build a tree.
13668 *
13669 * Returns the resulting document tree
13670 */
13671
Daniel Veillard50822cb2001-07-26 20:05:51 +000013672xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013673 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13674}
13675
13676/**
13677 * xmlRecoverMemory:
13678 * @buffer: an pointer to a char array
13679 * @size: the size of the array
13680 *
13681 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013682 * In the case the document is not Well Formed, an attempt to
13683 * build a tree is tried anyway
13684 *
13685 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013686 */
13687
Daniel Veillard50822cb2001-07-26 20:05:51 +000013688xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013689 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13690}
13691
13692/**
13693 * xmlSAXUserParseMemory:
13694 * @sax: a SAX handler
13695 * @user_data: The user data returned on SAX callbacks
13696 * @buffer: an in-memory XML document input
13697 * @size: the length of the XML document in bytes
13698 *
13699 * A better SAX parsing routine.
13700 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020013701 *
Owen Taylor3473f882001-02-23 17:55:21 +000013702 * Returns 0 in case of success or a error number otherwise
13703 */
13704int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013705 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013706 int ret = 0;
13707 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013708
13709 xmlInitParser();
13710
Owen Taylor3473f882001-02-23 17:55:21 +000013711 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13712 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013713 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13714 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013715 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013716 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013717
Daniel Veillard30211a02001-04-26 09:33:18 +000013718 if (user_data != NULL)
13719 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013720
Owen Taylor3473f882001-02-23 17:55:21 +000013721 xmlParseDocument(ctxt);
13722
13723 if (ctxt->wellFormed)
13724 ret = 0;
13725 else {
13726 if (ctxt->errNo != 0)
13727 ret = ctxt->errNo;
13728 else
13729 ret = -1;
13730 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013731 if (sax != NULL)
13732 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013733 if (ctxt->myDoc != NULL) {
13734 xmlFreeDoc(ctxt->myDoc);
13735 ctxt->myDoc = NULL;
13736 }
Owen Taylor3473f882001-02-23 17:55:21 +000013737 xmlFreeParserCtxt(ctxt);
13738
13739 return ret;
13740}
Daniel Veillard81273902003-09-30 00:43:48 +000013741#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013742
13743/**
13744 * xmlCreateDocParserCtxt:
13745 * @cur: a pointer to an array of xmlChar
13746 *
13747 * Creates a parser context for an XML in-memory document.
13748 *
13749 * Returns the new parser context or NULL
13750 */
13751xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013752xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013753 int len;
13754
13755 if (cur == NULL)
13756 return(NULL);
13757 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013758 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013759}
13760
Daniel Veillard81273902003-09-30 00:43:48 +000013761#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013762/**
13763 * xmlSAXParseDoc:
13764 * @sax: the SAX handler block
13765 * @cur: a pointer to an array of xmlChar
13766 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13767 * documents
13768 *
13769 * parse an XML in-memory document and build a tree.
13770 * It use the given SAX function block to handle the parsing callback.
13771 * If sax is NULL, fallback to the default DOM tree building routines.
13772 *
13773 * Returns the resulting document tree
13774 */
13775
13776xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013777xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013778 xmlDocPtr ret;
13779 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013780 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013781
Daniel Veillard38936062004-11-04 17:45:11 +000013782 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013783
13784
13785 ctxt = xmlCreateDocParserCtxt(cur);
13786 if (ctxt == NULL) return(NULL);
13787 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013788 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013789 ctxt->sax = sax;
13790 ctxt->userData = NULL;
13791 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013792 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013793
13794 xmlParseDocument(ctxt);
13795 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13796 else {
13797 ret = NULL;
13798 xmlFreeDoc(ctxt->myDoc);
13799 ctxt->myDoc = NULL;
13800 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013801 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013802 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013803 xmlFreeParserCtxt(ctxt);
13804
13805 return(ret);
13806}
13807
13808/**
13809 * xmlParseDoc:
13810 * @cur: a pointer to an array of xmlChar
13811 *
13812 * parse an XML in-memory document and build a tree.
13813 *
13814 * Returns the resulting document tree
13815 */
13816
13817xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013818xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013819 return(xmlSAXParseDoc(NULL, cur, 0));
13820}
Daniel Veillard81273902003-09-30 00:43:48 +000013821#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013822
Daniel Veillard81273902003-09-30 00:43:48 +000013823#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013824/************************************************************************
13825 * *
13826 * Specific function to keep track of entities references *
13827 * and used by the XSLT debugger *
13828 * *
13829 ************************************************************************/
13830
13831static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13832
13833/**
13834 * xmlAddEntityReference:
13835 * @ent : A valid entity
13836 * @firstNode : A valid first node for children of entity
13837 * @lastNode : A valid last node of children entity
13838 *
13839 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13840 */
13841static void
13842xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13843 xmlNodePtr lastNode)
13844{
13845 if (xmlEntityRefFunc != NULL) {
13846 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13847 }
13848}
13849
13850
13851/**
13852 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013853 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013854 *
13855 * Set the function to call call back when a xml reference has been made
13856 */
13857void
13858xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13859{
13860 xmlEntityRefFunc = func;
13861}
Daniel Veillard81273902003-09-30 00:43:48 +000013862#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013863
13864/************************************************************************
13865 * *
13866 * Miscellaneous *
13867 * *
13868 ************************************************************************/
13869
13870#ifdef LIBXML_XPATH_ENABLED
13871#include <libxml/xpath.h>
13872#endif
13873
Daniel Veillardffa3c742005-07-21 13:24:09 +000013874extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013875static int xmlParserInitialized = 0;
13876
13877/**
13878 * xmlInitParser:
13879 *
13880 * Initialization function for the XML parser.
13881 * This is not reentrant. Call once before processing in case of
13882 * use in multithreaded programs.
13883 */
13884
13885void
13886xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013887 if (xmlParserInitialized != 0)
13888 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013889
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013890#ifdef LIBXML_THREAD_ENABLED
13891 __xmlGlobalInitMutexLock();
13892 if (xmlParserInitialized == 0) {
13893#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020013894 xmlInitGlobals();
13895 xmlInitThreads();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013896 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13897 (xmlGenericError == NULL))
13898 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013899 xmlInitMemory();
13900 xmlInitCharEncodingHandlers();
13901 xmlDefaultSAXHandlerInit();
13902 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013903#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013904 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013905#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013906#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013907 htmlInitAutoClose();
13908 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013909#endif
13910#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013911 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013912#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013913 xmlParserInitialized = 1;
13914#ifdef LIBXML_THREAD_ENABLED
13915 }
13916 __xmlGlobalInitMutexUnlock();
13917#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013918}
13919
13920/**
13921 * xmlCleanupParser:
13922 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013923 * This function name is somewhat misleading. It does not clean up
13924 * parser state, it cleans up memory allocated by the library itself.
13925 * It is a cleanup function for the XML library. It tries to reclaim all
13926 * related global memory allocated for the library processing.
13927 * It doesn't deallocate any document related memory. One should
13928 * call xmlCleanupParser() only when the process has finished using
13929 * the library and all XML/HTML documents built with it.
13930 * See also xmlInitParser() which has the opposite function of preparing
13931 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000013932 *
13933 * WARNING: if your application is multithreaded or has plugin support
13934 * calling this may crash the application if another thread or
13935 * a plugin is still using libxml2. It's sometimes very hard to
13936 * guess if libxml2 is in use in the application, some libraries
13937 * or plugins may use it without notice. In case of doubt abstain
13938 * from calling this function or do it just before calling exit()
13939 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000013940 */
13941
13942void
13943xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013944 if (!xmlParserInitialized)
13945 return;
13946
Owen Taylor3473f882001-02-23 17:55:21 +000013947 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013948#ifdef LIBXML_CATALOG_ENABLED
13949 xmlCatalogCleanup();
13950#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013951 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013952 xmlCleanupInputCallbacks();
13953#ifdef LIBXML_OUTPUT_ENABLED
13954 xmlCleanupOutputCallbacks();
13955#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013956#ifdef LIBXML_SCHEMAS_ENABLED
13957 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013958 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013959#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013960 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013961 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013962 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013963 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013964 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013965}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013966
13967/************************************************************************
13968 * *
13969 * New set (2.6.0) of simpler and more flexible APIs *
13970 * *
13971 ************************************************************************/
13972
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible where the macro
 * is used. The expansion is a complete if statement including its
 * terminating semicolon, and @str is evaluated more than once.
 */
#define DICT_FREE(str)                                          \
        if (((str) != NULL) &&                                  \
            ((dict == NULL) ||                                  \
             (!xmlDictOwns(dict, (const xmlChar *)(str)))))     \
            xmlFree((char *)(str));
13984
13985/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013986 * xmlCtxtReset:
13987 * @ctxt: an XML parser context
13988 *
13989 * Reset a parser context
13990 */
13991void
13992xmlCtxtReset(xmlParserCtxtPtr ctxt)
13993{
13994 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013995 xmlDictPtr dict;
13996
13997 if (ctxt == NULL)
13998 return;
13999
14000 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014001
14002 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14003 xmlFreeInputStream(input);
14004 }
14005 ctxt->inputNr = 0;
14006 ctxt->input = NULL;
14007
14008 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014009 if (ctxt->spaceTab != NULL) {
14010 ctxt->spaceTab[0] = -1;
14011 ctxt->space = &ctxt->spaceTab[0];
14012 } else {
14013 ctxt->space = NULL;
14014 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014015
14016
14017 ctxt->nodeNr = 0;
14018 ctxt->node = NULL;
14019
14020 ctxt->nameNr = 0;
14021 ctxt->name = NULL;
14022
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014023 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014024 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014025 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014026 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014027 DICT_FREE(ctxt->directory);
14028 ctxt->directory = NULL;
14029 DICT_FREE(ctxt->extSubURI);
14030 ctxt->extSubURI = NULL;
14031 DICT_FREE(ctxt->extSubSystem);
14032 ctxt->extSubSystem = NULL;
14033 if (ctxt->myDoc != NULL)
14034 xmlFreeDoc(ctxt->myDoc);
14035 ctxt->myDoc = NULL;
14036
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014037 ctxt->standalone = -1;
14038 ctxt->hasExternalSubset = 0;
14039 ctxt->hasPErefs = 0;
14040 ctxt->html = 0;
14041 ctxt->external = 0;
14042 ctxt->instate = XML_PARSER_START;
14043 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014044
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014045 ctxt->wellFormed = 1;
14046 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014047 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014048 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014049#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014050 ctxt->vctxt.userData = ctxt;
14051 ctxt->vctxt.error = xmlParserValidityError;
14052 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014053#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014054 ctxt->record_info = 0;
14055 ctxt->nbChars = 0;
14056 ctxt->checkIndex = 0;
14057 ctxt->inSubset = 0;
14058 ctxt->errNo = XML_ERR_OK;
14059 ctxt->depth = 0;
14060 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14061 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014062 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014063 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014064 xmlInitNodeInfoSeq(&ctxt->node_seq);
14065
14066 if (ctxt->attsDefault != NULL) {
14067 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14068 ctxt->attsDefault = NULL;
14069 }
14070 if (ctxt->attsSpecial != NULL) {
14071 xmlHashFree(ctxt->attsSpecial, NULL);
14072 ctxt->attsSpecial = NULL;
14073 }
14074
Daniel Veillard4432df22003-09-28 18:58:27 +000014075#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014076 if (ctxt->catalogs != NULL)
14077 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014078#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014079 if (ctxt->lastError.code != XML_ERR_OK)
14080 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014081}
14082
14083/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014084 * xmlCtxtResetPush:
14085 * @ctxt: an XML parser context
14086 * @chunk: a pointer to an array of chars
14087 * @size: number of chars in the array
14088 * @filename: an optional file name or URI
14089 * @encoding: the document encoding, or NULL
14090 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014091 * Reset a push parser context
14092 *
14093 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014094 */
14095int
14096xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14097 int size, const char *filename, const char *encoding)
14098{
14099 xmlParserInputPtr inputStream;
14100 xmlParserInputBufferPtr buf;
14101 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14102
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014103 if (ctxt == NULL)
14104 return(1);
14105
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014106 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14107 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14108
14109 buf = xmlAllocParserInputBuffer(enc);
14110 if (buf == NULL)
14111 return(1);
14112
14113 if (ctxt == NULL) {
14114 xmlFreeParserInputBuffer(buf);
14115 return(1);
14116 }
14117
14118 xmlCtxtReset(ctxt);
14119
14120 if (ctxt->pushTab == NULL) {
14121 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14122 sizeof(xmlChar *));
14123 if (ctxt->pushTab == NULL) {
14124 xmlErrMemory(ctxt, NULL);
14125 xmlFreeParserInputBuffer(buf);
14126 return(1);
14127 }
14128 }
14129
14130 if (filename == NULL) {
14131 ctxt->directory = NULL;
14132 } else {
14133 ctxt->directory = xmlParserGetDirectory(filename);
14134 }
14135
14136 inputStream = xmlNewInputStream(ctxt);
14137 if (inputStream == NULL) {
14138 xmlFreeParserInputBuffer(buf);
14139 return(1);
14140 }
14141
14142 if (filename == NULL)
14143 inputStream->filename = NULL;
14144 else
14145 inputStream->filename = (char *)
14146 xmlCanonicPath((const xmlChar *) filename);
14147 inputStream->buf = buf;
14148 inputStream->base = inputStream->buf->buffer->content;
14149 inputStream->cur = inputStream->buf->buffer->content;
14150 inputStream->end =
14151 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14152
14153 inputPush(ctxt, inputStream);
14154
14155 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14156 (ctxt->input->buf != NULL)) {
14157 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14158 int cur = ctxt->input->cur - ctxt->input->base;
14159
14160 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14161
14162 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14163 ctxt->input->cur = ctxt->input->base + cur;
14164 ctxt->input->end =
14165 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14166 use];
14167#ifdef DEBUG_PUSH
14168 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14169#endif
14170 }
14171
14172 if (encoding != NULL) {
14173 xmlCharEncodingHandlerPtr hdlr;
14174
Daniel Veillard37334572008-07-31 08:20:02 +000014175 if (ctxt->encoding != NULL)
14176 xmlFree((xmlChar *) ctxt->encoding);
14177 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14178
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014179 hdlr = xmlFindCharEncodingHandler(encoding);
14180 if (hdlr != NULL) {
14181 xmlSwitchToEncoding(ctxt, hdlr);
14182 } else {
14183 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14184 "Unsupported encoding %s\n", BAD_CAST encoding);
14185 }
14186 } else if (enc != XML_CHAR_ENCODING_NONE) {
14187 xmlSwitchEncoding(ctxt, enc);
14188 }
14189
14190 return(0);
14191}
14192
Daniel Veillard37334572008-07-31 08:20:02 +000014193
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014194/**
Daniel Veillard37334572008-07-31 08:20:02 +000014195 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014196 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014197 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014198 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014199 *
14200 * Applies the options to the parser context
14201 *
14202 * Returns 0 in case of success, the set of unknown or unimplemented options
14203 * in case of error.
14204 */
Daniel Veillard37334572008-07-31 08:20:02 +000014205static int
14206xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014207{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014208 if (ctxt == NULL)
14209 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014210 if (encoding != NULL) {
14211 if (ctxt->encoding != NULL)
14212 xmlFree((xmlChar *) ctxt->encoding);
14213 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14214 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014215 if (options & XML_PARSE_RECOVER) {
14216 ctxt->recovery = 1;
14217 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014218 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014219 } else
14220 ctxt->recovery = 0;
14221 if (options & XML_PARSE_DTDLOAD) {
14222 ctxt->loadsubset = XML_DETECT_IDS;
14223 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014224 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014225 } else
14226 ctxt->loadsubset = 0;
14227 if (options & XML_PARSE_DTDATTR) {
14228 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14229 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014230 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014231 }
14232 if (options & XML_PARSE_NOENT) {
14233 ctxt->replaceEntities = 1;
14234 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14235 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014236 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014237 } else
14238 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014239 if (options & XML_PARSE_PEDANTIC) {
14240 ctxt->pedantic = 1;
14241 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014242 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014243 } else
14244 ctxt->pedantic = 0;
14245 if (options & XML_PARSE_NOBLANKS) {
14246 ctxt->keepBlanks = 0;
14247 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14248 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014249 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014250 } else
14251 ctxt->keepBlanks = 1;
14252 if (options & XML_PARSE_DTDVALID) {
14253 ctxt->validate = 1;
14254 if (options & XML_PARSE_NOWARNING)
14255 ctxt->vctxt.warning = NULL;
14256 if (options & XML_PARSE_NOERROR)
14257 ctxt->vctxt.error = NULL;
14258 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014259 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014260 } else
14261 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014262 if (options & XML_PARSE_NOWARNING) {
14263 ctxt->sax->warning = NULL;
14264 options -= XML_PARSE_NOWARNING;
14265 }
14266 if (options & XML_PARSE_NOERROR) {
14267 ctxt->sax->error = NULL;
14268 ctxt->sax->fatalError = NULL;
14269 options -= XML_PARSE_NOERROR;
14270 }
Daniel Veillard81273902003-09-30 00:43:48 +000014271#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014272 if (options & XML_PARSE_SAX1) {
14273 ctxt->sax->startElement = xmlSAX2StartElement;
14274 ctxt->sax->endElement = xmlSAX2EndElement;
14275 ctxt->sax->startElementNs = NULL;
14276 ctxt->sax->endElementNs = NULL;
14277 ctxt->sax->initialized = 1;
14278 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014279 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014280 }
Daniel Veillard81273902003-09-30 00:43:48 +000014281#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014282 if (options & XML_PARSE_NODICT) {
14283 ctxt->dictNames = 0;
14284 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014285 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014286 } else {
14287 ctxt->dictNames = 1;
14288 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014289 if (options & XML_PARSE_NOCDATA) {
14290 ctxt->sax->cdataBlock = NULL;
14291 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014292 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014293 }
14294 if (options & XML_PARSE_NSCLEAN) {
14295 ctxt->options |= XML_PARSE_NSCLEAN;
14296 options -= XML_PARSE_NSCLEAN;
14297 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014298 if (options & XML_PARSE_NONET) {
14299 ctxt->options |= XML_PARSE_NONET;
14300 options -= XML_PARSE_NONET;
14301 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014302 if (options & XML_PARSE_COMPACT) {
14303 ctxt->options |= XML_PARSE_COMPACT;
14304 options -= XML_PARSE_COMPACT;
14305 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014306 if (options & XML_PARSE_OLD10) {
14307 ctxt->options |= XML_PARSE_OLD10;
14308 options -= XML_PARSE_OLD10;
14309 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014310 if (options & XML_PARSE_NOBASEFIX) {
14311 ctxt->options |= XML_PARSE_NOBASEFIX;
14312 options -= XML_PARSE_NOBASEFIX;
14313 }
14314 if (options & XML_PARSE_HUGE) {
14315 ctxt->options |= XML_PARSE_HUGE;
14316 options -= XML_PARSE_HUGE;
14317 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014318 if (options & XML_PARSE_OLDSAX) {
14319 ctxt->options |= XML_PARSE_OLDSAX;
14320 options -= XML_PARSE_OLDSAX;
14321 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014322 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014323 return (options);
14324}
14325
14326/**
Daniel Veillard37334572008-07-31 08:20:02 +000014327 * xmlCtxtUseOptions:
14328 * @ctxt: an XML parser context
14329 * @options: a combination of xmlParserOption
14330 *
14331 * Applies the options to the parser context
14332 *
14333 * Returns 0 in case of success, the set of unknown or unimplemented options
14334 * in case of error.
14335 */
14336int
14337xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14338{
14339 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14340}
14341
14342/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014343 * xmlDoRead:
14344 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014345 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014346 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014347 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014348 * @reuse: keep the context for reuse
14349 *
14350 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014351 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014352 * Returns the resulting document tree or NULL
14353 */
14354static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014355xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14356 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014357{
14358 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014359
14360 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014361 if (encoding != NULL) {
14362 xmlCharEncodingHandlerPtr hdlr;
14363
14364 hdlr = xmlFindCharEncodingHandler(encoding);
14365 if (hdlr != NULL)
14366 xmlSwitchToEncoding(ctxt, hdlr);
14367 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014368 if ((URL != NULL) && (ctxt->input != NULL) &&
14369 (ctxt->input->filename == NULL))
14370 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014371 xmlParseDocument(ctxt);
14372 if ((ctxt->wellFormed) || ctxt->recovery)
14373 ret = ctxt->myDoc;
14374 else {
14375 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014376 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014377 xmlFreeDoc(ctxt->myDoc);
14378 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014379 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014380 ctxt->myDoc = NULL;
14381 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014382 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014383 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014384
14385 return (ret);
14386}
14387
14388/**
14389 * xmlReadDoc:
14390 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014391 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014392 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014393 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014394 *
14395 * parse an XML in-memory document and build a tree.
14396 *
14397 * Returns the resulting document tree
14398 */
14399xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014400xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014401{
14402 xmlParserCtxtPtr ctxt;
14403
14404 if (cur == NULL)
14405 return (NULL);
14406
14407 ctxt = xmlCreateDocParserCtxt(cur);
14408 if (ctxt == NULL)
14409 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014410 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014411}
14412
14413/**
14414 * xmlReadFile:
14415 * @filename: a file or URL
14416 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014417 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014418 *
14419 * parse an XML file from the filesystem or the network.
14420 *
14421 * Returns the resulting document tree
14422 */
14423xmlDocPtr
14424xmlReadFile(const char *filename, const char *encoding, int options)
14425{
14426 xmlParserCtxtPtr ctxt;
14427
Daniel Veillard61b93382003-11-03 14:28:31 +000014428 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014429 if (ctxt == NULL)
14430 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014431 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014432}
14433
14434/**
14435 * xmlReadMemory:
14436 * @buffer: a pointer to a char array
14437 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014438 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014439 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014440 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014441 *
14442 * parse an XML in-memory document and build a tree.
14443 *
14444 * Returns the resulting document tree
14445 */
14446xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014447xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014448{
14449 xmlParserCtxtPtr ctxt;
14450
14451 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14452 if (ctxt == NULL)
14453 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014454 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014455}
14456
14457/**
14458 * xmlReadFd:
14459 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014460 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014461 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014462 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014463 *
14464 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014465 * NOTE that the file descriptor will not be closed when the
14466 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014467 *
14468 * Returns the resulting document tree
14469 */
14470xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014471xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014472{
14473 xmlParserCtxtPtr ctxt;
14474 xmlParserInputBufferPtr input;
14475 xmlParserInputPtr stream;
14476
14477 if (fd < 0)
14478 return (NULL);
14479
14480 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14481 if (input == NULL)
14482 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014483 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014484 ctxt = xmlNewParserCtxt();
14485 if (ctxt == NULL) {
14486 xmlFreeParserInputBuffer(input);
14487 return (NULL);
14488 }
14489 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14490 if (stream == NULL) {
14491 xmlFreeParserInputBuffer(input);
14492 xmlFreeParserCtxt(ctxt);
14493 return (NULL);
14494 }
14495 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014496 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014497}
14498
14499/**
14500 * xmlReadIO:
14501 * @ioread: an I/O read function
14502 * @ioclose: an I/O close function
14503 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014504 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014505 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014506 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014507 *
14508 * parse an XML document from I/O functions and source and build a tree.
14509 *
14510 * Returns the resulting document tree
14511 */
14512xmlDocPtr
14513xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014514 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014515{
14516 xmlParserCtxtPtr ctxt;
14517 xmlParserInputBufferPtr input;
14518 xmlParserInputPtr stream;
14519
14520 if (ioread == NULL)
14521 return (NULL);
14522
14523 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14524 XML_CHAR_ENCODING_NONE);
14525 if (input == NULL)
14526 return (NULL);
14527 ctxt = xmlNewParserCtxt();
14528 if (ctxt == NULL) {
14529 xmlFreeParserInputBuffer(input);
14530 return (NULL);
14531 }
14532 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14533 if (stream == NULL) {
14534 xmlFreeParserInputBuffer(input);
14535 xmlFreeParserCtxt(ctxt);
14536 return (NULL);
14537 }
14538 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014539 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014540}
14541
14542/**
14543 * xmlCtxtReadDoc:
14544 * @ctxt: an XML parser context
14545 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014546 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014547 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014548 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014549 *
14550 * parse an XML in-memory document and build a tree.
14551 * This reuses the existing @ctxt parser context
14552 *
14553 * Returns the resulting document tree
14554 */
14555xmlDocPtr
14556xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014557 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014558{
14559 xmlParserInputPtr stream;
14560
14561 if (cur == NULL)
14562 return (NULL);
14563 if (ctxt == NULL)
14564 return (NULL);
14565
14566 xmlCtxtReset(ctxt);
14567
14568 stream = xmlNewStringInputStream(ctxt, cur);
14569 if (stream == NULL) {
14570 return (NULL);
14571 }
14572 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014573 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014574}
14575
14576/**
14577 * xmlCtxtReadFile:
14578 * @ctxt: an XML parser context
14579 * @filename: a file or URL
14580 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014581 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014582 *
14583 * parse an XML file from the filesystem or the network.
14584 * This reuses the existing @ctxt parser context
14585 *
14586 * Returns the resulting document tree
14587 */
14588xmlDocPtr
14589xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14590 const char *encoding, int options)
14591{
14592 xmlParserInputPtr stream;
14593
14594 if (filename == NULL)
14595 return (NULL);
14596 if (ctxt == NULL)
14597 return (NULL);
14598
14599 xmlCtxtReset(ctxt);
14600
Daniel Veillard29614c72004-11-26 10:47:26 +000014601 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014602 if (stream == NULL) {
14603 return (NULL);
14604 }
14605 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014606 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014607}
14608
14609/**
14610 * xmlCtxtReadMemory:
14611 * @ctxt: an XML parser context
14612 * @buffer: a pointer to a char array
14613 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014614 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014615 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014616 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014617 *
14618 * parse an XML in-memory document and build a tree.
14619 * This reuses the existing @ctxt parser context
14620 *
14621 * Returns the resulting document tree
14622 */
14623xmlDocPtr
14624xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014625 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014626{
14627 xmlParserInputBufferPtr input;
14628 xmlParserInputPtr stream;
14629
14630 if (ctxt == NULL)
14631 return (NULL);
14632 if (buffer == NULL)
14633 return (NULL);
14634
14635 xmlCtxtReset(ctxt);
14636
14637 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14638 if (input == NULL) {
14639 return(NULL);
14640 }
14641
14642 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14643 if (stream == NULL) {
14644 xmlFreeParserInputBuffer(input);
14645 return(NULL);
14646 }
14647
14648 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014649 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014650}
14651
14652/**
14653 * xmlCtxtReadFd:
14654 * @ctxt: an XML parser context
14655 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014656 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014657 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014658 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014659 *
14660 * parse an XML from a file descriptor and build a tree.
14661 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014662 * NOTE that the file descriptor will not be closed when the
14663 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014664 *
14665 * Returns the resulting document tree
14666 */
14667xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014668xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14669 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014670{
14671 xmlParserInputBufferPtr input;
14672 xmlParserInputPtr stream;
14673
14674 if (fd < 0)
14675 return (NULL);
14676 if (ctxt == NULL)
14677 return (NULL);
14678
14679 xmlCtxtReset(ctxt);
14680
14681
14682 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14683 if (input == NULL)
14684 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014685 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014686 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14687 if (stream == NULL) {
14688 xmlFreeParserInputBuffer(input);
14689 return (NULL);
14690 }
14691 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014692 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014693}
14694
14695/**
14696 * xmlCtxtReadIO:
14697 * @ctxt: an XML parser context
14698 * @ioread: an I/O read function
14699 * @ioclose: an I/O close function
14700 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014701 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014702 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014703 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014704 *
14705 * parse an XML document from I/O functions and source and build a tree.
14706 * This reuses the existing @ctxt parser context
14707 *
14708 * Returns the resulting document tree
14709 */
14710xmlDocPtr
14711xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14712 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014713 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014714 const char *encoding, int options)
14715{
14716 xmlParserInputBufferPtr input;
14717 xmlParserInputPtr stream;
14718
14719 if (ioread == NULL)
14720 return (NULL);
14721 if (ctxt == NULL)
14722 return (NULL);
14723
14724 xmlCtxtReset(ctxt);
14725
14726 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14727 XML_CHAR_ENCODING_NONE);
14728 if (input == NULL)
14729 return (NULL);
14730 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14731 if (stream == NULL) {
14732 xmlFreeParserInputBuffer(input);
14733 return (NULL);
14734 }
14735 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014736 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014737}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014738
14739#define bottom_parser
14740#include "elfgcchack.h"