blob: dfdde8fb079c5b92edef8ea5a2c3bf570286ddbf [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
Rob Richards9c0aa472009-03-26 18:10:19 +000086static xmlParserCtxtPtr
87xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090/************************************************************************
91 * *
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
93 * *
94 ************************************************************************/
95
96#define XML_PARSER_BIG_ENTITY 1000
97#define XML_PARSER_LOT_ENTITY 5000
98
/*
 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
 * replacement over the size in bytes of the input indicates that you have
 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
105#define XML_PARSER_NON_LINEAR 10
106
107/*
108 * xmlParserEntityCheck
109 *
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential linear entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
114 * parser option.
115 */
116static int
117xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118 xmlEntityPtr ent)
119{
Daniel Veillardcba68392008-08-29 12:43:40 +0000120 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000121
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123 return (0);
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125 return (1);
126 if (size != 0) {
127 /*
128 * Do the check based on the replacement size of the entity
129 */
130 if (size < XML_PARSER_BIG_ENTITY)
131 return(0);
132
133 /*
134 * A limit on the amount of text data reasonably used
135 */
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
139 }
140 consumed += ctxt->sizeentities;
141
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144 return (0);
145 } else if (ent != NULL) {
146 /*
147 * use the number of parsed entities in the replacement
148 */
149 size = ent->checked;
150
151 /*
152 * The amount of data parsed counting entities size only once
153 */
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
157 }
158 consumed += ctxt->sizeentities;
159
160 /*
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
163 */
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165 return (0);
166 } else {
167 /*
168 * strange we got no data for checking just return
169 */
170 return (0);
171 }
172
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174 return (1);
175}
176
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000177/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000178 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000179 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000180 * arbitrary depth limit for the XML documents that we allow to
181 * process. This is not a limitation of the parser but a safety
182 * boundary feature. It can be disabled with the XML_PARSE_HUGE
183 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000184 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000185unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000186
Daniel Veillard0fb18932003-09-07 09:14:37 +0000187
Daniel Veillard0161e632008-08-28 15:36:32 +0000188
189#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000190#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000191#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000192#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193
Owen Taylor3473f882001-02-23 17:55:21 +0000194/*
Owen Taylor3473f882001-02-23 17:55:21 +0000195 * List of XML prefixed PI allowed by W3C specs
196 */
197
Daniel Veillardb44025c2001-10-11 22:55:55 +0000198static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +0000199 "xml-stylesheet",
200 NULL
201};
202
Daniel Veillarda07050d2003-10-19 14:46:32 +0000203
Owen Taylor3473f882001-02-23 17:55:21 +0000204/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200205static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000207
Daniel Veillard7d515752003-09-26 19:12:37 +0000208static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000209xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000211 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000212 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000213
Daniel Veillard37334572008-07-31 08:20:02 +0000214static int
215xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000218static void
219xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000221#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000222
Daniel Veillard7d515752003-09-26 19:12:37 +0000223static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000224xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000227static int
228xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229
Daniel Veillarde57ec792003-09-10 10:50:59 +0000230/************************************************************************
231 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 * Some factorized error routines *
233 * *
234 ************************************************************************/
235
236/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
241 *
242 * Handle a redefinition of attribute error
243 */
244static void
245xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
247{
Daniel Veillard157fee02003-10-31 10:36:03 +0000248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
250 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000251 if (ctxt != NULL)
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000253 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000254 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000255 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
256 (const char *) localname, NULL, NULL, 0, 0,
257 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000258 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000259 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
261 (const char *) prefix, (const char *) localname,
262 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
263 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000264 if (ctxt != NULL) {
265 ctxt->wellFormed = 0;
266 if (ctxt->recovery == 0)
267 ctxt->disableSAX = 1;
268 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000269}
270
271/**
272 * xmlFatalErr:
273 * @ctxt: an XML parser context
274 * @error: the error number
275 * @extra: extra information string
276 *
277 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
278 */
279static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000280xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000281{
282 const char *errmsg;
283
Daniel Veillard157fee02003-10-31 10:36:03 +0000284 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
285 (ctxt->instate == XML_PARSER_EOF))
286 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 switch (error) {
288 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000289 errmsg = "CharRef: invalid hexadecimal value\n";
290 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000291 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 errmsg = "CharRef: invalid decimal value\n";
293 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000294 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000295 errmsg = "CharRef: invalid value\n";
296 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000297 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000298 errmsg = "internal error";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "PEReference at end of document\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "PEReference in prolog\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "PEReference in epilog\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "PEReference: no name\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "PEReference: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "Detected an entity reference loop\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityValue: \" or ' expected\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "PEReferences forbidden in internal subset\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EntityValue: \" or ' expected\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "AttValue: \" or ' expected\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Unescaped '<' not allowed in attributes values\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "SystemLiteral \" or ' expected\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Unfinished System or Public ID \" or ' expected\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Sequence ']]>' not allowed in content\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "PUBLIC, the Public Identifier is missing\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Comment must not contain '--' (double-hyphen)\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "xmlParsePI : no target name\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Invalid PI name\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "NOTATION: Name expected here\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 errmsg = "'>' required to close NOTATION declaration\n";
362 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 errmsg = "Entity value required\n";
365 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 errmsg = "Fragment not allowed";
368 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000369 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370 errmsg = "'(' required to start ATTLIST enumeration\n";
371 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000372 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 errmsg = "NmToken expected in ATTLIST enumeration\n";
374 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 errmsg = "')' required to finish ATTLIST enumeration\n";
377 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
380 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000381 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
383 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000384 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 errmsg = "ContentDecl : Name or '(' expected\n";
386 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000387 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 errmsg =
392 "PEReference: forbidden within markup decl in internal subset\n";
393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 errmsg = "expected '>'\n";
396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 errmsg = "XML conditional section '[' expected\n";
399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000401 errmsg = "Content error in the external subset\n";
402 break;
403 case XML_ERR_CONDSEC_INVALID_KEYWORD:
404 errmsg =
405 "conditional section INCLUDE or IGNORE keyword expected\n";
406 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000407 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 errmsg = "XML conditional section not closed\n";
409 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000410 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 errmsg = "Text declaration '<?xml' required\n";
412 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000413 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 errmsg = "parsing XML declaration: '?>' expected\n";
415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 errmsg = "external parsed entities cannot be standalone\n";
418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 errmsg = "EntityRef: expecting ';'\n";
421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 errmsg = "DOCTYPE improperly terminated\n";
424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 errmsg = "EndTag: '</' not found\n";
427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 errmsg = "expected '='\n";
430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 errmsg = "String not closed expecting \" or '\n";
433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 errmsg = "String not started expecting ' or \"\n";
436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 errmsg = "Invalid XML encoding name\n";
439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 errmsg = "standalone accepts only 'yes' or 'no'\n";
442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 errmsg = "Document is empty\n";
445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 errmsg = "Extra content at the end of the document\n";
448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 errmsg = "chunk is not well balanced\n";
451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 errmsg = "extra content at the end of well balanced chunk\n";
454 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 errmsg = "Malformed declaration expecting version\n";
457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 case:
460 errmsg = "\n";
461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 default:
464 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000466 if (ctxt != NULL)
467 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000468 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
470 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000471 if (ctxt != NULL) {
472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000476}
477
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000478/**
479 * xmlFatalErrMsg:
480 * @ctxt: an XML parser context
481 * @error: the error number
482 * @msg: the error message
483 *
484 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
485 */
486static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
488 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000489{
Daniel Veillard157fee02003-10-31 10:36:03 +0000490 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
491 (ctxt->instate == XML_PARSER_EOF))
492 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000493 if (ctxt != NULL)
494 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000495 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200496 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000497 if (ctxt != NULL) {
498 ctxt->wellFormed = 0;
499 if (ctxt->recovery == 0)
500 ctxt->disableSAX = 1;
501 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000502}
503
504/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000505 * xmlWarningMsg:
506 * @ctxt: an XML parser context
507 * @error: the error number
508 * @msg: the error message
509 * @str1: extra data
510 * @str2: extra data
511 *
512 * Handle a warning.
513 */
514static void
515xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
516 const char *msg, const xmlChar *str1, const xmlChar *str2)
517{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000518 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000519
Daniel Veillard157fee02003-10-31 10:36:03 +0000520 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
521 (ctxt->instate == XML_PARSER_EOF))
522 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000523 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
524 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000525 schannel = ctxt->sax->serror;
526 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000527 (ctxt->sax) ? ctxt->sax->warning : NULL,
528 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000529 ctxt, NULL, XML_FROM_PARSER, error,
530 XML_ERR_WARNING, NULL, 0,
531 (const char *) str1, (const char *) str2, NULL, 0, 0,
532 msg, (const char *) str1, (const char *) str2);
533}
534
535/**
536 * xmlValidityError:
537 * @ctxt: an XML parser context
538 * @error: the error number
539 * @msg: the error message
540 * @str1: extra data
541 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000542 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000543 */
544static void
545xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000546 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000547{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000548 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000549
550 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
551 (ctxt->instate == XML_PARSER_EOF))
552 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000553 if (ctxt != NULL) {
554 ctxt->errNo = error;
555 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
556 schannel = ctxt->sax->serror;
557 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000558 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000559 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000560 ctxt, NULL, XML_FROM_DTD, error,
561 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000562 (const char *) str2, NULL, 0, 0,
563 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000564 if (ctxt != NULL) {
565 ctxt->valid = 0;
566 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000567}
568
569/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570 * xmlFatalErrMsgInt:
571 * @ctxt: an XML parser context
572 * @error: the error number
573 * @msg: the error message
574 * @val: an integer value
575 *
576 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
577 */
578static void
579xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000580 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581{
Daniel Veillard157fee02003-10-31 10:36:03 +0000582 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
583 (ctxt->instate == XML_PARSER_EOF))
584 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000585 if (ctxt != NULL)
586 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000587 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000588 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
589 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000590 if (ctxt != NULL) {
591 ctxt->wellFormed = 0;
592 if (ctxt->recovery == 0)
593 ctxt->disableSAX = 1;
594 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000595}
596
597/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000598 * xmlFatalErrMsgStrIntStr:
599 * @ctxt: an XML parser context
600 * @error: the error number
601 * @msg: the error message
602 * @str1: an string info
603 * @val: an integer value
604 * @str2: an string info
605 *
606 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
607 */
608static void
609xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
610 const char *msg, const xmlChar *str1, int val,
611 const xmlChar *str2)
612{
Daniel Veillard157fee02003-10-31 10:36:03 +0000613 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
614 (ctxt->instate == XML_PARSER_EOF))
615 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000616 if (ctxt != NULL)
617 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000618 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000619 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
620 NULL, 0, (const char *) str1, (const char *) str2,
621 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000622 if (ctxt != NULL) {
623 ctxt->wellFormed = 0;
624 if (ctxt->recovery == 0)
625 ctxt->disableSAX = 1;
626 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000627}
628
629/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000630 * xmlFatalErrMsgStr:
631 * @ctxt: an XML parser context
632 * @error: the error number
633 * @msg: the error message
634 * @val: a string value
635 *
636 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
637 */
638static void
639xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000640 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000641{
Daniel Veillard157fee02003-10-31 10:36:03 +0000642 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
643 (ctxt->instate == XML_PARSER_EOF))
644 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000645 if (ctxt != NULL)
646 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000647 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000648 XML_FROM_PARSER, error, XML_ERR_FATAL,
649 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
650 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000651 if (ctxt != NULL) {
652 ctxt->wellFormed = 0;
653 if (ctxt->recovery == 0)
654 ctxt->disableSAX = 1;
655 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000656}
657
658/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000659 * xmlErrMsgStr:
660 * @ctxt: an XML parser context
661 * @error: the error number
662 * @msg: the error message
663 * @val: a string value
664 *
665 * Handle a non fatal parser error
666 */
667static void
668xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
669 const char *msg, const xmlChar * val)
670{
Daniel Veillard157fee02003-10-31 10:36:03 +0000671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL)
675 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000676 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000677 XML_FROM_PARSER, error, XML_ERR_ERROR,
678 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
679 val);
680}
681
682/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000683 * xmlNsErr:
684 * @ctxt: an XML parser context
685 * @error: the error number
686 * @msg: the message
687 * @info1: extra information string
688 * @info2: extra information string
689 *
690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
691 */
692static void
693xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000695 const xmlChar * info1, const xmlChar * info2,
696 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000697{
Daniel Veillard157fee02003-10-31 10:36:03 +0000698 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
699 (ctxt->instate == XML_PARSER_EOF))
700 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000701 if (ctxt != NULL)
702 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000703 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000704 XML_ERR_ERROR, NULL, 0, (const char *) info1,
705 (const char *) info2, (const char *) info3, 0, 0, msg,
706 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000707 if (ctxt != NULL)
708 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000709}
710
Daniel Veillard37334572008-07-31 08:20:02 +0000711/**
712 * xmlNsWarn
713 * @ctxt: an XML parser context
714 * @error: the error number
715 * @msg: the message
716 * @info1: extra information string
717 * @info2: extra information string
718 *
719 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
720 */
721static void
722xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
723 const char *msg,
724 const xmlChar * info1, const xmlChar * info2,
725 const xmlChar * info3)
726{
727 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
728 (ctxt->instate == XML_PARSER_EOF))
729 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000730 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
731 XML_ERR_WARNING, NULL, 0, (const char *) info1,
732 (const char *) info2, (const char *) info3, 0, 0, msg,
733 info1, info2, info3);
734}
735
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000736/************************************************************************
737 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000738 * Library wide options *
739 * *
740 ************************************************************************/
741
742/**
743 * xmlHasFeature:
744 * @feature: the feature to be examined
745 *
746 * Examines if the library has been compiled with a given feature.
747 *
748 * Returns a non-zero value if the feature exist, otherwise zero.
749 * Returns zero (0) if the feature does not exist or an unknown
750 * unknown feature is requested, non-zero otherwise.
751 */
752int
753xmlHasFeature(xmlFeature feature)
754{
755 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_THREAD_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_TREE_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_OUTPUT_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_PUSH_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_READER_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_PATTERN_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_WRITER_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef LIBXML_SAX1_ENABLED
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_FTP_ENABLED
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000810 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000811#ifdef LIBXML_HTTP_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000816 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000817#ifdef LIBXML_VALID_ENABLED
818 return(1);
819#else
820 return(0);
821#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000822 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000823#ifdef LIBXML_HTML_ENABLED
824 return(1);
825#else
826 return(0);
827#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000828 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000829#ifdef LIBXML_LEGACY_ENABLED
830 return(1);
831#else
832 return(0);
833#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000834 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000835#ifdef LIBXML_C14N_ENABLED
836 return(1);
837#else
838 return(0);
839#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000840 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000841#ifdef LIBXML_CATALOG_ENABLED
842 return(1);
843#else
844 return(0);
845#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000846 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000847#ifdef LIBXML_XPATH_ENABLED
848 return(1);
849#else
850 return(0);
851#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000852 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000853#ifdef LIBXML_XPTR_ENABLED
854 return(1);
855#else
856 return(0);
857#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000858 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000859#ifdef LIBXML_XINCLUDE_ENABLED
860 return(1);
861#else
862 return(0);
863#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000864 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000865#ifdef LIBXML_ICONV_ENABLED
866 return(1);
867#else
868 return(0);
869#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000870 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000871#ifdef LIBXML_ISO8859X_ENABLED
872 return(1);
873#else
874 return(0);
875#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000876 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000877#ifdef LIBXML_UNICODE_ENABLED
878 return(1);
879#else
880 return(0);
881#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000882 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000883#ifdef LIBXML_REGEXP_ENABLED
884 return(1);
885#else
886 return(0);
887#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000888 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000889#ifdef LIBXML_AUTOMATA_ENABLED
890 return(1);
891#else
892 return(0);
893#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000894 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000895#ifdef LIBXML_EXPR_ENABLED
896 return(1);
897#else
898 return(0);
899#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000900 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000901#ifdef LIBXML_SCHEMAS_ENABLED
902 return(1);
903#else
904 return(0);
905#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000906 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000907#ifdef LIBXML_SCHEMATRON_ENABLED
908 return(1);
909#else
910 return(0);
911#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000912 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000913#ifdef LIBXML_MODULES_ENABLED
914 return(1);
915#else
916 return(0);
917#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000918 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000919#ifdef LIBXML_DEBUG_ENABLED
920 return(1);
921#else
922 return(0);
923#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000924 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000925#ifdef DEBUG_MEMORY_LOCATION
926 return(1);
927#else
928 return(0);
929#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000930 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000931#ifdef LIBXML_DEBUG_RUNTIME
932 return(1);
933#else
934 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000935#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000936 case XML_WITH_ZLIB:
937#ifdef LIBXML_ZLIB_ENABLED
938 return(1);
939#else
940 return(0);
941#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000942 default:
943 break;
944 }
945 return(0);
946}
947
948/************************************************************************
949 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 * SAX2 defaulted attributes handling *
951 * *
952 ************************************************************************/
953
954/**
955 * xmlDetectSAX2:
956 * @ctxt: an XML parser context
957 *
958 * Do the SAX2 detection and specific intialization
959 */
960static void
961xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
962 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000963#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000964 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
965 ((ctxt->sax->startElementNs != NULL) ||
966 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000967#else
968 ctxt->sax2 = 1;
969#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000970
971 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
972 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
973 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000974 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
975 (ctxt->str_xml_ns == NULL)) {
976 xmlErrMemory(ctxt, NULL);
977 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000978}
979
Daniel Veillarde57ec792003-09-10 10:50:59 +0000980typedef struct _xmlDefAttrs xmlDefAttrs;
981typedef xmlDefAttrs *xmlDefAttrsPtr;
982struct _xmlDefAttrs {
983 int nbAttrs; /* number of defaulted attributes on that element */
984 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +0000985 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000986};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000987
988/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000989 * xmlAttrNormalizeSpace:
990 * @src: the source string
991 * @dst: the target string
992 *
993 * Normalize the space in non CDATA attribute values:
994 * If the attribute type is not CDATA, then the XML processor MUST further
995 * process the normalized attribute value by discarding any leading and
996 * trailing space (#x20) characters, and by replacing sequences of space
997 * (#x20) characters by a single space (#x20) character.
998 * Note that the size of dst need to be at least src, and if one doesn't need
999 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1000 * passing src as dst is just fine.
1001 *
1002 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1003 * is needed.
1004 */
1005static xmlChar *
1006xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1007{
1008 if ((src == NULL) || (dst == NULL))
1009 return(NULL);
1010
1011 while (*src == 0x20) src++;
1012 while (*src != 0) {
1013 if (*src == 0x20) {
1014 while (*src == 0x20) src++;
1015 if (*src != 0)
1016 *dst++ = 0x20;
1017 } else {
1018 *dst++ = *src++;
1019 }
1020 }
1021 *dst = 0;
1022 if (dst == src)
1023 return(NULL);
1024 return(dst);
1025}
1026
1027/**
1028 * xmlAttrNormalizeSpace2:
1029 * @src: the source string
1030 *
1031 * Normalize the space in non CDATA attribute values, a slightly more complex
1032 * front end to avoid allocation problems when running on attribute values
1033 * coming from the input.
1034 *
1035 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1036 * is needed.
1037 */
1038static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001039xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001040{
1041 int i;
1042 int remove_head = 0;
1043 int need_realloc = 0;
1044 const xmlChar *cur;
1045
1046 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1047 return(NULL);
1048 i = *len;
1049 if (i <= 0)
1050 return(NULL);
1051
1052 cur = src;
1053 while (*cur == 0x20) {
1054 cur++;
1055 remove_head++;
1056 }
1057 while (*cur != 0) {
1058 if (*cur == 0x20) {
1059 cur++;
1060 if ((*cur == 0x20) || (*cur == 0)) {
1061 need_realloc = 1;
1062 break;
1063 }
1064 } else
1065 cur++;
1066 }
1067 if (need_realloc) {
1068 xmlChar *ret;
1069
1070 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1071 if (ret == NULL) {
1072 xmlErrMemory(ctxt, NULL);
1073 return(NULL);
1074 }
1075 xmlAttrNormalizeSpace(ret, ret);
1076 *len = (int) strlen((const char *)ret);
1077 return(ret);
1078 } else if (remove_head) {
1079 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001080 memmove(src, src + remove_head, 1 + *len);
1081 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001082 }
1083 return(NULL);
1084}
1085
1086/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001087 * xmlAddDefAttrs:
1088 * @ctxt: an XML parser context
1089 * @fullname: the element fullname
1090 * @fullattr: the attribute fullname
1091 * @value: the attribute value
1092 *
1093 * Add a defaulted attribute for an element
1094 */
1095static void
1096xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1097 const xmlChar *fullname,
1098 const xmlChar *fullattr,
1099 const xmlChar *value) {
1100 xmlDefAttrsPtr defaults;
1101 int len;
1102 const xmlChar *name;
1103 const xmlChar *prefix;
1104
Daniel Veillard6a31b832008-03-26 14:06:44 +00001105 /*
1106 * Allows to detect attribute redefinitions
1107 */
1108 if (ctxt->attsSpecial != NULL) {
1109 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1110 return;
1111 }
1112
Daniel Veillarde57ec792003-09-10 10:50:59 +00001113 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001114 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001115 if (ctxt->attsDefault == NULL)
1116 goto mem_error;
1117 }
1118
1119 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001120 * split the element name into prefix:localname , the string found
1121 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001122 */
1123 name = xmlSplitQName3(fullname, &len);
1124 if (name == NULL) {
1125 name = xmlDictLookup(ctxt->dict, fullname, -1);
1126 prefix = NULL;
1127 } else {
1128 name = xmlDictLookup(ctxt->dict, name, -1);
1129 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1130 }
1131
1132 /*
1133 * make sure there is some storage
1134 */
1135 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1136 if (defaults == NULL) {
1137 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001138 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001139 if (defaults == NULL)
1140 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001141 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001142 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001143 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1144 defaults, NULL) < 0) {
1145 xmlFree(defaults);
1146 goto mem_error;
1147 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001148 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001149 xmlDefAttrsPtr temp;
1150
1151 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001152 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001153 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001155 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001157 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1158 defaults, NULL) < 0) {
1159 xmlFree(defaults);
1160 goto mem_error;
1161 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 }
1163
1164 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001165 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001166 * are within the DTD and hen not associated to namespace names.
1167 */
1168 name = xmlSplitQName3(fullattr, &len);
1169 if (name == NULL) {
1170 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1171 prefix = NULL;
1172 } else {
1173 name = xmlDictLookup(ctxt->dict, name, -1);
1174 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1175 }
1176
Daniel Veillardae0765b2008-07-31 19:54:59 +00001177 defaults->values[5 * defaults->nbAttrs] = name;
1178 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001179 /* intern the string and precompute the end */
1180 len = xmlStrlen(value);
1181 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001182 defaults->values[5 * defaults->nbAttrs + 2] = value;
1183 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1184 if (ctxt->external)
1185 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1186 else
1187 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001188 defaults->nbAttrs++;
1189
1190 return;
1191
1192mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001193 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001194 return;
1195}
1196
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001197/**
1198 * xmlAddSpecialAttr:
1199 * @ctxt: an XML parser context
1200 * @fullname: the element fullname
1201 * @fullattr: the attribute fullname
1202 * @type: the attribute type
1203 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001204 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001205 */
1206static void
1207xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1208 const xmlChar *fullname,
1209 const xmlChar *fullattr,
1210 int type)
1211{
1212 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001213 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001214 if (ctxt->attsSpecial == NULL)
1215 goto mem_error;
1216 }
1217
Daniel Veillardac4118d2008-01-11 05:27:32 +00001218 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1219 return;
1220
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001221 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1222 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001223 return;
1224
1225mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001226 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001227 return;
1228}
1229
Daniel Veillard4432df22003-09-28 18:58:27 +00001230/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001231 * xmlCleanSpecialAttrCallback:
1232 *
1233 * Removes CDATA attributes from the special attribute table
1234 */
1235static void
1236xmlCleanSpecialAttrCallback(void *payload, void *data,
1237 const xmlChar *fullname, const xmlChar *fullattr,
1238 const xmlChar *unused ATTRIBUTE_UNUSED) {
1239 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1240
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001241 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001242 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1243 }
1244}
1245
1246/**
1247 * xmlCleanSpecialAttr:
1248 * @ctxt: an XML parser context
1249 *
1250 * Trim the list of attributes defined to remove all those of type
1251 * CDATA as they are not special. This call should be done when finishing
1252 * to parse the DTD and before starting to parse the document root.
1253 */
1254static void
1255xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1256{
1257 if (ctxt->attsSpecial == NULL)
1258 return;
1259
1260 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1261
1262 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1263 xmlHashFree(ctxt->attsSpecial, NULL);
1264 ctxt->attsSpecial = NULL;
1265 }
1266 return;
1267}
1268
1269/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001270 * xmlCheckLanguageID:
1271 * @lang: pointer to the string value
1272 *
1273 * Checks that the value conforms to the LanguageID production:
1274 *
1275 * NOTE: this is somewhat deprecated, those productions were removed from
1276 * the XML Second edition.
1277 *
1278 * [33] LanguageID ::= Langcode ('-' Subcode)*
1279 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1280 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1281 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1282 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1283 * [38] Subcode ::= ([a-z] | [A-Z])+
1284 *
1285 * Returns 1 if correct 0 otherwise
1286 **/
1287int
1288xmlCheckLanguageID(const xmlChar * lang)
1289{
1290 const xmlChar *cur = lang;
1291
1292 if (cur == NULL)
1293 return (0);
1294 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1295 ((cur[0] == 'I') && (cur[1] == '-'))) {
1296 /*
1297 * IANA code
1298 */
1299 cur += 2;
1300 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1301 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1302 cur++;
1303 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1304 ((cur[0] == 'X') && (cur[1] == '-'))) {
1305 /*
1306 * User code
1307 */
1308 cur += 2;
1309 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1310 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1311 cur++;
1312 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1313 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1314 /*
1315 * ISO639
1316 */
1317 cur++;
1318 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1319 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1320 cur++;
1321 else
1322 return (0);
1323 } else
1324 return (0);
1325 while (cur[0] != 0) { /* non input consuming */
1326 if (cur[0] != '-')
1327 return (0);
1328 cur++;
1329 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1330 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1331 cur++;
1332 else
1333 return (0);
1334 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1335 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1336 cur++;
1337 }
1338 return (1);
1339}
1340
Owen Taylor3473f882001-02-23 17:55:21 +00001341/************************************************************************
1342 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001343 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001344 * *
1345 ************************************************************************/
1346
Daniel Veillard8ed10722009-08-20 19:17:36 +02001347static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1348 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001349
Daniel Veillard0fb18932003-09-07 09:14:37 +00001350#ifdef SAX2
1351/**
1352 * nsPush:
1353 * @ctxt: an XML parser context
1354 * @prefix: the namespace prefix or NULL
1355 * @URL: the namespace name
1356 *
1357 * Pushes a new parser namespace on top of the ns stack
1358 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001359 * Returns -1 in case of error, -2 if the namespace should be discarded
1360 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001361 */
1362static int
1363nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1364{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001365 if (ctxt->options & XML_PARSE_NSCLEAN) {
1366 int i;
1367 for (i = 0;i < ctxt->nsNr;i += 2) {
1368 if (ctxt->nsTab[i] == prefix) {
1369 /* in scope */
1370 if (ctxt->nsTab[i + 1] == URL)
1371 return(-2);
1372 /* out of scope keep it */
1373 break;
1374 }
1375 }
1376 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001377 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1378 ctxt->nsMax = 10;
1379 ctxt->nsNr = 0;
1380 ctxt->nsTab = (const xmlChar **)
1381 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1382 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001383 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001384 ctxt->nsMax = 0;
1385 return (-1);
1386 }
1387 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001388 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001389 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001390 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1391 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1392 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001393 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001394 ctxt->nsMax /= 2;
1395 return (-1);
1396 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001397 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001398 }
1399 ctxt->nsTab[ctxt->nsNr++] = prefix;
1400 ctxt->nsTab[ctxt->nsNr++] = URL;
1401 return (ctxt->nsNr);
1402}
1403/**
1404 * nsPop:
1405 * @ctxt: an XML parser context
1406 * @nr: the number to pop
1407 *
1408 * Pops the top @nr parser prefix/namespace from the ns stack
1409 *
1410 * Returns the number of namespaces removed
1411 */
1412static int
1413nsPop(xmlParserCtxtPtr ctxt, int nr)
1414{
1415 int i;
1416
1417 if (ctxt->nsTab == NULL) return(0);
1418 if (ctxt->nsNr < nr) {
1419 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1420 nr = ctxt->nsNr;
1421 }
1422 if (ctxt->nsNr <= 0)
1423 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001424
Daniel Veillard0fb18932003-09-07 09:14:37 +00001425 for (i = 0;i < nr;i++) {
1426 ctxt->nsNr--;
1427 ctxt->nsTab[ctxt->nsNr] = NULL;
1428 }
1429 return(nr);
1430}
1431#endif
1432
1433static int
1434xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1435 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001436 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001437 int maxatts;
1438
1439 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001440 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001441 atts = (const xmlChar **)
1442 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001443 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001444 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001445 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1446 if (attallocs == NULL) goto mem_error;
1447 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001448 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001449 } else if (nr + 5 > ctxt->maxatts) {
1450 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001451 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1452 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001453 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001454 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001455 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1456 (maxatts / 5) * sizeof(int));
1457 if (attallocs == NULL) goto mem_error;
1458 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001459 ctxt->maxatts = maxatts;
1460 }
1461 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001462mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001463 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001464 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001465}
1466
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001467/**
1468 * inputPush:
1469 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001470 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001471 *
1472 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001473 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001474 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001475 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001476int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001477inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1478{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001479 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001480 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001481 if (ctxt->inputNr >= ctxt->inputMax) {
1482 ctxt->inputMax *= 2;
1483 ctxt->inputTab =
1484 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1485 ctxt->inputMax *
1486 sizeof(ctxt->inputTab[0]));
1487 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001488 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001489 xmlFreeInputStream(value);
1490 ctxt->inputMax /= 2;
1491 value = NULL;
1492 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001493 }
1494 }
1495 ctxt->inputTab[ctxt->inputNr] = value;
1496 ctxt->input = value;
1497 return (ctxt->inputNr++);
1498}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001499/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001500 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001501 * @ctxt: an XML parser context
1502 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001503 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001504 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001505 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001506 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001507xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001508inputPop(xmlParserCtxtPtr ctxt)
1509{
1510 xmlParserInputPtr ret;
1511
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001512 if (ctxt == NULL)
1513 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001514 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001515 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001516 ctxt->inputNr--;
1517 if (ctxt->inputNr > 0)
1518 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1519 else
1520 ctxt->input = NULL;
1521 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001522 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001523 return (ret);
1524}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001525/**
1526 * nodePush:
1527 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001528 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001529 *
1530 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001531 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001532 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001533 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001534int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001535nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1536{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001537 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001538 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001539 xmlNodePtr *tmp;
1540
1541 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1542 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001543 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001544 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001545 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001546 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001547 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001548 ctxt->nodeTab = tmp;
1549 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001550 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001551 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1552 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001553 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001554 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001555 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001556 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001557 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001558 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001559 ctxt->nodeTab[ctxt->nodeNr] = value;
1560 ctxt->node = value;
1561 return (ctxt->nodeNr++);
1562}
Daniel Veillard8915c152008-08-26 13:05:34 +00001563
Daniel Veillard1c732d22002-11-30 11:22:59 +00001564/**
1565 * nodePop:
1566 * @ctxt: an XML parser context
1567 *
1568 * Pops the top element node from the node stack
1569 *
1570 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001571 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001572xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001573nodePop(xmlParserCtxtPtr ctxt)
1574{
1575 xmlNodePtr ret;
1576
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001577 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001578 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001579 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001580 ctxt->nodeNr--;
1581 if (ctxt->nodeNr > 0)
1582 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1583 else
1584 ctxt->node = NULL;
1585 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001586 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001587 return (ret);
1588}
Daniel Veillarda2351322004-06-27 12:08:10 +00001589
1590#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001591/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001592 * nameNsPush:
1593 * @ctxt: an XML parser context
1594 * @value: the element name
1595 * @prefix: the element prefix
1596 * @URI: the element namespace name
1597 *
1598 * Pushes a new element name/prefix/URL on top of the name stack
1599 *
1600 * Returns -1 in case of error, the index in the stack otherwise
1601 */
1602static int
1603nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1604 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1605{
1606 if (ctxt->nameNr >= ctxt->nameMax) {
1607 const xmlChar * *tmp;
1608 void **tmp2;
1609 ctxt->nameMax *= 2;
1610 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1611 ctxt->nameMax *
1612 sizeof(ctxt->nameTab[0]));
1613 if (tmp == NULL) {
1614 ctxt->nameMax /= 2;
1615 goto mem_error;
1616 }
1617 ctxt->nameTab = tmp;
1618 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1619 ctxt->nameMax * 3 *
1620 sizeof(ctxt->pushTab[0]));
1621 if (tmp2 == NULL) {
1622 ctxt->nameMax /= 2;
1623 goto mem_error;
1624 }
1625 ctxt->pushTab = tmp2;
1626 }
1627 ctxt->nameTab[ctxt->nameNr] = value;
1628 ctxt->name = value;
1629 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1630 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001631 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001632 return (ctxt->nameNr++);
1633mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001634 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001635 return (-1);
1636}
1637/**
1638 * nameNsPop:
1639 * @ctxt: an XML parser context
1640 *
1641 * Pops the top element/prefix/URI name from the name stack
1642 *
1643 * Returns the name just removed
1644 */
1645static const xmlChar *
1646nameNsPop(xmlParserCtxtPtr ctxt)
1647{
1648 const xmlChar *ret;
1649
1650 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001651 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001652 ctxt->nameNr--;
1653 if (ctxt->nameNr > 0)
1654 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1655 else
1656 ctxt->name = NULL;
1657 ret = ctxt->nameTab[ctxt->nameNr];
1658 ctxt->nameTab[ctxt->nameNr] = NULL;
1659 return (ret);
1660}
Daniel Veillarda2351322004-06-27 12:08:10 +00001661#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001662
1663/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001664 * namePush:
1665 * @ctxt: an XML parser context
1666 * @value: the element name
1667 *
1668 * Pushes a new element name on top of the name stack
1669 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001670 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001671 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001672int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001673namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001674{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001675 if (ctxt == NULL) return (-1);
1676
Daniel Veillard1c732d22002-11-30 11:22:59 +00001677 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001679 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001680 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001681 ctxt->nameMax *
1682 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001683 if (tmp == NULL) {
1684 ctxt->nameMax /= 2;
1685 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001686 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001687 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001688 }
1689 ctxt->nameTab[ctxt->nameNr] = value;
1690 ctxt->name = value;
1691 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001692mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001693 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001694 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695}
1696/**
1697 * namePop:
1698 * @ctxt: an XML parser context
1699 *
1700 * Pops the top element name from the name stack
1701 *
1702 * Returns the name just removed
1703 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001704const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001705namePop(xmlParserCtxtPtr ctxt)
1706{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001707 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001709 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1710 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001711 ctxt->nameNr--;
1712 if (ctxt->nameNr > 0)
1713 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1714 else
1715 ctxt->name = NULL;
1716 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001717 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001718 return (ret);
1719}
Owen Taylor3473f882001-02-23 17:55:21 +00001720
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001721static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001722 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001723 int *tmp;
1724
Owen Taylor3473f882001-02-23 17:55:21 +00001725 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001726 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1727 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1728 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001729 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001730 ctxt->spaceMax /=2;
1731 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001732 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001733 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001734 }
1735 ctxt->spaceTab[ctxt->spaceNr] = val;
1736 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1737 return(ctxt->spaceNr++);
1738}
1739
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001740static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001741 int ret;
1742 if (ctxt->spaceNr <= 0) return(0);
1743 ctxt->spaceNr--;
1744 if (ctxt->spaceNr > 0)
1745 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1746 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001747 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001748 ret = ctxt->spaceTab[ctxt->spaceNr];
1749 ctxt->spaceTab[ctxt->spaceNr] = -1;
1750 return(ret);
1751}
1752
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1787
Daniel Veillardfdc91562002-07-01 21:52:03 +00001788#define RAW (*ctxt->input->cur)
1789#define CUR (*ctxt->input->cur)
Owen Taylor3473f882001-02-23 17:55:21 +00001790#define NXT(val) ctxt->input->cur[(val)]
1791#define CUR_PTR ctxt->input->cur
1792
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s equal c1..cn.
 * The bytes are read as unsigned char and compared left to right with
 * short-circuit &&, so a mismatch stops further reads.
 * Every macro argument is parenthesized so that expressions such as
 * "ptr + 2" are safe to pass, and the cast keeps the const qualifier
 * since the macros only read through the pointer.
 * Note that s is still evaluated once per compared byte: do not pass
 * an expression with side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1810
Owen Taylor3473f882001-02-23 17:55:21 +00001811#define SKIP(val) do { \
Daniel Veillard77a90a72003-03-22 00:04:05 +00001812 ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val); \
Owen Taylor3473f882001-02-23 17:55:21 +00001813 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Daniel Veillard561b7f82002-03-20 21:55:57 +00001814 if ((*ctxt->input->cur == 0) && \
Owen Taylor3473f882001-02-23 17:55:21 +00001815 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1816 xmlPopInput(ctxt); \
1817 } while (0)
1818
Daniel Veillard0b787f32004-03-26 17:29:53 +00001819#define SKIPL(val) do { \
1820 int skipl; \
1821 for(skipl=0; skipl<val; skipl++) { \
1822 if (*(ctxt->input->cur) == '\n') { \
1823 ctxt->input->line++; ctxt->input->col = 1; \
1824 } else ctxt->input->col++; \
1825 ctxt->nbChars++; \
1826 ctxt->input->cur++; \
1827 } \
1828 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
1829 if ((*ctxt->input->cur == 0) && \
1830 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) \
1831 xmlPopInput(ctxt); \
1832 } while (0)
1833
Daniel Veillarda880b122003-04-21 21:36:41 +00001834#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001835 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1836 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001837 xmlSHRINK (ctxt);
1838
1839static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1840 xmlParserInputShrink(ctxt->input);
1841 if ((*ctxt->input->cur == 0) &&
1842 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1843 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001844 }
Owen Taylor3473f882001-02-23 17:55:21 +00001845
Daniel Veillarda880b122003-04-21 21:36:41 +00001846#define GROW if ((ctxt->progressive == 0) && \
1847 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001848 xmlGROW (ctxt);
1849
1850static void xmlGROW (xmlParserCtxtPtr ctxt) {
1851 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1852 if ((*ctxt->input->cur == 0) &&
1853 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1854 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001855}
Owen Taylor3473f882001-02-23 17:55:21 +00001856
1857#define SKIP_BLANKS xmlSkipBlankChars(ctxt)
1858
1859#define NEXT xmlNextChar(ctxt)
1860
Daniel Veillard21a0f912001-02-25 19:54:14 +00001861#define NEXT1 { \
Daniel Veillard77a90a72003-03-22 00:04:05 +00001862 ctxt->input->col++; \
Daniel Veillard21a0f912001-02-25 19:54:14 +00001863 ctxt->input->cur++; \
1864 ctxt->nbChars++; \
Daniel Veillard561b7f82002-03-20 21:55:57 +00001865 if (*ctxt->input->cur == 0) \
Daniel Veillard21a0f912001-02-25 19:54:14 +00001866 xmlParserInputGrow(ctxt->input, INPUT_CHUNK); \
1867 }
1868
Owen Taylor3473f882001-02-23 17:55:21 +00001869#define NEXTL(l) do { \
1870 if (*(ctxt->input->cur) == '\n') { \
1871 ctxt->input->line++; ctxt->input->col = 1; \
1872 } else ctxt->input->col++; \
Daniel Veillardfdc91562002-07-01 21:52:03 +00001873 ctxt->input->cur += l; \
Owen Taylor3473f882001-02-23 17:55:21 +00001874 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt); \
Owen Taylor3473f882001-02-23 17:55:21 +00001875 } while (0)
1876
1877#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
1878#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
1879
1880#define COPY_BUF(l,b,i,v) \
1881 if (l == 1) b[i++] = (xmlChar) v; \
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001882 else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +00001883
1884/**
1885 * xmlSkipBlankChars:
1886 * @ctxt: the XML parser context
1887 *
1888 * skip all blanks character found at that point in the input streams.
1889 * It pops up finished entities in the process if allowable at that point.
1890 *
1891 * Returns the number of space chars skipped
1892 */
1893
1894int
1895xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001896 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001897
1898 /*
1899 * It's Okay to use CUR/NEXT here since all the blanks are on
1900 * the ASCII range.
1901 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001902 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1903 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001904 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001905 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001906 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001907 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001908 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001909 if (*cur == '\n') {
1910 ctxt->input->line++; ctxt->input->col = 1;
1911 }
1912 cur++;
1913 res++;
1914 if (*cur == 0) {
1915 ctxt->input->cur = cur;
1916 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1917 cur = ctxt->input->cur;
1918 }
1919 }
1920 ctxt->input->cur = cur;
1921 } else {
1922 int cur;
1923 do {
1924 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001925 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001926 NEXT;
1927 cur = CUR;
1928 res++;
1929 }
1930 while ((cur == 0) && (ctxt->inputNr > 1) &&
1931 (ctxt->instate != XML_PARSER_COMMENT)) {
1932 xmlPopInput(ctxt);
1933 cur = CUR;
1934 }
1935 /*
1936 * Need to handle support of entities branching here
1937 */
1938 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1939 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1940 }
Owen Taylor3473f882001-02-23 17:55:21 +00001941 return(res);
1942}
1943
1944/************************************************************************
1945 * *
1946 * Commodity functions to handle entities *
1947 * *
1948 ************************************************************************/
1949
1950/**
1951 * xmlPopInput:
1952 * @ctxt: an XML parser context
1953 *
1954 * xmlPopInput: the current input pointed by ctxt->input came to an end
1955 * pop it and return the next char.
1956 *
1957 * Returns the current xmlChar in the parser context
1958 */
1959xmlChar
1960xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001961 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001962 if (xmlParserDebugEntities)
1963 xmlGenericError(xmlGenericErrorContext,
1964 "Popping input %d\n", ctxt->inputNr);
1965 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001966 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001967 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1968 return(xmlPopInput(ctxt));
1969 return(CUR);
1970}
1971
1972/**
1973 * xmlPushInput:
1974 * @ctxt: an XML parser context
1975 * @input: an XML parser input fragment (entity, XML fragment ...).
1976 *
1977 * xmlPushInput: switch to a new input stream which is stacked on top
1978 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001979 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00001980 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001981int
Owen Taylor3473f882001-02-23 17:55:21 +00001982xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001983 int ret;
1984 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001985
1986 if (xmlParserDebugEntities) {
1987 if ((ctxt->input != NULL) && (ctxt->input->filename))
1988 xmlGenericError(xmlGenericErrorContext,
1989 "%s(%d): ", ctxt->input->filename,
1990 ctxt->input->line);
1991 xmlGenericError(xmlGenericErrorContext,
1992 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1993 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001994 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001995 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001996 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001997}
1998
1999/**
2000 * xmlParseCharRef:
2001 * @ctxt: an XML parser context
2002 *
2003 * parse Reference declarations
2004 *
2005 * [66] CharRef ::= '&#' [0-9]+ ';' |
2006 * '&#x' [0-9a-fA-F]+ ';'
2007 *
2008 * [ WFC: Legal Character ]
2009 * Characters referred to using character references must match the
2010 * production for Char.
2011 *
2012 * Returns the value parsed (as an int), 0 in case of error
2013 */
2014int
2015xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002016 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002017 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002018 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002019
Owen Taylor3473f882001-02-23 17:55:21 +00002020 /*
2021 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2022 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002023 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002024 (NXT(2) == 'x')) {
2025 SKIP(3);
2026 GROW;
2027 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002028 if (count++ > 20) {
2029 count = 0;
2030 GROW;
2031 }
2032 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002033 val = val * 16 + (CUR - '0');
2034 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2035 val = val * 16 + (CUR - 'a') + 10;
2036 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2037 val = val * 16 + (CUR - 'A') + 10;
2038 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002039 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002040 val = 0;
2041 break;
2042 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002043 if (val > 0x10FFFF)
2044 outofrange = val;
2045
Owen Taylor3473f882001-02-23 17:55:21 +00002046 NEXT;
2047 count++;
2048 }
2049 if (RAW == ';') {
2050 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002051 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002052 ctxt->nbChars ++;
2053 ctxt->input->cur++;
2054 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002055 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002056 SKIP(2);
2057 GROW;
2058 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002059 if (count++ > 20) {
2060 count = 0;
2061 GROW;
2062 }
2063 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002064 val = val * 10 + (CUR - '0');
2065 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002066 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002067 val = 0;
2068 break;
2069 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002070 if (val > 0x10FFFF)
2071 outofrange = val;
2072
Owen Taylor3473f882001-02-23 17:55:21 +00002073 NEXT;
2074 count++;
2075 }
2076 if (RAW == ';') {
2077 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002078 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002079 ctxt->nbChars ++;
2080 ctxt->input->cur++;
2081 }
2082 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002083 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 }
2085
2086 /*
2087 * [ WFC: Legal Character ]
2088 * Characters referred to using character references must match the
2089 * production for Char.
2090 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002091 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002092 return(val);
2093 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002094 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2095 "xmlParseCharRef: invalid xmlChar value %d\n",
2096 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002097 }
2098 return(0);
2099}
2100
2101/**
2102 * xmlParseStringCharRef:
2103 * @ctxt: an XML parser context
2104 * @str: a pointer to an index in the string
2105 *
2106 * parse Reference declarations, variant parsing from a string rather
2107 * than an an input flow.
2108 *
2109 * [66] CharRef ::= '&#' [0-9]+ ';' |
2110 * '&#x' [0-9a-fA-F]+ ';'
2111 *
2112 * [ WFC: Legal Character ]
2113 * Characters referred to using character references must match the
2114 * production for Char.
2115 *
2116 * Returns the value parsed (as an int), 0 in case of error, str will be
2117 * updated to the current value of the index
2118 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002119static int
Owen Taylor3473f882001-02-23 17:55:21 +00002120xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2121 const xmlChar *ptr;
2122 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002123 unsigned int val = 0;
2124 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002125
2126 if ((str == NULL) || (*str == NULL)) return(0);
2127 ptr = *str;
2128 cur = *ptr;
2129 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2130 ptr += 3;
2131 cur = *ptr;
2132 while (cur != ';') { /* Non input consuming loop */
2133 if ((cur >= '0') && (cur <= '9'))
2134 val = val * 16 + (cur - '0');
2135 else if ((cur >= 'a') && (cur <= 'f'))
2136 val = val * 16 + (cur - 'a') + 10;
2137 else if ((cur >= 'A') && (cur <= 'F'))
2138 val = val * 16 + (cur - 'A') + 10;
2139 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002140 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002141 val = 0;
2142 break;
2143 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002144 if (val > 0x10FFFF)
2145 outofrange = val;
2146
Owen Taylor3473f882001-02-23 17:55:21 +00002147 ptr++;
2148 cur = *ptr;
2149 }
2150 if (cur == ';')
2151 ptr++;
2152 } else if ((cur == '&') && (ptr[1] == '#')){
2153 ptr += 2;
2154 cur = *ptr;
2155 while (cur != ';') { /* Non input consuming loops */
2156 if ((cur >= '0') && (cur <= '9'))
2157 val = val * 10 + (cur - '0');
2158 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002159 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002160 val = 0;
2161 break;
2162 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002163 if (val > 0x10FFFF)
2164 outofrange = val;
2165
Owen Taylor3473f882001-02-23 17:55:21 +00002166 ptr++;
2167 cur = *ptr;
2168 }
2169 if (cur == ';')
2170 ptr++;
2171 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002172 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002173 return(0);
2174 }
2175 *str = ptr;
2176
2177 /*
2178 * [ WFC: Legal Character ]
2179 * Characters referred to using character references must match the
2180 * production for Char.
2181 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002182 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002183 return(val);
2184 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002185 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2186 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2187 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002188 }
2189 return(0);
2190}
2191
2192/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002193 * xmlNewBlanksWrapperInputStream:
2194 * @ctxt: an XML parser context
2195 * @entity: an Entity pointer
2196 *
2197 * Create a new input stream for wrapping
2198 * blanks around a PEReference
2199 *
2200 * Returns the new input stream or NULL
2201 */
2202
2203static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2204
Daniel Veillardf4862f02002-09-10 11:13:43 +00002205static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002206xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2207 xmlParserInputPtr input;
2208 xmlChar *buffer;
2209 size_t length;
2210 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002211 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2212 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002213 return(NULL);
2214 }
2215 if (xmlParserDebugEntities)
2216 xmlGenericError(xmlGenericErrorContext,
2217 "new blanks wrapper for entity: %s\n", entity->name);
2218 input = xmlNewInputStream(ctxt);
2219 if (input == NULL) {
2220 return(NULL);
2221 }
2222 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002223 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002224 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002225 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002226 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002227 return(NULL);
2228 }
2229 buffer [0] = ' ';
2230 buffer [1] = '%';
2231 buffer [length-3] = ';';
2232 buffer [length-2] = ' ';
2233 buffer [length-1] = 0;
2234 memcpy(buffer + 2, entity->name, length - 5);
2235 input->free = deallocblankswrapper;
2236 input->base = buffer;
2237 input->cur = buffer;
2238 input->length = length;
2239 input->end = &buffer[length];
2240 return(input);
2241}
2242
2243/**
Owen Taylor3473f882001-02-23 17:55:21 +00002244 * xmlParserHandlePEReference:
2245 * @ctxt: the parser context
2246 *
2247 * [69] PEReference ::= '%' Name ';'
2248 *
2249 * [ WFC: No Recursion ]
2250 * A parsed entity must not contain a recursive
2251 * reference to itself, either directly or indirectly.
2252 *
2253 * [ WFC: Entity Declared ]
2254 * In a document without any DTD, a document with only an internal DTD
2255 * subset which contains no parameter entity references, or a document
2256 * with "standalone='yes'", ... ... The declaration of a parameter
2257 * entity must precede any reference to it...
2258 *
2259 * [ VC: Entity Declared ]
2260 * In a document with an external subset or external parameter entities
2261 * with "standalone='no'", ... ... The declaration of a parameter entity
2262 * must precede any reference to it...
2263 *
2264 * [ WFC: In DTD ]
2265 * Parameter-entity references may only appear in the DTD.
2266 * NOTE: misleading but this is handled.
2267 *
2268 * A PEReference may have been detected in the current input stream
2269 * the handling is done accordingly to
2270 * http://www.w3.org/TR/REC-xml#entproc
2271 * i.e.
2272 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002273 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002274 */
2275void
2276xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002277 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002278 xmlEntityPtr entity = NULL;
2279 xmlParserInputPtr input;
2280
Owen Taylor3473f882001-02-23 17:55:21 +00002281 if (RAW != '%') return;
2282 switch(ctxt->instate) {
2283 case XML_PARSER_CDATA_SECTION:
2284 return;
2285 case XML_PARSER_COMMENT:
2286 return;
2287 case XML_PARSER_START_TAG:
2288 return;
2289 case XML_PARSER_END_TAG:
2290 return;
2291 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002292 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002293 return;
2294 case XML_PARSER_PROLOG:
2295 case XML_PARSER_START:
2296 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002297 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002298 return;
2299 case XML_PARSER_ENTITY_DECL:
2300 case XML_PARSER_CONTENT:
2301 case XML_PARSER_ATTRIBUTE_VALUE:
2302 case XML_PARSER_PI:
2303 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002304 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002305 /* we just ignore it there */
2306 return;
2307 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002308 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002309 return;
2310 case XML_PARSER_ENTITY_VALUE:
2311 /*
2312 * NOTE: in the case of entity values, we don't do the
2313 * substitution here since we need the literal
2314 * entity value to be able to save the internal
2315 * subset of the document.
2316 * This will be handled by xmlStringDecodeEntities
2317 */
2318 return;
2319 case XML_PARSER_DTD:
2320 /*
2321 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2322 * In the internal DTD subset, parameter-entity references
2323 * can occur only where markup declarations can occur, not
2324 * within markup declarations.
2325 * In that case this is handled in xmlParseMarkupDecl
2326 */
2327 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2328 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002329 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002330 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002331 break;
2332 case XML_PARSER_IGNORE:
2333 return;
2334 }
2335
2336 NEXT;
2337 name = xmlParseName(ctxt);
2338 if (xmlParserDebugEntities)
2339 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002340 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002341 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002342 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002343 } else {
2344 if (RAW == ';') {
2345 NEXT;
2346 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2347 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2348 if (entity == NULL) {
2349
2350 /*
2351 * [ WFC: Entity Declared ]
2352 * In a document without any DTD, a document with only an
2353 * internal DTD subset which contains no parameter entity
2354 * references, or a document with "standalone='yes'", ...
2355 * ... The declaration of a parameter entity must precede
2356 * any reference to it...
2357 */
2358 if ((ctxt->standalone == 1) ||
2359 ((ctxt->hasExternalSubset == 0) &&
2360 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002361 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002362 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002363 } else {
2364 /*
2365 * [ VC: Entity Declared ]
2366 * In a document with an external subset or external
2367 * parameter entities with "standalone='no'", ...
2368 * ... The declaration of a parameter entity must precede
2369 * any reference to it...
2370 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002371 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2372 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2373 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002374 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002375 } else
2376 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2377 "PEReference: %%%s; not found\n",
2378 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002379 ctxt->valid = 0;
2380 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002381 } else if (ctxt->input->free != deallocblankswrapper) {
2382 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002383 if (xmlPushInput(ctxt, input) < 0)
2384 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002385 } else {
2386 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2387 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002388 xmlChar start[4];
2389 xmlCharEncoding enc;
2390
Owen Taylor3473f882001-02-23 17:55:21 +00002391 /*
2392 * handle the extra spaces added before and after
2393 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002394 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002395 */
2396 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002397 if (xmlPushInput(ctxt, input) < 0)
2398 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002399
2400 /*
2401 * Get the 4 first bytes and decode the charset
2402 * if enc != XML_CHAR_ENCODING_NONE
2403 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002404 * Note that, since we may have some non-UTF8
2405 * encoding (like UTF16, bug 135229), the 'length'
2406 * is not known, but we can calculate based upon
2407 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002408 */
2409 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002410 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002411 start[0] = RAW;
2412 start[1] = NXT(1);
2413 start[2] = NXT(2);
2414 start[3] = NXT(3);
2415 enc = xmlDetectCharEncoding(start, 4);
2416 if (enc != XML_CHAR_ENCODING_NONE) {
2417 xmlSwitchEncoding(ctxt, enc);
2418 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002419 }
2420
Owen Taylor3473f882001-02-23 17:55:21 +00002421 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002422 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2423 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002424 xmlParseTextDecl(ctxt);
2425 }
Owen Taylor3473f882001-02-23 17:55:21 +00002426 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002427 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2428 "PEReference: %s is not a parameter entity\n",
2429 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002430 }
2431 }
2432 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002433 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002434 }
Owen Taylor3473f882001-02-23 17:55:21 +00002435 }
2436}
2437
/*
 * Macro used to grow the current buffer.
 *
 * Expects, in the expansion scope, an int variable named <buffer>_size
 * holding the current capacity and a label named mem_error.  The capacity
 * becomes twice its previous value plus n; on reallocation failure control
 * jumps to mem_error with the old buffer pointer left untouched.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    buffer##_size = (buffer##_size * 2) + (n);				\
    tmp = (xmlChar *)							\
		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
}
2450
2451/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002452 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002453 * @ctxt: the parser context
2454 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002455 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002456 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2457 * @end: an end marker xmlChar, 0 if none
2458 * @end2: an end marker xmlChar, 0 if none
2459 * @end3: an end marker xmlChar, 0 if none
2460 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002461 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002462 *
2463 * [67] Reference ::= EntityRef | CharRef
2464 *
2465 * [69] PEReference ::= '%' Name ';'
2466 *
2467 * Returns A newly allocated string with the substitution done. The caller
2468 * must deallocate it !
2469 */
2470xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002471xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2472 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002473 xmlChar *buffer = NULL;
2474 int buffer_size = 0;
2475
2476 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002477 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002478 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002479 xmlEntityPtr ent;
2480 int c,l;
2481 int nbchars = 0;
2482
Daniel Veillarda82b1822004-11-08 16:24:57 +00002483 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002484 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002485 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002486
Daniel Veillard0161e632008-08-28 15:36:32 +00002487 if (((ctxt->depth > 40) &&
2488 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2489 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002490 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002491 return(NULL);
2492 }
2493
2494 /*
2495 * allocate a translation buffer.
2496 */
2497 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002498 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002499 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002500
2501 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002502 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002503 * we are operating on already parsed values.
2504 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002505 if (str < last)
2506 c = CUR_SCHAR(str, l);
2507 else
2508 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002509 while ((c != 0) && (c != end) && /* non input consuming loop */
2510 (c != end2) && (c != end3)) {
2511
2512 if (c == 0) break;
2513 if ((c == '&') && (str[1] == '#')) {
2514 int val = xmlParseStringCharRef(ctxt, &str);
2515 if (val != 0) {
2516 COPY_BUF(0,buffer,nbchars,val);
2517 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002518 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002519 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002520 }
Owen Taylor3473f882001-02-23 17:55:21 +00002521 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2522 if (xmlParserDebugEntities)
2523 xmlGenericError(xmlGenericErrorContext,
2524 "String decoding Entity Reference: %.30s\n",
2525 str);
2526 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002527 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2528 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002529 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002530 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002531 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 if ((ent != NULL) &&
2533 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2534 if (ent->content != NULL) {
2535 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002536 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002537 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002538 }
Owen Taylor3473f882001-02-23 17:55:21 +00002539 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002540 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2541 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002542 }
2543 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002544 ctxt->depth++;
2545 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2546 0, 0, 0);
2547 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002548
Owen Taylor3473f882001-02-23 17:55:21 +00002549 if (rep != NULL) {
2550 current = rep;
2551 while (*current != 0) { /* non input consuming loop */
2552 buffer[nbchars++] = *current++;
2553 if (nbchars >
2554 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002555 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2556 goto int_error;
2557 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002558 }
2559 }
2560 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002561 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002562 }
2563 } else if (ent != NULL) {
2564 int i = xmlStrlen(ent->name);
2565 const xmlChar *cur = ent->name;
2566
2567 buffer[nbchars++] = '&';
2568 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002569 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002570 }
2571 for (;i > 0;i--)
2572 buffer[nbchars++] = *cur++;
2573 buffer[nbchars++] = ';';
2574 }
2575 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2576 if (xmlParserDebugEntities)
2577 xmlGenericError(xmlGenericErrorContext,
2578 "String decoding PE Reference: %.30s\n", str);
2579 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002580 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2581 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002582 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002583 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002584 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002585 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002586 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002587 }
Owen Taylor3473f882001-02-23 17:55:21 +00002588 ctxt->depth++;
2589 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2590 0, 0, 0);
2591 ctxt->depth--;
2592 if (rep != NULL) {
2593 current = rep;
2594 while (*current != 0) { /* non input consuming loop */
2595 buffer[nbchars++] = *current++;
2596 if (nbchars >
2597 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002598 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2599 goto int_error;
2600 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002601 }
2602 }
2603 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002604 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002605 }
2606 }
2607 } else {
2608 COPY_BUF(l,buffer,nbchars,c);
2609 str += l;
2610 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002611 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002612 }
2613 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002614 if (str < last)
2615 c = CUR_SCHAR(str, l);
2616 else
2617 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002618 }
2619 buffer[nbchars++] = 0;
2620 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002621
2622mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002623 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002624int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002625 if (rep != NULL)
2626 xmlFree(rep);
2627 if (buffer != NULL)
2628 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002629 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002630}
2631
Daniel Veillarde57ec792003-09-10 10:50:59 +00002632/**
2633 * xmlStringDecodeEntities:
2634 * @ctxt: the parser context
2635 * @str: the input string
2636 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2637 * @end: an end marker xmlChar, 0 if none
2638 * @end2: an end marker xmlChar, 0 if none
2639 * @end3: an end marker xmlChar, 0 if none
2640 *
2641 * Takes a entity string content and process to do the adequate substitutions.
2642 *
2643 * [67] Reference ::= EntityRef | CharRef
2644 *
2645 * [69] PEReference ::= '%' Name ';'
2646 *
2647 * Returns A newly allocated string with the substitution done. The caller
2648 * must deallocate it !
2649 */
2650xmlChar *
2651xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2652 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002653 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002654 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2655 end, end2, end3));
2656}
Owen Taylor3473f882001-02-23 17:55:21 +00002657
2658/************************************************************************
2659 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002660 * Commodity functions, cleanup needed ? *
2661 * *
2662 ************************************************************************/
2663
2664/**
2665 * areBlanks:
2666 * @ctxt: an XML parser context
2667 * @str: a xmlChar *
2668 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002669 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002670 *
2671 * Is this a sequence of blank chars that one can ignore ?
2672 *
2673 * Returns 1 if ignorable 0 otherwise.
2674 */
2675
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002676static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2677 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002678 int i, ret;
2679 xmlNodePtr lastChild;
2680
Daniel Veillard05c13a22001-09-09 08:38:09 +00002681 /*
2682 * Don't spend time trying to differentiate them, the same callback is
2683 * used !
2684 */
2685 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002686 return(0);
2687
Owen Taylor3473f882001-02-23 17:55:21 +00002688 /*
2689 * Check for xml:space value.
2690 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002691 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2692 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002693 return(0);
2694
2695 /*
2696 * Check that the string is made of blanks
2697 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002698 if (blank_chars == 0) {
2699 for (i = 0;i < len;i++)
2700 if (!(IS_BLANK_CH(str[i]))) return(0);
2701 }
Owen Taylor3473f882001-02-23 17:55:21 +00002702
2703 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002704 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002705 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002706 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002707 if (ctxt->myDoc != NULL) {
2708 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2709 if (ret == 0) return(1);
2710 if (ret == 1) return(0);
2711 }
2712
2713 /*
2714 * Otherwise, heuristic :-\
2715 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002716 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002717 if ((ctxt->node->children == NULL) &&
2718 (RAW == '<') && (NXT(1) == '/')) return(0);
2719
2720 lastChild = xmlGetLastChild(ctxt->node);
2721 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002722 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2723 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002724 } else if (xmlNodeIsText(lastChild))
2725 return(0);
2726 else if ((ctxt->node->children != NULL) &&
2727 (xmlNodeIsText(ctxt->node->children)))
2728 return(0);
2729 return(1);
2730}
2731
Owen Taylor3473f882001-02-23 17:55:21 +00002732/************************************************************************
2733 * *
2734 * Extra stuff for namespace support *
2735 * Relates to http://www.w3.org/TR/WD-xml-names *
2736 * *
2737 ************************************************************************/
2738
2739/**
2740 * xmlSplitQName:
2741 * @ctxt: an XML parser context
2742 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002743 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002744 *
2745 * parse an UTF8 encoded XML qualified name string
2746 *
2747 * [NS 5] QName ::= (Prefix ':')? LocalPart
2748 *
2749 * [NS 6] Prefix ::= NCName
2750 *
2751 * [NS 7] LocalPart ::= NCName
2752 *
2753 * Returns the local part, and prefix is updated
2754 * to get the Prefix if any.
2755 */
2756
2757xmlChar *
2758xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2759 xmlChar buf[XML_MAX_NAMELEN + 5];
2760 xmlChar *buffer = NULL;
2761 int len = 0;
2762 int max = XML_MAX_NAMELEN;
2763 xmlChar *ret = NULL;
2764 const xmlChar *cur = name;
2765 int c;
2766
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002767 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002768 *prefix = NULL;
2769
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002770 if (cur == NULL) return(NULL);
2771
Owen Taylor3473f882001-02-23 17:55:21 +00002772#ifndef XML_XML_NAMESPACE
2773 /* xml: prefix is not really a namespace */
2774 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2775 (cur[2] == 'l') && (cur[3] == ':'))
2776 return(xmlStrdup(name));
2777#endif
2778
Daniel Veillard597bc482003-07-24 16:08:28 +00002779 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002780 if (cur[0] == ':')
2781 return(xmlStrdup(name));
2782
2783 c = *cur++;
2784 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2785 buf[len++] = c;
2786 c = *cur++;
2787 }
2788 if (len >= max) {
2789 /*
2790 * Okay someone managed to make a huge name, so he's ready to pay
2791 * for the processing speed.
2792 */
2793 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002794
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002795 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002796 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002797 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002798 return(NULL);
2799 }
2800 memcpy(buffer, buf, len);
2801 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2802 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002803 xmlChar *tmp;
2804
Owen Taylor3473f882001-02-23 17:55:21 +00002805 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002806 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002807 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002808 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002809 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002810 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002811 return(NULL);
2812 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002813 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002814 }
2815 buffer[len++] = c;
2816 c = *cur++;
2817 }
2818 buffer[len] = 0;
2819 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002820
Daniel Veillard597bc482003-07-24 16:08:28 +00002821 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002822 if (buffer != NULL)
2823 xmlFree(buffer);
2824 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002825 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002826 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002827
Owen Taylor3473f882001-02-23 17:55:21 +00002828 if (buffer == NULL)
2829 ret = xmlStrndup(buf, len);
2830 else {
2831 ret = buffer;
2832 buffer = NULL;
2833 max = XML_MAX_NAMELEN;
2834 }
2835
2836
2837 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002838 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002839 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002840 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002841 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002842 }
Owen Taylor3473f882001-02-23 17:55:21 +00002843 len = 0;
2844
Daniel Veillardbb284f42002-10-16 18:02:47 +00002845 /*
2846 * Check that the first character is proper to start
2847 * a new name
2848 */
2849 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2850 ((c >= 0x41) && (c <= 0x5A)) ||
2851 (c == '_') || (c == ':'))) {
2852 int l;
2853 int first = CUR_SCHAR(cur, l);
2854
2855 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002856 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002857 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002858 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002859 }
2860 }
2861 cur++;
2862
Owen Taylor3473f882001-02-23 17:55:21 +00002863 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2864 buf[len++] = c;
2865 c = *cur++;
2866 }
2867 if (len >= max) {
2868 /*
2869 * Okay someone managed to make a huge name, so he's ready to pay
2870 * for the processing speed.
2871 */
2872 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002873
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002874 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002876 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002877 return(NULL);
2878 }
2879 memcpy(buffer, buf, len);
2880 while (c != 0) { /* tested bigname2.xml */
2881 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002882 xmlChar *tmp;
2883
Owen Taylor3473f882001-02-23 17:55:21 +00002884 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002885 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002886 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002887 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002888 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002889 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002890 return(NULL);
2891 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002892 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002893 }
2894 buffer[len++] = c;
2895 c = *cur++;
2896 }
2897 buffer[len] = 0;
2898 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002899
Owen Taylor3473f882001-02-23 17:55:21 +00002900 if (buffer == NULL)
2901 ret = xmlStrndup(buf, len);
2902 else {
2903 ret = buffer;
2904 }
2905 }
2906
2907 return(ret);
2908}
2909
2910/************************************************************************
2911 * *
2912 * The parser itself *
2913 * Relates to http://www.w3.org/TR/REC-xml *
2914 * *
2915 ************************************************************************/
2916
Daniel Veillard34e3f642008-07-29 09:02:27 +00002917/************************************************************************
2918 * *
2919 * Routines to parse Name, NCName and NmToken *
2920 * *
2921 ************************************************************************/
#if defined(DEBUG)
/*
 * Statistics counters, only compiled in DEBUG builds.
 * NOTE(review): each is presumably bumped by the name-parsing routine it
 * is named after - confirm against the callers.
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif /* DEBUG */
2930
Daniel Veillard34e3f642008-07-29 09:02:27 +00002931/*
2932 * The two following functions are related to the change of accepted
2933 * characters for Name and NmToken in the Revision 5 of XML-1.0
2934 * They correspond to the modified production [4] and the new production [4a]
2935 * changes in that revision. Also note that the macros used for the
2936 * productions Letter, Digit, CombiningChar and Extender are not needed
2937 * anymore.
2938 * We still keep compatibility to pre-revision5 parsing semantic if the
2939 * new XML_PARSE_OLD10 option is given to the parser.
2940 */
2941static int
2942xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2943 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2944 /*
2945 * Use the new checks of production [4] [4a] amd [5] of the
2946 * Update 5 of XML-1.0
2947 */
2948 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2949 (((c >= 'a') && (c <= 'z')) ||
2950 ((c >= 'A') && (c <= 'Z')) ||
2951 (c == '_') || (c == ':') ||
2952 ((c >= 0xC0) && (c <= 0xD6)) ||
2953 ((c >= 0xD8) && (c <= 0xF6)) ||
2954 ((c >= 0xF8) && (c <= 0x2FF)) ||
2955 ((c >= 0x370) && (c <= 0x37D)) ||
2956 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2957 ((c >= 0x200C) && (c <= 0x200D)) ||
2958 ((c >= 0x2070) && (c <= 0x218F)) ||
2959 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2960 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2961 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2962 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2963 ((c >= 0x10000) && (c <= 0xEFFFF))))
2964 return(1);
2965 } else {
2966 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2967 return(1);
2968 }
2969 return(0);
2970}
2971
2972static int
2973xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2974 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2975 /*
2976 * Use the new checks of production [4] [4a] amd [5] of the
2977 * Update 5 of XML-1.0
2978 */
2979 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2980 (((c >= 'a') && (c <= 'z')) ||
2981 ((c >= 'A') && (c <= 'Z')) ||
2982 ((c >= '0') && (c <= '9')) || /* !start */
2983 (c == '_') || (c == ':') ||
2984 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2985 ((c >= 0xC0) && (c <= 0xD6)) ||
2986 ((c >= 0xD8) && (c <= 0xF6)) ||
2987 ((c >= 0xF8) && (c <= 0x2FF)) ||
2988 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2989 ((c >= 0x370) && (c <= 0x37D)) ||
2990 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2991 ((c >= 0x200C) && (c <= 0x200D)) ||
2992 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2993 ((c >= 0x2070) && (c <= 0x218F)) ||
2994 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2995 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2996 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2997 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2998 ((c >= 0x10000) && (c <= 0xEFFFF))))
2999 return(1);
3000 } else {
3001 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3002 (c == '.') || (c == '-') ||
3003 (c == '_') || (c == ':') ||
3004 (IS_COMBINING(c)) ||
3005 (IS_EXTENDER(c)))
3006 return(1);
3007 }
3008 return(0);
3009}
3010
Daniel Veillarde57ec792003-09-10 10:50:59 +00003011static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003012 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003013
Daniel Veillard34e3f642008-07-29 09:02:27 +00003014static const xmlChar *
3015xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3016 int len = 0, l;
3017 int c;
3018 int count = 0;
3019
Daniel Veillardc6561462009-03-25 10:22:31 +00003020#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003021 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003022#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003023
3024 /*
3025 * Handler for more complex cases
3026 */
3027 GROW;
3028 c = CUR_CHAR(l);
3029 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3030 /*
3031 * Use the new checks of production [4] [4a] amd [5] of the
3032 * Update 5 of XML-1.0
3033 */
3034 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3035 (!(((c >= 'a') && (c <= 'z')) ||
3036 ((c >= 'A') && (c <= 'Z')) ||
3037 (c == '_') || (c == ':') ||
3038 ((c >= 0xC0) && (c <= 0xD6)) ||
3039 ((c >= 0xD8) && (c <= 0xF6)) ||
3040 ((c >= 0xF8) && (c <= 0x2FF)) ||
3041 ((c >= 0x370) && (c <= 0x37D)) ||
3042 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3043 ((c >= 0x200C) && (c <= 0x200D)) ||
3044 ((c >= 0x2070) && (c <= 0x218F)) ||
3045 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3046 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3047 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3048 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3049 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3050 return(NULL);
3051 }
3052 len += l;
3053 NEXTL(l);
3054 c = CUR_CHAR(l);
3055 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3056 (((c >= 'a') && (c <= 'z')) ||
3057 ((c >= 'A') && (c <= 'Z')) ||
3058 ((c >= '0') && (c <= '9')) || /* !start */
3059 (c == '_') || (c == ':') ||
3060 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3061 ((c >= 0xC0) && (c <= 0xD6)) ||
3062 ((c >= 0xD8) && (c <= 0xF6)) ||
3063 ((c >= 0xF8) && (c <= 0x2FF)) ||
3064 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3065 ((c >= 0x370) && (c <= 0x37D)) ||
3066 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3067 ((c >= 0x200C) && (c <= 0x200D)) ||
3068 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3069 ((c >= 0x2070) && (c <= 0x218F)) ||
3070 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3071 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3072 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3073 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3074 ((c >= 0x10000) && (c <= 0xEFFFF))
3075 )) {
3076 if (count++ > 100) {
3077 count = 0;
3078 GROW;
3079 }
3080 len += l;
3081 NEXTL(l);
3082 c = CUR_CHAR(l);
3083 }
3084 } else {
3085 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3086 (!IS_LETTER(c) && (c != '_') &&
3087 (c != ':'))) {
3088 return(NULL);
3089 }
3090 len += l;
3091 NEXTL(l);
3092 c = CUR_CHAR(l);
3093
3094 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3095 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3096 (c == '.') || (c == '-') ||
3097 (c == '_') || (c == ':') ||
3098 (IS_COMBINING(c)) ||
3099 (IS_EXTENDER(c)))) {
3100 if (count++ > 100) {
3101 count = 0;
3102 GROW;
3103 }
3104 len += l;
3105 NEXTL(l);
3106 c = CUR_CHAR(l);
3107 }
3108 }
3109 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3110 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3111 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3112}
3113
Owen Taylor3473f882001-02-23 17:55:21 +00003114/**
3115 * xmlParseName:
3116 * @ctxt: an XML parser context
3117 *
3118 * parse an XML name.
3119 *
3120 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3121 * CombiningChar | Extender
3122 *
3123 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3124 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003125 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003126 *
3127 * Returns the Name parsed or NULL
3128 */
3129
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003130const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003131xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003132 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003133 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003134 int count = 0;
3135
3136 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003137
Daniel Veillardc6561462009-03-25 10:22:31 +00003138#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003139 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003140#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003141
Daniel Veillard48b2f892001-02-25 16:11:03 +00003142 /*
3143 * Accelerator for simple ASCII names
3144 */
3145 in = ctxt->input->cur;
3146 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3147 ((*in >= 0x41) && (*in <= 0x5A)) ||
3148 (*in == '_') || (*in == ':')) {
3149 in++;
3150 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3151 ((*in >= 0x41) && (*in <= 0x5A)) ||
3152 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003153 (*in == '_') || (*in == '-') ||
3154 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003155 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003156 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003157 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003158 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003159 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003160 ctxt->nbChars += count;
3161 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003162 if (ret == NULL)
3163 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003164 return(ret);
3165 }
3166 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003167 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003168 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003169}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003170
Daniel Veillard34e3f642008-07-29 09:02:27 +00003171static const xmlChar *
3172xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3173 int len = 0, l;
3174 int c;
3175 int count = 0;
3176
Daniel Veillardc6561462009-03-25 10:22:31 +00003177#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003178 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003179#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003180
3181 /*
3182 * Handler for more complex cases
3183 */
3184 GROW;
3185 c = CUR_CHAR(l);
3186 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3187 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3188 return(NULL);
3189 }
3190
3191 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3192 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3193 if (count++ > 100) {
3194 count = 0;
3195 GROW;
3196 }
3197 len += l;
3198 NEXTL(l);
3199 c = CUR_CHAR(l);
3200 }
3201 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3202}
3203
3204/**
3205 * xmlParseNCName:
3206 * @ctxt: an XML parser context
3207 * @len: lenght of the string parsed
3208 *
3209 * parse an XML name.
3210 *
3211 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3212 * CombiningChar | Extender
3213 *
3214 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3215 *
3216 * Returns the Name parsed or NULL
3217 */
3218
3219static const xmlChar *
3220xmlParseNCName(xmlParserCtxtPtr ctxt) {
3221 const xmlChar *in;
3222 const xmlChar *ret;
3223 int count = 0;
3224
Daniel Veillardc6561462009-03-25 10:22:31 +00003225#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003226 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003227#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003228
3229 /*
3230 * Accelerator for simple ASCII names
3231 */
3232 in = ctxt->input->cur;
3233 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3234 ((*in >= 0x41) && (*in <= 0x5A)) ||
3235 (*in == '_')) {
3236 in++;
3237 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3238 ((*in >= 0x41) && (*in <= 0x5A)) ||
3239 ((*in >= 0x30) && (*in <= 0x39)) ||
3240 (*in == '_') || (*in == '-') ||
3241 (*in == '.'))
3242 in++;
3243 if ((*in > 0) && (*in < 0x80)) {
3244 count = in - ctxt->input->cur;
3245 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3246 ctxt->input->cur = in;
3247 ctxt->nbChars += count;
3248 ctxt->input->col += count;
3249 if (ret == NULL) {
3250 xmlErrMemory(ctxt, NULL);
3251 }
3252 return(ret);
3253 }
3254 }
3255 return(xmlParseNCNameComplex(ctxt));
3256}
3257
Daniel Veillard46de64e2002-05-29 08:21:33 +00003258/**
3259 * xmlParseNameAndCompare:
3260 * @ctxt: an XML parser context
3261 *
3262 * parse an XML name and compares for match
3263 * (specialized for endtag parsing)
3264 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003265 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3266 * and the name for mismatch
3267 */
3268
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003269static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003270xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003271 register const xmlChar *cmp = other;
3272 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003273 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003274
3275 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003276
Daniel Veillard46de64e2002-05-29 08:21:33 +00003277 in = ctxt->input->cur;
3278 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003279 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003280 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003281 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003282 }
William M. Brack76e95df2003-10-18 16:20:14 +00003283 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003284 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003285 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003286 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003287 }
3288 /* failure (or end of input buffer), check with full function */
3289 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003290 /* strings coming from the dictionnary direct compare possible */
3291 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003292 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003293 }
3294 return ret;
3295}
3296
Owen Taylor3473f882001-02-23 17:55:21 +00003297/**
3298 * xmlParseStringName:
3299 * @ctxt: an XML parser context
3300 * @str: a pointer to the string pointer (IN/OUT)
3301 *
3302 * parse an XML name.
3303 *
3304 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3305 * CombiningChar | Extender
3306 *
3307 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3308 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003309 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003310 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003311 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003312 * is updated to the current location in the string.
3313 */
3314
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003315static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003316xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3317 xmlChar buf[XML_MAX_NAMELEN + 5];
3318 const xmlChar *cur = *str;
3319 int len = 0, l;
3320 int c;
3321
Daniel Veillardc6561462009-03-25 10:22:31 +00003322#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003323 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003324#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003325
Owen Taylor3473f882001-02-23 17:55:21 +00003326 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003327 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003328 return(NULL);
3329 }
3330
Daniel Veillard34e3f642008-07-29 09:02:27 +00003331 COPY_BUF(l,buf,len,c);
3332 cur += l;
3333 c = CUR_SCHAR(cur, l);
3334 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003335 COPY_BUF(l,buf,len,c);
3336 cur += l;
3337 c = CUR_SCHAR(cur, l);
3338 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3339 /*
3340 * Okay someone managed to make a huge name, so he's ready to pay
3341 * for the processing speed.
3342 */
3343 xmlChar *buffer;
3344 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003345
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003346 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003347 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003348 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003349 return(NULL);
3350 }
3351 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003352 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003353 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003354 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003355 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003356 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003357 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003358 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003359 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003360 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003361 return(NULL);
3362 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003363 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003364 }
3365 COPY_BUF(l,buffer,len,c);
3366 cur += l;
3367 c = CUR_SCHAR(cur, l);
3368 }
3369 buffer[len] = 0;
3370 *str = cur;
3371 return(buffer);
3372 }
3373 }
3374 *str = cur;
3375 return(xmlStrndup(buf, len));
3376}
3377
3378/**
3379 * xmlParseNmtoken:
3380 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003381 *
Owen Taylor3473f882001-02-23 17:55:21 +00003382 * parse an XML Nmtoken.
3383 *
3384 * [7] Nmtoken ::= (NameChar)+
3385 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003386 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003387 *
3388 * Returns the Nmtoken parsed or NULL
3389 */
3390
3391xmlChar *
3392xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3393 xmlChar buf[XML_MAX_NAMELEN + 5];
3394 int len = 0, l;
3395 int c;
3396 int count = 0;
3397
Daniel Veillardc6561462009-03-25 10:22:31 +00003398#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003399 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003400#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003401
Owen Taylor3473f882001-02-23 17:55:21 +00003402 GROW;
3403 c = CUR_CHAR(l);
3404
Daniel Veillard34e3f642008-07-29 09:02:27 +00003405 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003406 if (count++ > 100) {
3407 count = 0;
3408 GROW;
3409 }
3410 COPY_BUF(l,buf,len,c);
3411 NEXTL(l);
3412 c = CUR_CHAR(l);
3413 if (len >= XML_MAX_NAMELEN) {
3414 /*
3415 * Okay someone managed to make a huge token, so he's ready to pay
3416 * for the processing speed.
3417 */
3418 xmlChar *buffer;
3419 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003420
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003421 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003422 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003423 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003424 return(NULL);
3425 }
3426 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003427 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003428 if (count++ > 100) {
3429 count = 0;
3430 GROW;
3431 }
3432 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003433 xmlChar *tmp;
3434
Owen Taylor3473f882001-02-23 17:55:21 +00003435 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003436 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003437 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003438 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003439 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003440 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003441 return(NULL);
3442 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003443 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003444 }
3445 COPY_BUF(l,buffer,len,c);
3446 NEXTL(l);
3447 c = CUR_CHAR(l);
3448 }
3449 buffer[len] = 0;
3450 return(buffer);
3451 }
3452 }
3453 if (len == 0)
3454 return(NULL);
3455 return(xmlStrndup(buf, len));
3456}
3457
3458/**
3459 * xmlParseEntityValue:
3460 * @ctxt: an XML parser context
3461 * @orig: if non-NULL store a copy of the original entity value
3462 *
3463 * parse a value for ENTITY declarations
3464 *
3465 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3466 * "'" ([^%&'] | PEReference | Reference)* "'"
3467 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003468 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003469 */
3470
3471xmlChar *
3472xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3473 xmlChar *buf = NULL;
3474 int len = 0;
3475 int size = XML_PARSER_BUFFER_SIZE;
3476 int c, l;
3477 xmlChar stop;
3478 xmlChar *ret = NULL;
3479 const xmlChar *cur = NULL;
3480 xmlParserInputPtr input;
3481
3482 if (RAW == '"') stop = '"';
3483 else if (RAW == '\'') stop = '\'';
3484 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003485 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003486 return(NULL);
3487 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003488 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003489 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003490 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003491 return(NULL);
3492 }
3493
3494 /*
3495 * The content of the entity definition is copied in a buffer.
3496 */
3497
3498 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3499 input = ctxt->input;
3500 GROW;
3501 NEXT;
3502 c = CUR_CHAR(l);
3503 /*
3504 * NOTE: 4.4.5 Included in Literal
3505 * When a parameter entity reference appears in a literal entity
3506 * value, ... a single or double quote character in the replacement
3507 * text is always treated as a normal data character and will not
3508 * terminate the literal.
3509 * In practice it means we stop the loop only when back at parsing
3510 * the initial entity and the quote is found
3511 */
William M. Brack871611b2003-10-18 04:53:14 +00003512 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003513 (ctxt->input != input))) {
3514 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003515 xmlChar *tmp;
3516
Owen Taylor3473f882001-02-23 17:55:21 +00003517 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003518 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3519 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003520 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003521 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003522 return(NULL);
3523 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003524 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003525 }
3526 COPY_BUF(l,buf,len,c);
3527 NEXTL(l);
3528 /*
3529 * Pop-up of finished entities.
3530 */
3531 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3532 xmlPopInput(ctxt);
3533
3534 GROW;
3535 c = CUR_CHAR(l);
3536 if (c == 0) {
3537 GROW;
3538 c = CUR_CHAR(l);
3539 }
3540 }
3541 buf[len] = 0;
3542
3543 /*
3544 * Raise problem w.r.t. '&' and '%' being used in non-entities
3545 * reference constructs. Note Charref will be handled in
3546 * xmlStringDecodeEntities()
3547 */
3548 cur = buf;
3549 while (*cur != 0) { /* non input consuming */
3550 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3551 xmlChar *name;
3552 xmlChar tmp = *cur;
3553
3554 cur++;
3555 name = xmlParseStringName(ctxt, &cur);
3556 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003557 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003558 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003559 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003560 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003561 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3562 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003563 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003564 }
3565 if (name != NULL)
3566 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003567 if (*cur == 0)
3568 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003569 }
3570 cur++;
3571 }
3572
3573 /*
3574 * Then PEReference entities are substituted.
3575 */
3576 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003577 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003578 xmlFree(buf);
3579 } else {
3580 NEXT;
3581 /*
3582 * NOTE: 4.4.7 Bypassed
3583 * When a general entity reference appears in the EntityValue in
3584 * an entity declaration, it is bypassed and left as is.
3585 * so XML_SUBSTITUTE_REF is not set here.
3586 */
3587 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3588 0, 0, 0);
3589 if (orig != NULL)
3590 *orig = buf;
3591 else
3592 xmlFree(buf);
3593 }
3594
3595 return(ret);
3596}
3597
3598/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003599 * xmlParseAttValueComplex:
3600 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003601 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003602 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003603 *
3604 * parse a value for an attribute, this is the fallback function
3605 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003606 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003607 *
3608 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3609 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003610static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003611xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003612 xmlChar limit = 0;
3613 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003614 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003615 int len = 0;
3616 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003617 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003618 xmlChar *current = NULL;
3619 xmlEntityPtr ent;
3620
Owen Taylor3473f882001-02-23 17:55:21 +00003621 if (NXT(0) == '"') {
3622 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3623 limit = '"';
3624 NEXT;
3625 } else if (NXT(0) == '\'') {
3626 limit = '\'';
3627 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3628 NEXT;
3629 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003630 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003631 return(NULL);
3632 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003633
Owen Taylor3473f882001-02-23 17:55:21 +00003634 /*
3635 * allocate a translation buffer.
3636 */
3637 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003638 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003639 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003640
3641 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003642 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003643 */
3644 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003645 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003646 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003647 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003648 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003649 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003650 if (NXT(1) == '#') {
3651 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003652
Owen Taylor3473f882001-02-23 17:55:21 +00003653 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003654 if (ctxt->replaceEntities) {
3655 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003656 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003657 }
3658 buf[len++] = '&';
3659 } else {
3660 /*
3661 * The reparsing will be done in xmlStringGetNodeList()
3662 * called by the attribute() function in SAX.c
3663 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003664 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003665 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003666 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003667 buf[len++] = '&';
3668 buf[len++] = '#';
3669 buf[len++] = '3';
3670 buf[len++] = '8';
3671 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003672 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003673 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003674 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003675 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003676 }
Owen Taylor3473f882001-02-23 17:55:21 +00003677 len += xmlCopyChar(0, &buf[len], val);
3678 }
3679 } else {
3680 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003681 ctxt->nbentities++;
3682 if (ent != NULL)
3683 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003684 if ((ent != NULL) &&
3685 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3686 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003687 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003688 }
3689 if ((ctxt->replaceEntities == 0) &&
3690 (ent->content[0] == '&')) {
3691 buf[len++] = '&';
3692 buf[len++] = '#';
3693 buf[len++] = '3';
3694 buf[len++] = '8';
3695 buf[len++] = ';';
3696 } else {
3697 buf[len++] = ent->content[0];
3698 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003699 } else if ((ent != NULL) &&
3700 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003701 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3702 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003703 XML_SUBSTITUTE_REF,
3704 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003705 if (rep != NULL) {
3706 current = rep;
3707 while (*current != 0) { /* non input consuming */
3708 buf[len++] = *current++;
3709 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003710 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003711 }
3712 }
3713 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003714 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003715 }
3716 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003717 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003718 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003719 }
Owen Taylor3473f882001-02-23 17:55:21 +00003720 if (ent->content != NULL)
3721 buf[len++] = ent->content[0];
3722 }
3723 } else if (ent != NULL) {
3724 int i = xmlStrlen(ent->name);
3725 const xmlChar *cur = ent->name;
3726
3727 /*
3728 * This may look absurd but is needed to detect
3729 * entities problems
3730 */
3731 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3732 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003733 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003734 XML_SUBSTITUTE_REF, 0, 0, 0);
3735 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003736 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003737 rep = NULL;
3738 }
Owen Taylor3473f882001-02-23 17:55:21 +00003739 }
3740
3741 /*
3742 * Just output the reference
3743 */
3744 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003745 while (len > buf_size - i - 10) {
3746 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003747 }
3748 for (;i > 0;i--)
3749 buf[len++] = *cur++;
3750 buf[len++] = ';';
3751 }
3752 }
3753 } else {
3754 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003755 if ((len != 0) || (!normalize)) {
3756 if ((!normalize) || (!in_space)) {
3757 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003758 while (len > buf_size - 10) {
3759 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003760 }
3761 }
3762 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003763 }
3764 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003765 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003766 COPY_BUF(l,buf,len,c);
3767 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003768 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003769 }
3770 }
3771 NEXTL(l);
3772 }
3773 GROW;
3774 c = CUR_CHAR(l);
3775 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003776 if ((in_space) && (normalize)) {
3777 while (buf[len - 1] == 0x20) len--;
3778 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003779 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003780 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003781 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003782 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003783 if ((c != 0) && (!IS_CHAR(c))) {
3784 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3785 "invalid character in attribute value\n");
3786 } else {
3787 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3788 "AttValue: ' expected\n");
3789 }
Owen Taylor3473f882001-02-23 17:55:21 +00003790 } else
3791 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003792 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003793 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003794
3795mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003796 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003797 if (buf != NULL)
3798 xmlFree(buf);
3799 if (rep != NULL)
3800 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003801 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003802}
3803
3804/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003805 * xmlParseAttValue:
3806 * @ctxt: an XML parser context
3807 *
3808 * parse a value for an attribute
3809 * Note: the parser won't do substitution of entities here, this
3810 * will be handled later in xmlStringGetNodeList
3811 *
3812 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3813 * "'" ([^<&'] | Reference)* "'"
3814 *
3815 * 3.3.3 Attribute-Value Normalization:
3816 * Before the value of an attribute is passed to the application or
3817 * checked for validity, the XML processor must normalize it as follows:
3818 * - a character reference is processed by appending the referenced
3819 * character to the attribute value
3820 * - an entity reference is processed by recursively processing the
3821 * replacement text of the entity
3822 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3823 * appending #x20 to the normalized value, except that only a single
3824 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3825 * parsed entity or the literal entity value of an internal parsed entity
3826 * - other characters are processed by appending them to the normalized value
3827 * If the declared value is not CDATA, then the XML processor must further
3828 * process the normalized attribute value by discarding any leading and
3829 * trailing space (#x20) characters, and by replacing sequences of space
3830 * (#x20) characters by a single space (#x20) character.
3831 * All attributes for which no declaration has been read should be treated
3832 * by a non-validating parser as if declared CDATA.
3833 *
3834 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3835 */
3836
3837
3838xmlChar *
3839xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003840 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003841 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003842}
3843
3844/**
Owen Taylor3473f882001-02-23 17:55:21 +00003845 * xmlParseSystemLiteral:
3846 * @ctxt: an XML parser context
3847 *
3848 * parse an XML Literal
3849 *
3850 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3851 *
3852 * Returns the SystemLiteral parsed or NULL
3853 */
3854
3855xmlChar *
3856xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3857 xmlChar *buf = NULL;
3858 int len = 0;
3859 int size = XML_PARSER_BUFFER_SIZE;
3860 int cur, l;
3861 xmlChar stop;
3862 int state = ctxt->instate;
3863 int count = 0;
3864
3865 SHRINK;
3866 if (RAW == '"') {
3867 NEXT;
3868 stop = '"';
3869 } else if (RAW == '\'') {
3870 NEXT;
3871 stop = '\'';
3872 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003873 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003874 return(NULL);
3875 }
3876
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003877 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003878 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003879 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003880 return(NULL);
3881 }
3882 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3883 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003884 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003885 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003886 xmlChar *tmp;
3887
Owen Taylor3473f882001-02-23 17:55:21 +00003888 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003889 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3890 if (tmp == NULL) {
3891 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003892 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003893 ctxt->instate = (xmlParserInputState) state;
3894 return(NULL);
3895 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003896 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003897 }
3898 count++;
3899 if (count > 50) {
3900 GROW;
3901 count = 0;
3902 }
3903 COPY_BUF(l,buf,len,cur);
3904 NEXTL(l);
3905 cur = CUR_CHAR(l);
3906 if (cur == 0) {
3907 GROW;
3908 SHRINK;
3909 cur = CUR_CHAR(l);
3910 }
3911 }
3912 buf[len] = 0;
3913 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003914 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003915 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003916 } else {
3917 NEXT;
3918 }
3919 return(buf);
3920}
3921
3922/**
3923 * xmlParsePubidLiteral:
3924 * @ctxt: an XML parser context
3925 *
3926 * parse an XML public literal
3927 *
3928 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3929 *
3930 * Returns the PubidLiteral parsed or NULL.
3931 */
3932
3933xmlChar *
3934xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3935 xmlChar *buf = NULL;
3936 int len = 0;
3937 int size = XML_PARSER_BUFFER_SIZE;
3938 xmlChar cur;
3939 xmlChar stop;
3940 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003941 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003942
3943 SHRINK;
3944 if (RAW == '"') {
3945 NEXT;
3946 stop = '"';
3947 } else if (RAW == '\'') {
3948 NEXT;
3949 stop = '\'';
3950 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003951 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003952 return(NULL);
3953 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003954 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003955 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003956 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003957 return(NULL);
3958 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003959 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003960 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003961 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003962 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003963 xmlChar *tmp;
3964
Owen Taylor3473f882001-02-23 17:55:21 +00003965 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003966 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3967 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003968 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003969 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003970 return(NULL);
3971 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003972 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003973 }
3974 buf[len++] = cur;
3975 count++;
3976 if (count > 50) {
3977 GROW;
3978 count = 0;
3979 }
3980 NEXT;
3981 cur = CUR;
3982 if (cur == 0) {
3983 GROW;
3984 SHRINK;
3985 cur = CUR;
3986 }
3987 }
3988 buf[len] = 0;
3989 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003990 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003991 } else {
3992 NEXT;
3993 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003994 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003995 return(buf);
3996}
3997
Daniel Veillard8ed10722009-08-20 19:17:36 +02003998static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003999
/*
 * Lookup table driving the inner loop of the char data fast path: the
 * scan advances for as long as test_char_data[byte] is non-zero.
 *
 * An entry is either 0 (stop scanning) or the byte value itself. The
 * zero entries are: every control byte except 0x09 (so LF and CR get
 * handled separately by the caller), '&' (0x26), '<' (0x3C), ']' (0x5D)
 * and every byte from 0x80 up (non-ASCII).
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x00 - 0x07 */
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08 - 0x0F: tab only */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10 - 0x17 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18 - 0x1F */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* 0x20 - 0x27: no '&' */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, /* 0x28 - 0x2F */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30 - 0x37 */
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* 0x38 - 0x3F: no '<' */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40 - 0x47 */
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, /* 0x48 - 0x4F */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50 - 0x57 */
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* 0x58 - 0x5F: no ']' */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60 - 0x67 */
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, /* 0x68 - 0x6F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70 - 0x77 */
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F, /* 0x78 - 0x7F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80 - 0x87: non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88 - 0x8F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90 - 0x97 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98 - 0x9F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0 - 0xA7 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA8 - 0xAF */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB0 - 0xB7 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xB8 - 0xBF */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC0 - 0xC7 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xC8 - 0xCF */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD0 - 0xD7 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8 - 0xDF */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE0 - 0xE7 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE8 - 0xEF */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xF0 - 0xF7 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /* 0xF8 - 0xFF */
};
4037
Owen Taylor3473f882001-02-23 17:55:21 +00004038/**
4039 * xmlParseCharData:
4040 * @ctxt: an XML parser context
4041 * @cdata: int indicating whether we are within a CDATA section
4042 *
4043 * parse a CharData section.
4044 * if we are within a CDATA section ']]>' marks an end of section.
4045 *
4046 * The right angle bracket (>) may be represented using the string "&gt;",
4047 * and must, for compatibility, be escaped using "&gt;" or a character
4048 * reference when it appears in the string "]]>" in content, when that
4049 * string is not marking the end of a CDATA section.
4050 *
4051 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4052 */
4053
4054void
4055xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004056 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004057 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004058 int line = ctxt->input->line;
4059 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004060 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004061
4062 SHRINK;
4063 GROW;
4064 /*
4065 * Accelerated common case where input don't need to be
4066 * modified before passing it to the handler.
4067 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004068 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004069 in = ctxt->input->cur;
4070 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004071get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004072 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004073 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004074 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004075 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004076 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004077 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004078 goto get_more_space;
4079 }
4080 if (*in == '<') {
4081 nbchar = in - ctxt->input->cur;
4082 if (nbchar > 0) {
4083 const xmlChar *tmp = ctxt->input->cur;
4084 ctxt->input->cur = in;
4085
Daniel Veillard34099b42004-11-04 17:34:35 +00004086 if ((ctxt->sax != NULL) &&
4087 (ctxt->sax->ignorableWhitespace !=
4088 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004089 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004090 if (ctxt->sax->ignorableWhitespace != NULL)
4091 ctxt->sax->ignorableWhitespace(ctxt->userData,
4092 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004093 } else {
4094 if (ctxt->sax->characters != NULL)
4095 ctxt->sax->characters(ctxt->userData,
4096 tmp, nbchar);
4097 if (*ctxt->space == -1)
4098 *ctxt->space = -2;
4099 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004100 } else if ((ctxt->sax != NULL) &&
4101 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004102 ctxt->sax->characters(ctxt->userData,
4103 tmp, nbchar);
4104 }
4105 }
4106 return;
4107 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004108
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004109get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004110 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004111 while (test_char_data[*in]) {
4112 in++;
4113 ccol++;
4114 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004115 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004116 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004117 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004118 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004119 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004120 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004121 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004122 }
4123 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004124 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004125 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004126 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004127 return;
4128 }
4129 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004130 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004131 goto get_more;
4132 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004133 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004134 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004135 if ((ctxt->sax != NULL) &&
4136 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004137 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004138 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004139 const xmlChar *tmp = ctxt->input->cur;
4140 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004141
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004142 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004143 if (ctxt->sax->ignorableWhitespace != NULL)
4144 ctxt->sax->ignorableWhitespace(ctxt->userData,
4145 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004146 } else {
4147 if (ctxt->sax->characters != NULL)
4148 ctxt->sax->characters(ctxt->userData,
4149 tmp, nbchar);
4150 if (*ctxt->space == -1)
4151 *ctxt->space = -2;
4152 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004153 line = ctxt->input->line;
4154 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004155 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004156 if (ctxt->sax->characters != NULL)
4157 ctxt->sax->characters(ctxt->userData,
4158 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004159 line = ctxt->input->line;
4160 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004161 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004162 /* something really bad happened in the SAX callback */
4163 if (ctxt->instate != XML_PARSER_CONTENT)
4164 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004165 }
4166 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004167 if (*in == 0xD) {
4168 in++;
4169 if (*in == 0xA) {
4170 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004171 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004172 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004173 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004174 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004175 in--;
4176 }
4177 if (*in == '<') {
4178 return;
4179 }
4180 if (*in == '&') {
4181 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004182 }
4183 SHRINK;
4184 GROW;
4185 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004186 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004187 nbchar = 0;
4188 }
Daniel Veillard50582112001-03-26 22:52:16 +00004189 ctxt->input->line = line;
4190 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004191 xmlParseCharDataComplex(ctxt, cdata);
4192}
4193
Daniel Veillard01c13b52002-12-10 15:19:08 +00004194/**
4195 * xmlParseCharDataComplex:
4196 * @ctxt: an XML parser context
4197 * @cdata: int indicating whether we are within a CDATA section
4198 *
4199 * parse a CharData section.this is the fallback function
4200 * of xmlParseCharData() when the parsing requires handling
4201 * of non-ASCII characters.
4202 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004203static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004204xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004205 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4206 int nbchar = 0;
4207 int cur, l;
4208 int count = 0;
4209
4210 SHRINK;
4211 GROW;
4212 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004213 while ((cur != '<') && /* checked */
4214 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004215 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004216 if ((cur == ']') && (NXT(1) == ']') &&
4217 (NXT(2) == '>')) {
4218 if (cdata) break;
4219 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004220 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004221 }
4222 }
4223 COPY_BUF(l,buf,nbchar,cur);
4224 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004225 buf[nbchar] = 0;
4226
Owen Taylor3473f882001-02-23 17:55:21 +00004227 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004228 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004229 */
4230 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004231 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004232 if (ctxt->sax->ignorableWhitespace != NULL)
4233 ctxt->sax->ignorableWhitespace(ctxt->userData,
4234 buf, nbchar);
4235 } else {
4236 if (ctxt->sax->characters != NULL)
4237 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004238 if ((ctxt->sax->characters !=
4239 ctxt->sax->ignorableWhitespace) &&
4240 (*ctxt->space == -1))
4241 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004242 }
4243 }
4244 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004245 /* something really bad happened in the SAX callback */
4246 if (ctxt->instate != XML_PARSER_CONTENT)
4247 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004248 }
4249 count++;
4250 if (count > 50) {
4251 GROW;
4252 count = 0;
4253 }
4254 NEXTL(l);
4255 cur = CUR_CHAR(l);
4256 }
4257 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004258 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004259 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004260 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004261 */
4262 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004263 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004264 if (ctxt->sax->ignorableWhitespace != NULL)
4265 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4266 } else {
4267 if (ctxt->sax->characters != NULL)
4268 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004269 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4270 (*ctxt->space == -1))
4271 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004272 }
4273 }
4274 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004275 if ((cur != 0) && (!IS_CHAR(cur))) {
4276 /* Generate the error and skip the offending character */
4277 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4278 "PCDATA invalid Char value %d\n",
4279 cur);
4280 NEXTL(l);
4281 }
Owen Taylor3473f882001-02-23 17:55:21 +00004282}
4283
4284/**
4285 * xmlParseExternalID:
4286 * @ctxt: an XML parser context
4287 * @publicID: a xmlChar** receiving PubidLiteral
4288 * @strict: indicate whether we should restrict parsing to only
4289 * production [75], see NOTE below
4290 *
4291 * Parse an External ID or a Public ID
4292 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004293 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004294 * 'PUBLIC' S PubidLiteral S SystemLiteral
4295 *
4296 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4297 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4298 *
4299 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4300 *
4301 * Returns the function returns SystemLiteral and in the second
4302 * case publicID receives PubidLiteral, is strict is off
4303 * it is possible to return NULL and have publicID set.
4304 */
4305
4306xmlChar *
4307xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4308 xmlChar *URI = NULL;
4309
4310 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004311
4312 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004313 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004314 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004315 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004316 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4317 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004318 }
4319 SKIP_BLANKS;
4320 URI = xmlParseSystemLiteral(ctxt);
4321 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004322 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004323 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004324 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004325 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004326 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004327 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004328 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004329 }
4330 SKIP_BLANKS;
4331 *publicID = xmlParsePubidLiteral(ctxt);
4332 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004333 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004334 }
4335 if (strict) {
4336 /*
4337 * We don't handle [83] so "S SystemLiteral" is required.
4338 */
William M. Brack76e95df2003-10-18 16:20:14 +00004339 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004340 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004341 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004342 }
4343 } else {
4344 /*
4345 * We handle [83] so we return immediately, if
4346 * "S SystemLiteral" is not detected. From a purely parsing
4347 * point of view that's a nice mess.
4348 */
4349 const xmlChar *ptr;
4350 GROW;
4351
4352 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004353 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004354
William M. Brack76e95df2003-10-18 16:20:14 +00004355 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004356 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4357 }
4358 SKIP_BLANKS;
4359 URI = xmlParseSystemLiteral(ctxt);
4360 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004361 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004362 }
4363 }
4364 return(URI);
4365}
4366
4367/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004368 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004369 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004370 * @buf: the already parsed part of the buffer
4371 * @len: number of bytes filles in the buffer
4372 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004373 *
4374 * Skip an XML (SGML) comment <!-- .... -->
4375 * The spec says that "For compatibility, the string "--" (double-hyphen)
4376 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004377 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004378 *
4379 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4380 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004381static void
4382xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004383 int q, ql;
4384 int r, rl;
4385 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004386 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004387 int inputid;
4388
4389 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004390
Owen Taylor3473f882001-02-23 17:55:21 +00004391 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004392 len = 0;
4393 size = XML_PARSER_BUFFER_SIZE;
4394 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4395 if (buf == NULL) {
4396 xmlErrMemory(ctxt, NULL);
4397 return;
4398 }
Owen Taylor3473f882001-02-23 17:55:21 +00004399 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004400 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004401 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004402 if (q == 0)
4403 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004404 if (!IS_CHAR(q)) {
4405 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4406 "xmlParseComment: invalid xmlChar value %d\n",
4407 q);
4408 xmlFree (buf);
4409 return;
4410 }
Owen Taylor3473f882001-02-23 17:55:21 +00004411 NEXTL(ql);
4412 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004413 if (r == 0)
4414 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004415 if (!IS_CHAR(r)) {
4416 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4417 "xmlParseComment: invalid xmlChar value %d\n",
4418 q);
4419 xmlFree (buf);
4420 return;
4421 }
Owen Taylor3473f882001-02-23 17:55:21 +00004422 NEXTL(rl);
4423 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004424 if (cur == 0)
4425 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004426 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004427 ((cur != '>') ||
4428 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004429 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004430 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004431 }
4432 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004433 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004434 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004435 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4436 if (new_buf == NULL) {
4437 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004438 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004439 return;
4440 }
William M. Bracka3215c72004-07-31 16:24:01 +00004441 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004442 }
4443 COPY_BUF(ql,buf,len,q);
4444 q = r;
4445 ql = rl;
4446 r = cur;
4447 rl = l;
4448
4449 count++;
4450 if (count > 50) {
4451 GROW;
4452 count = 0;
4453 }
4454 NEXTL(l);
4455 cur = CUR_CHAR(l);
4456 if (cur == 0) {
4457 SHRINK;
4458 GROW;
4459 cur = CUR_CHAR(l);
4460 }
4461 }
4462 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004463 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004464 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004465 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004466 } else if (!IS_CHAR(cur)) {
4467 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4468 "xmlParseComment: invalid xmlChar value %d\n",
4469 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004470 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004471 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004472 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4473 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004474 }
4475 NEXT;
4476 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4477 (!ctxt->disableSAX))
4478 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004479 }
Daniel Veillardda629342007-08-01 07:49:06 +00004480 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004481 return;
4482not_terminated:
4483 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4484 "Comment not terminated\n", NULL);
4485 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004486 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004487}
Daniel Veillardda629342007-08-01 07:49:06 +00004488
Daniel Veillard4c778d82005-01-23 17:37:44 +00004489/**
4490 * xmlParseComment:
4491 * @ctxt: an XML parser context
4492 *
4493 * Skip an XML (SGML) comment <!-- .... -->
4494 * The spec says that "For compatibility, the string "--" (double-hyphen)
4495 * must not occur within comments. "
4496 *
4497 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4498 */
4499void
4500xmlParseComment(xmlParserCtxtPtr ctxt) {
4501 xmlChar *buf = NULL;
4502 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004503 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004504 xmlParserInputState state;
4505 const xmlChar *in;
4506 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004507 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004508
4509 /*
4510 * Check that there is a comment right here.
4511 */
4512 if ((RAW != '<') || (NXT(1) != '!') ||
4513 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004514 state = ctxt->instate;
4515 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004516 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004517 SKIP(4);
4518 SHRINK;
4519 GROW;
4520
4521 /*
4522 * Accelerated common case where input don't need to be
4523 * modified before passing it to the handler.
4524 */
4525 in = ctxt->input->cur;
4526 do {
4527 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004528 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004529 ctxt->input->line++; ctxt->input->col = 1;
4530 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004531 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004532 }
4533get_more:
4534 ccol = ctxt->input->col;
4535 while (((*in > '-') && (*in <= 0x7F)) ||
4536 ((*in >= 0x20) && (*in < '-')) ||
4537 (*in == 0x09)) {
4538 in++;
4539 ccol++;
4540 }
4541 ctxt->input->col = ccol;
4542 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004543 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004544 ctxt->input->line++; ctxt->input->col = 1;
4545 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004546 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004547 goto get_more;
4548 }
4549 nbchar = in - ctxt->input->cur;
4550 /*
4551 * save current set of data
4552 */
4553 if (nbchar > 0) {
4554 if ((ctxt->sax != NULL) &&
4555 (ctxt->sax->comment != NULL)) {
4556 if (buf == NULL) {
4557 if ((*in == '-') && (in[1] == '-'))
4558 size = nbchar + 1;
4559 else
4560 size = XML_PARSER_BUFFER_SIZE + nbchar;
4561 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4562 if (buf == NULL) {
4563 xmlErrMemory(ctxt, NULL);
4564 ctxt->instate = state;
4565 return;
4566 }
4567 len = 0;
4568 } else if (len + nbchar + 1 >= size) {
4569 xmlChar *new_buf;
4570 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4571 new_buf = (xmlChar *) xmlRealloc(buf,
4572 size * sizeof(xmlChar));
4573 if (new_buf == NULL) {
4574 xmlFree (buf);
4575 xmlErrMemory(ctxt, NULL);
4576 ctxt->instate = state;
4577 return;
4578 }
4579 buf = new_buf;
4580 }
4581 memcpy(&buf[len], ctxt->input->cur, nbchar);
4582 len += nbchar;
4583 buf[len] = 0;
4584 }
4585 }
4586 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004587 if (*in == 0xA) {
4588 in++;
4589 ctxt->input->line++; ctxt->input->col = 1;
4590 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004591 if (*in == 0xD) {
4592 in++;
4593 if (*in == 0xA) {
4594 ctxt->input->cur = in;
4595 in++;
4596 ctxt->input->line++; ctxt->input->col = 1;
4597 continue; /* while */
4598 }
4599 in--;
4600 }
4601 SHRINK;
4602 GROW;
4603 in = ctxt->input->cur;
4604 if (*in == '-') {
4605 if (in[1] == '-') {
4606 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004607 if (ctxt->input->id != inputid) {
4608 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4609 "comment doesn't start and stop in the same entity\n");
4610 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004611 SKIP(3);
4612 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4613 (!ctxt->disableSAX)) {
4614 if (buf != NULL)
4615 ctxt->sax->comment(ctxt->userData, buf);
4616 else
4617 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4618 }
4619 if (buf != NULL)
4620 xmlFree(buf);
4621 ctxt->instate = state;
4622 return;
4623 }
4624 if (buf != NULL)
4625 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4626 "Comment not terminated \n<!--%.50s\n",
4627 buf);
4628 else
4629 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4630 "Comment not terminated \n", NULL);
4631 in++;
4632 ctxt->input->col++;
4633 }
4634 in++;
4635 ctxt->input->col++;
4636 goto get_more;
4637 }
4638 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4639 xmlParseCommentComplex(ctxt, buf, len, size);
4640 ctxt->instate = state;
4641 return;
4642}
4643
Owen Taylor3473f882001-02-23 17:55:21 +00004644
4645/**
4646 * xmlParsePITarget:
4647 * @ctxt: an XML parser context
4648 *
4649 * parse the name of a PI
4650 *
4651 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4652 *
4653 * Returns the PITarget name or NULL
4654 */
4655
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004656const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004657xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004658 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004659
4660 name = xmlParseName(ctxt);
4661 if ((name != NULL) &&
4662 ((name[0] == 'x') || (name[0] == 'X')) &&
4663 ((name[1] == 'm') || (name[1] == 'M')) &&
4664 ((name[2] == 'l') || (name[2] == 'L'))) {
4665 int i;
4666 if ((name[0] == 'x') && (name[1] == 'm') &&
4667 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004668 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004669 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004670 return(name);
4671 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004672 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004673 return(name);
4674 }
4675 for (i = 0;;i++) {
4676 if (xmlW3CPIs[i] == NULL) break;
4677 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4678 return(name);
4679 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004680 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4681 "xmlParsePITarget: invalid name prefix 'xml'\n",
4682 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004683 }
Daniel Veillard37334572008-07-31 08:20:02 +00004684 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4685 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4686 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4687 }
Owen Taylor3473f882001-02-23 17:55:21 +00004688 return(name);
4689}
4690
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * The quoted value is extracted from @catalog and handed to
 * xmlCatalogAddLocal(), whose result replaces ctxt->catalogs.
 * A malformed value raises the XML_WAR_CATALOG_PI warning, except when the
 * '=' sign is missing after the "catalog" keyword, in which case the
 * function returns silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *url = NULL;
    xmlChar quote;

    /* Expect: S? 'catalog' S? '=' S? quoted-value S? end-of-string */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=')
        return;
    p++;
    while (IS_BLANK_CH(*p))
        p++;

    /* The value may be delimited by single or double quotes. */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto syntax_error;
    url = xmlStrndup(start, p - start);

    /* Only blanks may follow the closing quote. */
    p++;
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto syntax_error;

    if (url != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, url);
        xmlFree(url);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (url != NULL)
        xmlFree(url);
}
#endif
4752
Owen Taylor3473f882001-02-23 17:55:21 +00004753/**
4754 * xmlParsePI:
4755 * @ctxt: an XML parser context
4756 *
4757 * parse an XML Processing Instruction.
4758 *
4759 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4760 *
4761 * The processing is transfered to SAX once parsed.
4762 */
4763
4764void
4765xmlParsePI(xmlParserCtxtPtr ctxt) {
4766 xmlChar *buf = NULL;
4767 int len = 0;
4768 int size = XML_PARSER_BUFFER_SIZE;
4769 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004770 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004771 xmlParserInputState state;
4772 int count = 0;
4773
4774 if ((RAW == '<') && (NXT(1) == '?')) {
4775 xmlParserInputPtr input = ctxt->input;
4776 state = ctxt->instate;
4777 ctxt->instate = XML_PARSER_PI;
4778 /*
4779 * this is a Processing Instruction.
4780 */
4781 SKIP(2);
4782 SHRINK;
4783
4784 /*
4785 * Parse the target name and check for special support like
4786 * namespace.
4787 */
4788 target = xmlParsePITarget(ctxt);
4789 if (target != NULL) {
4790 if ((RAW == '?') && (NXT(1) == '>')) {
4791 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004792 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4793 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004794 }
4795 SKIP(2);
4796
4797 /*
4798 * SAX: PI detected.
4799 */
4800 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4801 (ctxt->sax->processingInstruction != NULL))
4802 ctxt->sax->processingInstruction(ctxt->userData,
4803 target, NULL);
4804 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004805 return;
4806 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004807 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004808 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004809 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004810 ctxt->instate = state;
4811 return;
4812 }
4813 cur = CUR;
4814 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004815 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4816 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004817 }
4818 SKIP_BLANKS;
4819 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004820 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004821 ((cur != '?') || (NXT(1) != '>'))) {
4822 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004823 xmlChar *tmp;
4824
Owen Taylor3473f882001-02-23 17:55:21 +00004825 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004826 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4827 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004828 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004829 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004830 ctxt->instate = state;
4831 return;
4832 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004833 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004834 }
4835 count++;
4836 if (count > 50) {
4837 GROW;
4838 count = 0;
4839 }
4840 COPY_BUF(l,buf,len,cur);
4841 NEXTL(l);
4842 cur = CUR_CHAR(l);
4843 if (cur == 0) {
4844 SHRINK;
4845 GROW;
4846 cur = CUR_CHAR(l);
4847 }
4848 }
4849 buf[len] = 0;
4850 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004851 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4852 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004853 } else {
4854 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004855 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4856 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004857 }
4858 SKIP(2);
4859
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004860#ifdef LIBXML_CATALOG_ENABLED
4861 if (((state == XML_PARSER_MISC) ||
4862 (state == XML_PARSER_START)) &&
4863 (xmlStrEqual(target, XML_CATALOG_PI))) {
4864 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4865 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4866 (allow == XML_CATA_ALLOW_ALL))
4867 xmlParseCatalogPI(ctxt, buf);
4868 }
4869#endif
4870
4871
Owen Taylor3473f882001-02-23 17:55:21 +00004872 /*
4873 * SAX: PI detected.
4874 */
4875 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4876 (ctxt->sax->processingInstruction != NULL))
4877 ctxt->sax->processingInstruction(ctxt->userData,
4878 target, buf);
4879 }
4880 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004881 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004882 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004883 }
4884 ctxt->instate = state;
4885 }
4886}
4887
4888/**
4889 * xmlParseNotationDecl:
4890 * @ctxt: an XML parser context
4891 *
4892 * parse a notation declaration
4893 *
4894 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4895 *
4896 * Hence there is actually 3 choices:
4897 * 'PUBLIC' S PubidLiteral
4898 * 'PUBLIC' S PubidLiteral S SystemLiteral
4899 * and 'SYSTEM' S SystemLiteral
4900 *
4901 * See the NOTE on xmlParseExternalID().
4902 */
4903
4904void
4905xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004906 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004907 xmlChar *Pubid;
4908 xmlChar *Systemid;
4909
Daniel Veillarda07050d2003-10-19 14:46:32 +00004910 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004911 xmlParserInputPtr input = ctxt->input;
4912 SHRINK;
4913 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004914 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004915 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4916 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004917 return;
4918 }
4919 SKIP_BLANKS;
4920
Daniel Veillard76d66f42001-05-16 21:05:17 +00004921 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004922 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004923 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004924 return;
4925 }
William M. Brack76e95df2003-10-18 16:20:14 +00004926 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004927 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004928 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004929 return;
4930 }
Daniel Veillard37334572008-07-31 08:20:02 +00004931 if (xmlStrchr(name, ':') != NULL) {
4932 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4933 "colon are forbidden from notation names '%s'\n",
4934 name, NULL, NULL);
4935 }
Owen Taylor3473f882001-02-23 17:55:21 +00004936 SKIP_BLANKS;
4937
4938 /*
4939 * Parse the IDs.
4940 */
4941 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4942 SKIP_BLANKS;
4943
4944 if (RAW == '>') {
4945 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004946 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4947 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004948 }
4949 NEXT;
4950 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4951 (ctxt->sax->notationDecl != NULL))
4952 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4953 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004954 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004955 }
Owen Taylor3473f882001-02-23 17:55:21 +00004956 if (Systemid != NULL) xmlFree(Systemid);
4957 if (Pubid != NULL) xmlFree(Pubid);
4958 }
4959}
4960
4961/**
4962 * xmlParseEntityDecl:
4963 * @ctxt: an XML parser context
4964 *
4965 * parse <!ENTITY declarations
4966 *
4967 * [70] EntityDecl ::= GEDecl | PEDecl
4968 *
4969 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4970 *
4971 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4972 *
4973 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4974 *
4975 * [74] PEDef ::= EntityValue | ExternalID
4976 *
4977 * [76] NDataDecl ::= S 'NDATA' S Name
4978 *
4979 * [ VC: Notation Declared ]
4980 * The Name must match the declared name of a notation.
4981 */
4982
4983void
4984xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004985 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004986 xmlChar *value = NULL;
4987 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004988 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004989 int isParameter = 0;
4990 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004991 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004992
Daniel Veillard4c778d82005-01-23 17:37:44 +00004993 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004994 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004995 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004996 SHRINK;
4997 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004998 skipped = SKIP_BLANKS;
4999 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005000 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5001 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005002 }
Owen Taylor3473f882001-02-23 17:55:21 +00005003
5004 if (RAW == '%') {
5005 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005006 skipped = SKIP_BLANKS;
5007 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005008 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5009 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005010 }
Owen Taylor3473f882001-02-23 17:55:21 +00005011 isParameter = 1;
5012 }
5013
Daniel Veillard76d66f42001-05-16 21:05:17 +00005014 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005015 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005016 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5017 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005018 return;
5019 }
Daniel Veillard37334572008-07-31 08:20:02 +00005020 if (xmlStrchr(name, ':') != NULL) {
5021 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5022 "colon are forbidden from entities names '%s'\n",
5023 name, NULL, NULL);
5024 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005025 skipped = SKIP_BLANKS;
5026 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005027 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5028 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005029 }
Owen Taylor3473f882001-02-23 17:55:21 +00005030
Daniel Veillardf5582f12002-06-11 10:08:16 +00005031 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005032 /*
5033 * handle the various case of definitions...
5034 */
5035 if (isParameter) {
5036 if ((RAW == '"') || (RAW == '\'')) {
5037 value = xmlParseEntityValue(ctxt, &orig);
5038 if (value) {
5039 if ((ctxt->sax != NULL) &&
5040 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5041 ctxt->sax->entityDecl(ctxt->userData, name,
5042 XML_INTERNAL_PARAMETER_ENTITY,
5043 NULL, NULL, value);
5044 }
5045 } else {
5046 URI = xmlParseExternalID(ctxt, &literal, 1);
5047 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005048 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005049 }
5050 if (URI) {
5051 xmlURIPtr uri;
5052
5053 uri = xmlParseURI((const char *) URI);
5054 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005055 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5056 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005057 /*
5058 * This really ought to be a well formedness error
5059 * but the XML Core WG decided otherwise c.f. issue
5060 * E26 of the XML erratas.
5061 */
Owen Taylor3473f882001-02-23 17:55:21 +00005062 } else {
5063 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005064 /*
5065 * Okay this is foolish to block those but not
5066 * invalid URIs.
5067 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005068 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005069 } else {
5070 if ((ctxt->sax != NULL) &&
5071 (!ctxt->disableSAX) &&
5072 (ctxt->sax->entityDecl != NULL))
5073 ctxt->sax->entityDecl(ctxt->userData, name,
5074 XML_EXTERNAL_PARAMETER_ENTITY,
5075 literal, URI, NULL);
5076 }
5077 xmlFreeURI(uri);
5078 }
5079 }
5080 }
5081 } else {
5082 if ((RAW == '"') || (RAW == '\'')) {
5083 value = xmlParseEntityValue(ctxt, &orig);
5084 if ((ctxt->sax != NULL) &&
5085 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5086 ctxt->sax->entityDecl(ctxt->userData, name,
5087 XML_INTERNAL_GENERAL_ENTITY,
5088 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005089 /*
5090 * For expat compatibility in SAX mode.
5091 */
5092 if ((ctxt->myDoc == NULL) ||
5093 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5094 if (ctxt->myDoc == NULL) {
5095 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005096 if (ctxt->myDoc == NULL) {
5097 xmlErrMemory(ctxt, "New Doc failed");
5098 return;
5099 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005100 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005101 }
5102 if (ctxt->myDoc->intSubset == NULL)
5103 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5104 BAD_CAST "fake", NULL, NULL);
5105
Daniel Veillard1af9a412003-08-20 22:54:39 +00005106 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5107 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005108 }
Owen Taylor3473f882001-02-23 17:55:21 +00005109 } else {
5110 URI = xmlParseExternalID(ctxt, &literal, 1);
5111 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005112 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005113 }
5114 if (URI) {
5115 xmlURIPtr uri;
5116
5117 uri = xmlParseURI((const char *)URI);
5118 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005119 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5120 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005121 /*
5122 * This really ought to be a well formedness error
5123 * but the XML Core WG decided otherwise c.f. issue
5124 * E26 of the XML erratas.
5125 */
Owen Taylor3473f882001-02-23 17:55:21 +00005126 } else {
5127 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005128 /*
5129 * Okay this is foolish to block those but not
5130 * invalid URIs.
5131 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005132 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005133 }
5134 xmlFreeURI(uri);
5135 }
5136 }
William M. Brack76e95df2003-10-18 16:20:14 +00005137 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005138 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5139 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005140 }
5141 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005142 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005143 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005144 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005145 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5146 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005147 }
5148 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005149 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005150 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5151 (ctxt->sax->unparsedEntityDecl != NULL))
5152 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5153 literal, URI, ndata);
5154 } else {
5155 if ((ctxt->sax != NULL) &&
5156 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5157 ctxt->sax->entityDecl(ctxt->userData, name,
5158 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5159 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005160 /*
5161 * For expat compatibility in SAX mode.
5162 * assuming the entity repalcement was asked for
5163 */
5164 if ((ctxt->replaceEntities != 0) &&
5165 ((ctxt->myDoc == NULL) ||
5166 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5167 if (ctxt->myDoc == NULL) {
5168 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005169 if (ctxt->myDoc == NULL) {
5170 xmlErrMemory(ctxt, "New Doc failed");
5171 return;
5172 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005173 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005174 }
5175
5176 if (ctxt->myDoc->intSubset == NULL)
5177 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5178 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005179 xmlSAX2EntityDecl(ctxt, name,
5180 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5181 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005182 }
Owen Taylor3473f882001-02-23 17:55:21 +00005183 }
5184 }
5185 }
5186 SKIP_BLANKS;
5187 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005188 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005189 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005190 } else {
5191 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005192 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5193 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005194 }
5195 NEXT;
5196 }
5197 if (orig != NULL) {
5198 /*
5199 * Ugly mechanism to save the raw entity value.
5200 */
5201 xmlEntityPtr cur = NULL;
5202
5203 if (isParameter) {
5204 if ((ctxt->sax != NULL) &&
5205 (ctxt->sax->getParameterEntity != NULL))
5206 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5207 } else {
5208 if ((ctxt->sax != NULL) &&
5209 (ctxt->sax->getEntity != NULL))
5210 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005211 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005212 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005213 }
Owen Taylor3473f882001-02-23 17:55:21 +00005214 }
5215 if (cur != NULL) {
5216 if (cur->orig != NULL)
5217 xmlFree(orig);
5218 else
5219 cur->orig = orig;
5220 } else
5221 xmlFree(orig);
5222 }
Owen Taylor3473f882001-02-23 17:55:21 +00005223 if (value != NULL) xmlFree(value);
5224 if (URI != NULL) xmlFree(URI);
5225 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005226 }
5227}
5228
5229/**
5230 * xmlParseDefaultDecl:
5231 * @ctxt: an XML parser context
5232 * @value: Receive a possible fixed default value for the attribute
5233 *
5234 * Parse an attribute default declaration
5235 *
5236 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5237 *
5238 * [ VC: Required Attribute ]
5239 * if the default declaration is the keyword #REQUIRED, then the
5240 * attribute must be specified for all elements of the type in the
5241 * attribute-list declaration.
5242 *
5243 * [ VC: Attribute Default Legal ]
5244 * The declared default value must meet the lexical constraints of
5245 * the declared attribute type c.f. xmlValidateAttributeDecl()
5246 *
5247 * [ VC: Fixed Attribute Default ]
5248 * if an attribute has a default value declared with the #FIXED
5249 * keyword, instances of that attribute must match the default value.
5250 *
5251 * [ WFC: No < in Attribute Values ]
5252 * handled in xmlParseAttValue()
5253 *
5254 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5255 * or XML_ATTRIBUTE_FIXED.
5256 */
5257
5258int
5259xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5260 int val;
5261 xmlChar *ret;
5262
5263 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005264 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005265 SKIP(9);
5266 return(XML_ATTRIBUTE_REQUIRED);
5267 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005268 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005269 SKIP(8);
5270 return(XML_ATTRIBUTE_IMPLIED);
5271 }
5272 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005273 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005274 SKIP(6);
5275 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005276 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005277 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5278 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005279 }
5280 SKIP_BLANKS;
5281 }
5282 ret = xmlParseAttValue(ctxt);
5283 ctxt->instate = XML_PARSER_DTD;
5284 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005285 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005286 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005287 } else
5288 *value = ret;
5289 return(val);
5290}
5291
5292/**
5293 * xmlParseNotationType:
5294 * @ctxt: an XML parser context
5295 *
5296 * parse an Notation attribute type.
5297 *
5298 * Note: the leading 'NOTATION' S part has already being parsed...
5299 *
5300 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5301 *
5302 * [ VC: Notation Attributes ]
5303 * Values of this type must match one of the notation names included
5304 * in the declaration; all notation names in the declaration must be declared.
5305 *
5306 * Returns: the notation attribute tree built while parsing
5307 */
5308
5309xmlEnumerationPtr
5310xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005311 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005312 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005313
5314 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005315 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005316 return(NULL);
5317 }
5318 SHRINK;
5319 do {
5320 NEXT;
5321 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005322 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005323 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005324 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5325 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005326 xmlFreeEnumeration(ret);
5327 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005328 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005329 tmp = ret;
5330 while (tmp != NULL) {
5331 if (xmlStrEqual(name, tmp->name)) {
5332 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5333 "standalone: attribute notation value token %s duplicated\n",
5334 name, NULL);
5335 if (!xmlDictOwns(ctxt->dict, name))
5336 xmlFree((xmlChar *) name);
5337 break;
5338 }
5339 tmp = tmp->next;
5340 }
5341 if (tmp == NULL) {
5342 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005343 if (cur == NULL) {
5344 xmlFreeEnumeration(ret);
5345 return(NULL);
5346 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005347 if (last == NULL) ret = last = cur;
5348 else {
5349 last->next = cur;
5350 last = cur;
5351 }
Owen Taylor3473f882001-02-23 17:55:21 +00005352 }
5353 SKIP_BLANKS;
5354 } while (RAW == '|');
5355 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005356 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005357 xmlFreeEnumeration(ret);
5358 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005359 }
5360 NEXT;
5361 return(ret);
5362}
5363
5364/**
5365 * xmlParseEnumerationType:
5366 * @ctxt: an XML parser context
5367 *
5368 * parse an Enumeration attribute type.
5369 *
5370 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5371 *
5372 * [ VC: Enumeration ]
5373 * Values of this type must match one of the Nmtoken tokens in
5374 * the declaration
5375 *
5376 * Returns: the enumeration attribute tree built while parsing
5377 */
5378
5379xmlEnumerationPtr
5380xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5381 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005382 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005383
5384 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005385 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005386 return(NULL);
5387 }
5388 SHRINK;
5389 do {
5390 NEXT;
5391 SKIP_BLANKS;
5392 name = xmlParseNmtoken(ctxt);
5393 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005394 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005395 return(ret);
5396 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005397 tmp = ret;
5398 while (tmp != NULL) {
5399 if (xmlStrEqual(name, tmp->name)) {
5400 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5401 "standalone: attribute enumeration value token %s duplicated\n",
5402 name, NULL);
5403 if (!xmlDictOwns(ctxt->dict, name))
5404 xmlFree(name);
5405 break;
5406 }
5407 tmp = tmp->next;
5408 }
5409 if (tmp == NULL) {
5410 cur = xmlCreateEnumeration(name);
5411 if (!xmlDictOwns(ctxt->dict, name))
5412 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005413 if (cur == NULL) {
5414 xmlFreeEnumeration(ret);
5415 return(NULL);
5416 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005417 if (last == NULL) ret = last = cur;
5418 else {
5419 last->next = cur;
5420 last = cur;
5421 }
Owen Taylor3473f882001-02-23 17:55:21 +00005422 }
5423 SKIP_BLANKS;
5424 } while (RAW == '|');
5425 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005426 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005427 return(ret);
5428 }
5429 NEXT;
5430 return(ret);
5431}
5432
5433/**
5434 * xmlParseEnumeratedType:
5435 * @ctxt: an XML parser context
5436 * @tree: the enumeration tree built while parsing
5437 *
5438 * parse an Enumerated attribute type.
5439 *
5440 * [57] EnumeratedType ::= NotationType | Enumeration
5441 *
5442 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5443 *
5444 *
5445 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5446 */
5447
5448int
5449xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005450 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005451 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005452 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005453 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5454 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005455 return(0);
5456 }
5457 SKIP_BLANKS;
5458 *tree = xmlParseNotationType(ctxt);
5459 if (*tree == NULL) return(0);
5460 return(XML_ATTRIBUTE_NOTATION);
5461 }
5462 *tree = xmlParseEnumerationType(ctxt);
5463 if (*tree == NULL) return(0);
5464 return(XML_ATTRIBUTE_ENUMERATION);
5465}
5466
5467/**
5468 * xmlParseAttributeType:
5469 * @ctxt: an XML parser context
5470 * @tree: the enumeration tree built while parsing
5471 *
5472 * parse the Attribute list def for an element
5473 *
5474 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5475 *
5476 * [55] StringType ::= 'CDATA'
5477 *
5478 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5479 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5480 *
5481 * Validity constraints for attribute values syntax are checked in
5482 * xmlValidateAttributeValue()
5483 *
5484 * [ VC: ID ]
5485 * Values of type ID must match the Name production. A name must not
5486 * appear more than once in an XML document as a value of this type;
5487 * i.e., ID values must uniquely identify the elements which bear them.
5488 *
5489 * [ VC: One ID per Element Type ]
5490 * No element type may have more than one ID attribute specified.
5491 *
5492 * [ VC: ID Attribute Default ]
5493 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5494 *
5495 * [ VC: IDREF ]
5496 * Values of type IDREF must match the Name production, and values
5497 * of type IDREFS must match Names; each IDREF Name must match the value
5498 * of an ID attribute on some element in the XML document; i.e. IDREF
5499 * values must match the value of some ID attribute.
5500 *
5501 * [ VC: Entity Name ]
5502 * Values of type ENTITY must match the Name production, values
5503 * of type ENTITIES must match Names; each Entity Name must match the
5504 * name of an unparsed entity declared in the DTD.
5505 *
5506 * [ VC: Name Token ]
5507 * Values of type NMTOKEN must match the Nmtoken production; values
5508 * of type NMTOKENS must match Nmtokens.
5509 *
5510 * Returns the attribute type
5511 */
5512int
5513xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5514 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005515 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005516 SKIP(5);
5517 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005518 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005519 SKIP(6);
5520 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005521 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005522 SKIP(5);
5523 return(XML_ATTRIBUTE_IDREF);
5524 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5525 SKIP(2);
5526 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005527 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005528 SKIP(6);
5529 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005530 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005531 SKIP(8);
5532 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005533 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005534 SKIP(8);
5535 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005536 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005537 SKIP(7);
5538 return(XML_ATTRIBUTE_NMTOKEN);
5539 }
5540 return(xmlParseEnumeratedType(ctxt, tree));
5541}
5542
5543/**
5544 * xmlParseAttributeListDecl:
5545 * @ctxt: an XML parser context
5546 *
5547 * : parse the Attribute list def for an element
5548 *
5549 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5550 *
5551 * [53] AttDef ::= S Name S AttType S DefaultDecl
5552 *
5553 */
5554void
5555xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005556 const xmlChar *elemName;
5557 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005558 xmlEnumerationPtr tree;
5559
Daniel Veillarda07050d2003-10-19 14:46:32 +00005560 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005561 xmlParserInputPtr input = ctxt->input;
5562
5563 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005564 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005565 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005566 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005567 }
5568 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005569 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005570 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005571 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5572 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005573 return;
5574 }
5575 SKIP_BLANKS;
5576 GROW;
5577 while (RAW != '>') {
5578 const xmlChar *check = CUR_PTR;
5579 int type;
5580 int def;
5581 xmlChar *defaultValue = NULL;
5582
5583 GROW;
5584 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005585 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005586 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005587 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5588 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005589 break;
5590 }
5591 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005592 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005593 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005594 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005595 break;
5596 }
5597 SKIP_BLANKS;
5598
5599 type = xmlParseAttributeType(ctxt, &tree);
5600 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005601 break;
5602 }
5603
5604 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005605 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005606 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5607 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005608 if (tree != NULL)
5609 xmlFreeEnumeration(tree);
5610 break;
5611 }
5612 SKIP_BLANKS;
5613
5614 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5615 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005616 if (defaultValue != NULL)
5617 xmlFree(defaultValue);
5618 if (tree != NULL)
5619 xmlFreeEnumeration(tree);
5620 break;
5621 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005622 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5623 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005624
5625 GROW;
5626 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005627 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005629 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005630 if (defaultValue != NULL)
5631 xmlFree(defaultValue);
5632 if (tree != NULL)
5633 xmlFreeEnumeration(tree);
5634 break;
5635 }
5636 SKIP_BLANKS;
5637 }
5638 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005639 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5640 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005641 if (defaultValue != NULL)
5642 xmlFree(defaultValue);
5643 if (tree != NULL)
5644 xmlFreeEnumeration(tree);
5645 break;
5646 }
5647 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5648 (ctxt->sax->attributeDecl != NULL))
5649 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5650 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005651 else if (tree != NULL)
5652 xmlFreeEnumeration(tree);
5653
5654 if ((ctxt->sax2) && (defaultValue != NULL) &&
5655 (def != XML_ATTRIBUTE_IMPLIED) &&
5656 (def != XML_ATTRIBUTE_REQUIRED)) {
5657 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5658 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005659 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005660 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5661 }
Owen Taylor3473f882001-02-23 17:55:21 +00005662 if (defaultValue != NULL)
5663 xmlFree(defaultValue);
5664 GROW;
5665 }
5666 if (RAW == '>') {
5667 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005668 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5669 "Attribute list declaration doesn't start and stop in the same entity\n",
5670 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005671 }
5672 NEXT;
5673 }
Owen Taylor3473f882001-02-23 17:55:21 +00005674 }
5675}
5676
5677/**
5678 * xmlParseElementMixedContentDecl:
5679 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005680 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005681 *
5682 * parse the declaration for a Mixed Element content
5683 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5684 *
5685 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5686 * '(' S? '#PCDATA' S? ')'
5687 *
5688 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5689 *
5690 * [ VC: No Duplicate Types ]
5691 * The same name must not appear more than once in a single
5692 * mixed-content declaration.
5693 *
5694 * returns: the list of the xmlElementContentPtr describing the element choices
5695 */
5696xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005697xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005698 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005699 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005700
5701 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005702 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005703 SKIP(7);
5704 SKIP_BLANKS;
5705 SHRINK;
5706 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005707 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005708 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5709"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005710 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005711 }
Owen Taylor3473f882001-02-23 17:55:21 +00005712 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005713 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005714 if (ret == NULL)
5715 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005716 if (RAW == '*') {
5717 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5718 NEXT;
5719 }
5720 return(ret);
5721 }
5722 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005723 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005724 if (ret == NULL) return(NULL);
5725 }
5726 while (RAW == '|') {
5727 NEXT;
5728 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005729 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005730 if (ret == NULL) return(NULL);
5731 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005732 if (cur != NULL)
5733 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005734 cur = ret;
5735 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005736 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005737 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005738 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005739 if (n->c1 != NULL)
5740 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005741 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005742 if (n != NULL)
5743 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005744 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005745 }
5746 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005747 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005748 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005749 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005750 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005751 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005752 return(NULL);
5753 }
5754 SKIP_BLANKS;
5755 GROW;
5756 }
5757 if ((RAW == ')') && (NXT(1) == '*')) {
5758 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005759 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005760 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005761 if (cur->c2 != NULL)
5762 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005763 }
5764 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005765 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005766 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5767"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005768 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005769 }
Owen Taylor3473f882001-02-23 17:55:21 +00005770 SKIP(2);
5771 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005772 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005773 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005774 return(NULL);
5775 }
5776
5777 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005778 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005779 }
5780 return(ret);
5781}
5782
5783/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005784 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005785 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005786 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005787 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005788 *
5789 * parse the declaration for a Mixed Element content
5790 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5791 *
5792 *
5793 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5794 *
5795 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5796 *
5797 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5798 *
5799 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5800 *
5801 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5802 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005803 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005804 * opening or closing parentheses in a choice, seq, or Mixed
5805 * construct is contained in the replacement text for a parameter
5806 * entity, both must be contained in the same replacement text. For
5807 * interoperability, if a parameter-entity reference appears in a
5808 * choice, seq, or Mixed construct, its replacement text should not
5809 * be empty, and neither the first nor last non-blank character of
5810 * the replacement text should be a connector (| or ,).
5811 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005812 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005813 * hierarchy.
5814 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005815static xmlElementContentPtr
5816xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5817 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005818 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005819 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005820 xmlChar type = 0;
5821
Daniel Veillard489f9672009-08-10 16:49:30 +02005822 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5823 (depth > 2048)) {
5824 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5825"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5826 depth);
5827 return(NULL);
5828 }
Owen Taylor3473f882001-02-23 17:55:21 +00005829 SKIP_BLANKS;
5830 GROW;
5831 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005832 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005833
Owen Taylor3473f882001-02-23 17:55:21 +00005834 /* Recurse on first child */
5835 NEXT;
5836 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005837 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5838 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005839 SKIP_BLANKS;
5840 GROW;
5841 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005842 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005843 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005844 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005845 return(NULL);
5846 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005847 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005848 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005849 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005850 return(NULL);
5851 }
Owen Taylor3473f882001-02-23 17:55:21 +00005852 GROW;
5853 if (RAW == '?') {
5854 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5855 NEXT;
5856 } else if (RAW == '*') {
5857 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5858 NEXT;
5859 } else if (RAW == '+') {
5860 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5861 NEXT;
5862 } else {
5863 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5864 }
Owen Taylor3473f882001-02-23 17:55:21 +00005865 GROW;
5866 }
5867 SKIP_BLANKS;
5868 SHRINK;
5869 while (RAW != ')') {
5870 /*
5871 * Each loop we parse one separator and one element.
5872 */
5873 if (RAW == ',') {
5874 if (type == 0) type = CUR;
5875
5876 /*
5877 * Detect "Name | Name , Name" error
5878 */
5879 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005880 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005881 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005882 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005883 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005884 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005885 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005886 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005887 return(NULL);
5888 }
5889 NEXT;
5890
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005891 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005892 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005893 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005894 xmlFreeDocElementContent(ctxt->myDoc, last);
5895 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005896 return(NULL);
5897 }
5898 if (last == NULL) {
5899 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005900 if (ret != NULL)
5901 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005902 ret = cur = op;
5903 } else {
5904 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005905 if (op != NULL)
5906 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005907 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005908 if (last != NULL)
5909 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005910 cur =op;
5911 last = NULL;
5912 }
5913 } else if (RAW == '|') {
5914 if (type == 0) type = CUR;
5915
5916 /*
5917 * Detect "Name , Name | Name" error
5918 */
5919 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005920 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005921 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005922 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005923 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005924 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005925 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005926 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005927 return(NULL);
5928 }
5929 NEXT;
5930
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005931 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005932 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005933 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005934 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005935 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005936 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005937 return(NULL);
5938 }
5939 if (last == NULL) {
5940 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005941 if (ret != NULL)
5942 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005943 ret = cur = op;
5944 } else {
5945 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005946 if (op != NULL)
5947 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005948 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005949 if (last != NULL)
5950 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005951 cur =op;
5952 last = NULL;
5953 }
5954 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005955 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005956 if ((last != NULL) && (last != ret))
5957 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005958 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005959 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005960 return(NULL);
5961 }
5962 GROW;
5963 SKIP_BLANKS;
5964 GROW;
5965 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005966 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005967 /* Recurse on second child */
5968 NEXT;
5969 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005970 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5971 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005972 SKIP_BLANKS;
5973 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005974 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005975 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005976 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005977 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005978 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005979 return(NULL);
5980 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005981 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005982 if (last == NULL) {
5983 if (ret != NULL)
5984 xmlFreeDocElementContent(ctxt->myDoc, ret);
5985 return(NULL);
5986 }
Owen Taylor3473f882001-02-23 17:55:21 +00005987 if (RAW == '?') {
5988 last->ocur = XML_ELEMENT_CONTENT_OPT;
5989 NEXT;
5990 } else if (RAW == '*') {
5991 last->ocur = XML_ELEMENT_CONTENT_MULT;
5992 NEXT;
5993 } else if (RAW == '+') {
5994 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5995 NEXT;
5996 } else {
5997 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5998 }
5999 }
6000 SKIP_BLANKS;
6001 GROW;
6002 }
6003 if ((cur != NULL) && (last != NULL)) {
6004 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006005 if (last != NULL)
6006 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006007 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006008 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006009 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6010"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006011 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006012 }
Owen Taylor3473f882001-02-23 17:55:21 +00006013 NEXT;
6014 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006015 if (ret != NULL) {
6016 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6017 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6018 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6019 else
6020 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6021 }
Owen Taylor3473f882001-02-23 17:55:21 +00006022 NEXT;
6023 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006024 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006025 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006026 cur = ret;
6027 /*
6028 * Some normalization:
6029 * (a | b* | c?)* == (a | b | c)*
6030 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006031 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006032 if ((cur->c1 != NULL) &&
6033 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6034 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6035 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6036 if ((cur->c2 != NULL) &&
6037 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6038 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6039 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6040 cur = cur->c2;
6041 }
6042 }
Owen Taylor3473f882001-02-23 17:55:21 +00006043 NEXT;
6044 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006045 if (ret != NULL) {
6046 int found = 0;
6047
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006048 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6049 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6050 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006051 else
6052 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006053 /*
6054 * Some normalization:
6055 * (a | b*)+ == (a | b)*
6056 * (a | b?)+ == (a | b)*
6057 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006058 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006059 if ((cur->c1 != NULL) &&
6060 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6061 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6062 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6063 found = 1;
6064 }
6065 if ((cur->c2 != NULL) &&
6066 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6067 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6068 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6069 found = 1;
6070 }
6071 cur = cur->c2;
6072 }
6073 if (found)
6074 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6075 }
Owen Taylor3473f882001-02-23 17:55:21 +00006076 NEXT;
6077 }
6078 return(ret);
6079}
6080
6081/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006082 * xmlParseElementChildrenContentDecl:
6083 * @ctxt: an XML parser context
6084 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006085 *
6086 * parse the declaration for a Mixed Element content
6087 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6088 *
6089 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6090 *
6091 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6092 *
6093 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6094 *
6095 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6096 *
6097 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6098 * TODO Parameter-entity replacement text must be properly nested
6099 * with parenthesized groups. That is to say, if either of the
6100 * opening or closing parentheses in a choice, seq, or Mixed
6101 * construct is contained in the replacement text for a parameter
6102 * entity, both must be contained in the same replacement text. For
6103 * interoperability, if a parameter-entity reference appears in a
6104 * choice, seq, or Mixed construct, its replacement text should not
6105 * be empty, and neither the first nor last non-blank character of
6106 * the replacement text should be a connector (| or ,).
6107 *
6108 * Returns the tree of xmlElementContentPtr describing the element
6109 * hierarchy.
6110 */
6111xmlElementContentPtr
6112xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6113 /* stub left for API/ABI compat */
6114 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6115}
6116
6117/**
Owen Taylor3473f882001-02-23 17:55:21 +00006118 * xmlParseElementContentDecl:
6119 * @ctxt: an XML parser context
6120 * @name: the name of the element being defined.
6121 * @result: the Element Content pointer will be stored here if any
6122 *
6123 * parse the declaration for an Element content either Mixed or Children,
6124 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6125 *
6126 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6127 *
6128 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6129 */
6130
6131int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006132xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006133 xmlElementContentPtr *result) {
6134
6135 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006136 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006137 int res;
6138
6139 *result = NULL;
6140
6141 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006142 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006143 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006144 return(-1);
6145 }
6146 NEXT;
6147 GROW;
6148 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006149 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006150 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006151 res = XML_ELEMENT_TYPE_MIXED;
6152 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006153 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006154 res = XML_ELEMENT_TYPE_ELEMENT;
6155 }
Owen Taylor3473f882001-02-23 17:55:21 +00006156 SKIP_BLANKS;
6157 *result = tree;
6158 return(res);
6159}
6160
6161/**
6162 * xmlParseElementDecl:
6163 * @ctxt: an XML parser context
6164 *
6165 * parse an Element declaration.
6166 *
6167 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6168 *
6169 * [ VC: Unique Element Type Declaration ]
6170 * No element type may be declared more than once
6171 *
6172 * Returns the type of the element, or -1 in case of error
6173 */
6174int
6175xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006176 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006177 int ret = -1;
6178 xmlElementContentPtr content = NULL;
6179
Daniel Veillard4c778d82005-01-23 17:37:44 +00006180 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006181 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006182 xmlParserInputPtr input = ctxt->input;
6183
6184 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006185 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006186 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6187 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006188 }
6189 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006190 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006191 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006192 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6193 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006194 return(-1);
6195 }
6196 while ((RAW == 0) && (ctxt->inputNr > 1))
6197 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006198 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006199 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6200 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006201 }
6202 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006203 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006204 SKIP(5);
6205 /*
6206 * Element must always be empty.
6207 */
6208 ret = XML_ELEMENT_TYPE_EMPTY;
6209 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6210 (NXT(2) == 'Y')) {
6211 SKIP(3);
6212 /*
6213 * Element is a generic container.
6214 */
6215 ret = XML_ELEMENT_TYPE_ANY;
6216 } else if (RAW == '(') {
6217 ret = xmlParseElementContentDecl(ctxt, name, &content);
6218 } else {
6219 /*
6220 * [ WFC: PEs in Internal Subset ] error handling.
6221 */
6222 if ((RAW == '%') && (ctxt->external == 0) &&
6223 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006224 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006225 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006226 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006227 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006228 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6229 }
Owen Taylor3473f882001-02-23 17:55:21 +00006230 return(-1);
6231 }
6232
6233 SKIP_BLANKS;
6234 /*
6235 * Pop-up of finished entities.
6236 */
6237 while ((RAW == 0) && (ctxt->inputNr > 1))
6238 xmlPopInput(ctxt);
6239 SKIP_BLANKS;
6240
6241 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006242 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006243 if (content != NULL) {
6244 xmlFreeDocElementContent(ctxt->myDoc, content);
6245 }
Owen Taylor3473f882001-02-23 17:55:21 +00006246 } else {
6247 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006248 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6249 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006250 }
6251
6252 NEXT;
6253 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006254 (ctxt->sax->elementDecl != NULL)) {
6255 if (content != NULL)
6256 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006257 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6258 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006259 if ((content != NULL) && (content->parent == NULL)) {
6260 /*
6261 * this is a trick: if xmlAddElementDecl is called,
6262 * instead of copying the full tree it is plugged directly
6263 * if called from the parser. Avoid duplicating the
6264 * interfaces or change the API/ABI
6265 */
6266 xmlFreeDocElementContent(ctxt->myDoc, content);
6267 }
6268 } else if (content != NULL) {
6269 xmlFreeDocElementContent(ctxt->myDoc, content);
6270 }
Owen Taylor3473f882001-02-23 17:55:21 +00006271 }
Owen Taylor3473f882001-02-23 17:55:21 +00006272 }
6273 return(ret);
6274}
6275
6276/**
Owen Taylor3473f882001-02-23 17:55:21 +00006277 * xmlParseConditionalSections
6278 * @ctxt: an XML parser context
6279 *
6280 * [61] conditionalSect ::= includeSect | ignoreSect
6281 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6282 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6283 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6284 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6285 */
6286
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006287static void
Owen Taylor3473f882001-02-23 17:55:21 +00006288xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006289 int id = ctxt->input->id;
6290
Owen Taylor3473f882001-02-23 17:55:21 +00006291 SKIP(3);
6292 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006293 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006294 SKIP(7);
6295 SKIP_BLANKS;
6296 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006297 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006298 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006299 if (ctxt->input->id != id) {
6300 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6301 "All markup of the conditional section is not in the same entity\n",
6302 NULL, NULL);
6303 }
Owen Taylor3473f882001-02-23 17:55:21 +00006304 NEXT;
6305 }
6306 if (xmlParserDebugEntities) {
6307 if ((ctxt->input != NULL) && (ctxt->input->filename))
6308 xmlGenericError(xmlGenericErrorContext,
6309 "%s(%d): ", ctxt->input->filename,
6310 ctxt->input->line);
6311 xmlGenericError(xmlGenericErrorContext,
6312 "Entering INCLUDE Conditional Section\n");
6313 }
6314
6315 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6316 (NXT(2) != '>'))) {
6317 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006318 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006319
6320 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6321 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006322 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006323 NEXT;
6324 } else if (RAW == '%') {
6325 xmlParsePEReference(ctxt);
6326 } else
6327 xmlParseMarkupDecl(ctxt);
6328
6329 /*
6330 * Pop-up of finished entities.
6331 */
6332 while ((RAW == 0) && (ctxt->inputNr > 1))
6333 xmlPopInput(ctxt);
6334
Daniel Veillardfdc91562002-07-01 21:52:03 +00006335 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006336 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006337 break;
6338 }
6339 }
6340 if (xmlParserDebugEntities) {
6341 if ((ctxt->input != NULL) && (ctxt->input->filename))
6342 xmlGenericError(xmlGenericErrorContext,
6343 "%s(%d): ", ctxt->input->filename,
6344 ctxt->input->line);
6345 xmlGenericError(xmlGenericErrorContext,
6346 "Leaving INCLUDE Conditional Section\n");
6347 }
6348
Daniel Veillarda07050d2003-10-19 14:46:32 +00006349 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006350 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006351 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006352 int depth = 0;
6353
6354 SKIP(6);
6355 SKIP_BLANKS;
6356 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006357 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006358 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006359 if (ctxt->input->id != id) {
6360 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6361 "All markup of the conditional section is not in the same entity\n",
6362 NULL, NULL);
6363 }
Owen Taylor3473f882001-02-23 17:55:21 +00006364 NEXT;
6365 }
6366 if (xmlParserDebugEntities) {
6367 if ((ctxt->input != NULL) && (ctxt->input->filename))
6368 xmlGenericError(xmlGenericErrorContext,
6369 "%s(%d): ", ctxt->input->filename,
6370 ctxt->input->line);
6371 xmlGenericError(xmlGenericErrorContext,
6372 "Entering IGNORE Conditional Section\n");
6373 }
6374
6375 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006376 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006377 * But disable SAX event generating DTD building in the meantime
6378 */
6379 state = ctxt->disableSAX;
6380 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006381 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006382 ctxt->instate = XML_PARSER_IGNORE;
6383
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006384 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006385 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6386 depth++;
6387 SKIP(3);
6388 continue;
6389 }
6390 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6391 if (--depth >= 0) SKIP(3);
6392 continue;
6393 }
6394 NEXT;
6395 continue;
6396 }
6397
6398 ctxt->disableSAX = state;
6399 ctxt->instate = instate;
6400
6401 if (xmlParserDebugEntities) {
6402 if ((ctxt->input != NULL) && (ctxt->input->filename))
6403 xmlGenericError(xmlGenericErrorContext,
6404 "%s(%d): ", ctxt->input->filename,
6405 ctxt->input->line);
6406 xmlGenericError(xmlGenericErrorContext,
6407 "Leaving IGNORE Conditional Section\n");
6408 }
6409
6410 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006411 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006412 }
6413
6414 if (RAW == 0)
6415 SHRINK;
6416
6417 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006418 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006419 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006420 if (ctxt->input->id != id) {
6421 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6422 "All markup of the conditional section is not in the same entity\n",
6423 NULL, NULL);
6424 }
Owen Taylor3473f882001-02-23 17:55:21 +00006425 SKIP(3);
6426 }
6427}
6428
6429/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006430 * xmlParseMarkupDecl:
6431 * @ctxt: an XML parser context
6432 *
6433 * parse Markup declarations
6434 *
6435 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6436 * NotationDecl | PI | Comment
6437 *
6438 * [ VC: Proper Declaration/PE Nesting ]
6439 * Parameter-entity replacement text must be properly nested with
6440 * markup declarations. That is to say, if either the first character
6441 * or the last character of a markup declaration (markupdecl above) is
6442 * contained in the replacement text for a parameter-entity reference,
6443 * both must be contained in the same replacement text.
6444 *
6445 * [ WFC: PEs in Internal Subset ]
6446 * In the internal DTD subset, parameter-entity references can occur
6447 * only where markup declarations can occur, not within markup declarations.
6448 * (This does not apply to references that occur in external parameter
6449 * entities or to the external subset.)
6450 */
6451void
6452xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6453 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006454 if (CUR == '<') {
6455 if (NXT(1) == '!') {
6456 switch (NXT(2)) {
6457 case 'E':
6458 if (NXT(3) == 'L')
6459 xmlParseElementDecl(ctxt);
6460 else if (NXT(3) == 'N')
6461 xmlParseEntityDecl(ctxt);
6462 break;
6463 case 'A':
6464 xmlParseAttributeListDecl(ctxt);
6465 break;
6466 case 'N':
6467 xmlParseNotationDecl(ctxt);
6468 break;
6469 case '-':
6470 xmlParseComment(ctxt);
6471 break;
6472 default:
6473 /* there is an error but it will be detected later */
6474 break;
6475 }
6476 } else if (NXT(1) == '?') {
6477 xmlParsePI(ctxt);
6478 }
6479 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006480 /*
6481 * This is only for internal subset. On external entities,
6482 * the replacement is done before parsing stage
6483 */
6484 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6485 xmlParsePEReference(ctxt);
6486
6487 /*
6488 * Conditional sections are allowed from entities included
6489 * by PE References in the internal subset.
6490 */
6491 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6492 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6493 xmlParseConditionalSections(ctxt);
6494 }
6495 }
6496
6497 ctxt->instate = XML_PARSER_DTD;
6498}
6499
6500/**
6501 * xmlParseTextDecl:
6502 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006503 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006504 * parse an XML declaration header for external entities
6505 *
6506 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006507 */
6508
6509void
6510xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6511 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006512 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006513
6514 /*
6515 * We know that '<?xml' is here.
6516 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006517 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006518 SKIP(5);
6519 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006520 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006521 return;
6522 }
6523
William M. Brack76e95df2003-10-18 16:20:14 +00006524 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006525 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6526 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006527 }
6528 SKIP_BLANKS;
6529
6530 /*
6531 * We may have the VersionInfo here.
6532 */
6533 version = xmlParseVersionInfo(ctxt);
6534 if (version == NULL)
6535 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006536 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006537 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006538 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6539 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006540 }
6541 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006542 ctxt->input->version = version;
6543
6544 /*
6545 * We must have the encoding declaration
6546 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006547 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006548 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6549 /*
6550 * The XML REC instructs us to stop parsing right here
6551 */
6552 return;
6553 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006554 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6555 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6556 "Missing encoding in text declaration\n");
6557 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006558
6559 SKIP_BLANKS;
6560 if ((RAW == '?') && (NXT(1) == '>')) {
6561 SKIP(2);
6562 } else if (RAW == '>') {
6563 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006564 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006565 NEXT;
6566 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006567 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006568 MOVETO_ENDTAG(CUR_PTR);
6569 NEXT;
6570 }
6571}
6572
6573/**
Owen Taylor3473f882001-02-23 17:55:21 +00006574 * xmlParseExternalSubset:
6575 * @ctxt: an XML parser context
6576 * @ExternalID: the external identifier
6577 * @SystemID: the system identifier (or URL)
6578 *
6579 * parse Markup declarations from an external subset
6580 *
6581 * [30] extSubset ::= textDecl? extSubsetDecl
6582 *
6583 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6584 */
6585void
6586xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6587 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006588 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006589 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006590
6591 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6592 (ctxt->input->end - ctxt->input->cur >= 4)) {
6593 xmlChar start[4];
6594 xmlCharEncoding enc;
6595
6596 start[0] = RAW;
6597 start[1] = NXT(1);
6598 start[2] = NXT(2);
6599 start[3] = NXT(3);
6600 enc = xmlDetectCharEncoding(start, 4);
6601 if (enc != XML_CHAR_ENCODING_NONE)
6602 xmlSwitchEncoding(ctxt, enc);
6603 }
6604
Daniel Veillarda07050d2003-10-19 14:46:32 +00006605 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006606 xmlParseTextDecl(ctxt);
6607 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6608 /*
6609 * The XML REC instructs us to stop parsing right here
6610 */
6611 ctxt->instate = XML_PARSER_EOF;
6612 return;
6613 }
6614 }
6615 if (ctxt->myDoc == NULL) {
6616 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006617 if (ctxt->myDoc == NULL) {
6618 xmlErrMemory(ctxt, "New Doc failed");
6619 return;
6620 }
6621 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006622 }
6623 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6624 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6625
6626 ctxt->instate = XML_PARSER_DTD;
6627 ctxt->external = 1;
6628 while (((RAW == '<') && (NXT(1) == '?')) ||
6629 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006630 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006631 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006632 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006633
6634 GROW;
6635 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6636 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006637 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006638 NEXT;
6639 } else if (RAW == '%') {
6640 xmlParsePEReference(ctxt);
6641 } else
6642 xmlParseMarkupDecl(ctxt);
6643
6644 /*
6645 * Pop-up of finished entities.
6646 */
6647 while ((RAW == 0) && (ctxt->inputNr > 1))
6648 xmlPopInput(ctxt);
6649
Daniel Veillardfdc91562002-07-01 21:52:03 +00006650 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006651 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006652 break;
6653 }
6654 }
6655
6656 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006657 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006658 }
6659
6660}
6661
6662/**
6663 * xmlParseReference:
6664 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006665 *
Owen Taylor3473f882001-02-23 17:55:21 +00006666 * parse and handle entity references in content, depending on the SAX
6667 * interface, this may end-up in a call to character() if this is a
6668 * CharRef, a predefined entity, if there is no reference() callback.
6669 * or if the parser was asked to switch to that mode.
6670 *
6671 * [67] Reference ::= EntityRef | CharRef
6672 */
6673void
6674xmlParseReference(xmlParserCtxtPtr ctxt) {
6675 xmlEntityPtr ent;
6676 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006677 int was_checked;
6678 xmlNodePtr list = NULL;
6679 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006680
Daniel Veillard0161e632008-08-28 15:36:32 +00006681
6682 if (RAW != '&')
6683 return;
6684
6685 /*
6686 * Simple case of a CharRef
6687 */
Owen Taylor3473f882001-02-23 17:55:21 +00006688 if (NXT(1) == '#') {
6689 int i = 0;
6690 xmlChar out[10];
6691 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006692 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006693
Daniel Veillarddc171602008-03-26 17:41:38 +00006694 if (value == 0)
6695 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006696 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6697 /*
6698 * So we are using non-UTF-8 buffers
6699 * Check that the char fit on 8bits, if not
6700 * generate a CharRef.
6701 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006702 if (value <= 0xFF) {
6703 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006704 out[1] = 0;
6705 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6706 (!ctxt->disableSAX))
6707 ctxt->sax->characters(ctxt->userData, out, 1);
6708 } else {
6709 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006710 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006711 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006712 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006713 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6714 (!ctxt->disableSAX))
6715 ctxt->sax->reference(ctxt->userData, out);
6716 }
6717 } else {
6718 /*
6719 * Just encode the value in UTF-8
6720 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006721 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006722 out[i] = 0;
6723 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6724 (!ctxt->disableSAX))
6725 ctxt->sax->characters(ctxt->userData, out, i);
6726 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006727 return;
6728 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006729
Daniel Veillard0161e632008-08-28 15:36:32 +00006730 /*
6731 * We are seeing an entity reference
6732 */
6733 ent = xmlParseEntityRef(ctxt);
6734 if (ent == NULL) return;
6735 if (!ctxt->wellFormed)
6736 return;
6737 was_checked = ent->checked;
6738
6739 /* special case of predefined entities */
6740 if ((ent->name == NULL) ||
6741 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6742 val = ent->content;
6743 if (val == NULL) return;
6744 /*
6745 * inline the entity.
6746 */
6747 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6748 (!ctxt->disableSAX))
6749 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6750 return;
6751 }
6752
6753 /*
6754 * The first reference to the entity trigger a parsing phase
6755 * where the ent->children is filled with the result from
6756 * the parsing.
6757 */
6758 if (ent->checked == 0) {
6759 unsigned long oldnbent = ctxt->nbentities;
6760
6761 /*
6762 * This is a bit hackish but this seems the best
6763 * way to make sure both SAX and DOM entity support
6764 * behaves okay.
6765 */
6766 void *user_data;
6767 if (ctxt->userData == ctxt)
6768 user_data = NULL;
6769 else
6770 user_data = ctxt->userData;
6771
6772 /*
6773 * Check that this entity is well formed
6774 * 4.3.2: An internal general parsed entity is well-formed
6775 * if its replacement text matches the production labeled
6776 * content.
6777 */
6778 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6779 ctxt->depth++;
6780 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6781 user_data, &list);
6782 ctxt->depth--;
6783
6784 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6785 ctxt->depth++;
6786 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6787 user_data, ctxt->depth, ent->URI,
6788 ent->ExternalID, &list);
6789 ctxt->depth--;
6790 } else {
6791 ret = XML_ERR_ENTITY_PE_INTERNAL;
6792 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6793 "invalid entity type found\n", NULL);
6794 }
6795
6796 /*
6797 * Store the number of entities needing parsing for this entity
6798 * content and do checkings
6799 */
6800 ent->checked = ctxt->nbentities - oldnbent;
6801 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006802 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006803 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006804 return;
6805 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006806 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6807 xmlFreeNodeList(list);
6808 return;
6809 }
Owen Taylor3473f882001-02-23 17:55:21 +00006810
Daniel Veillard0161e632008-08-28 15:36:32 +00006811 if ((ret == XML_ERR_OK) && (list != NULL)) {
6812 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6813 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6814 (ent->children == NULL)) {
6815 ent->children = list;
6816 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006817 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006818 * Prune it directly in the generated document
6819 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006820 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006821 if (((list->type == XML_TEXT_NODE) &&
6822 (list->next == NULL)) ||
6823 (ctxt->parseMode == XML_PARSE_READER)) {
6824 list->parent = (xmlNodePtr) ent;
6825 list = NULL;
6826 ent->owner = 1;
6827 } else {
6828 ent->owner = 0;
6829 while (list != NULL) {
6830 list->parent = (xmlNodePtr) ctxt->node;
6831 list->doc = ctxt->myDoc;
6832 if (list->next == NULL)
6833 ent->last = list;
6834 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006835 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006836 list = ent->children;
6837#ifdef LIBXML_LEGACY_ENABLED
6838 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6839 xmlAddEntityReference(ent, list, NULL);
6840#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006841 }
6842 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006843 ent->owner = 1;
6844 while (list != NULL) {
6845 list->parent = (xmlNodePtr) ent;
6846 if (list->next == NULL)
6847 ent->last = list;
6848 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006849 }
6850 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006851 } else {
6852 xmlFreeNodeList(list);
6853 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006854 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006855 } else if ((ret != XML_ERR_OK) &&
6856 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6857 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6858 "Entity '%s' failed to parse\n", ent->name);
6859 } else if (list != NULL) {
6860 xmlFreeNodeList(list);
6861 list = NULL;
6862 }
6863 if (ent->checked == 0)
6864 ent->checked = 1;
6865 } else if (ent->checked != 1) {
6866 ctxt->nbentities += ent->checked;
6867 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006868
Daniel Veillard0161e632008-08-28 15:36:32 +00006869 /*
6870 * Now that the entity content has been gathered
6871 * provide it to the application, this can take different forms based
6872 * on the parsing modes.
6873 */
6874 if (ent->children == NULL) {
6875 /*
6876 * Probably running in SAX mode and the callbacks don't
6877 * build the entity content. So unless we already went
6878 * though parsing for first checking go though the entity
6879 * content to generate callbacks associated to the entity
6880 */
6881 if (was_checked != 0) {
6882 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00006883 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006884 * This is a bit hackish but this seems the best
6885 * way to make sure both SAX and DOM entity support
6886 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00006887 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006888 if (ctxt->userData == ctxt)
6889 user_data = NULL;
6890 else
6891 user_data = ctxt->userData;
6892
6893 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6894 ctxt->depth++;
6895 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6896 ent->content, user_data, NULL);
6897 ctxt->depth--;
6898 } else if (ent->etype ==
6899 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6900 ctxt->depth++;
6901 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6902 ctxt->sax, user_data, ctxt->depth,
6903 ent->URI, ent->ExternalID, NULL);
6904 ctxt->depth--;
6905 } else {
6906 ret = XML_ERR_ENTITY_PE_INTERNAL;
6907 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6908 "invalid entity type found\n", NULL);
6909 }
6910 if (ret == XML_ERR_ENTITY_LOOP) {
6911 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6912 return;
6913 }
6914 }
6915 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6916 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6917 /*
6918 * Entity reference callback comes second, it's somewhat
6919 * superfluous but a compatibility to historical behaviour
6920 */
6921 ctxt->sax->reference(ctxt->userData, ent->name);
6922 }
6923 return;
6924 }
6925
6926 /*
6927 * If we didn't get any children for the entity being built
6928 */
6929 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6930 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6931 /*
6932 * Create a node.
6933 */
6934 ctxt->sax->reference(ctxt->userData, ent->name);
6935 return;
6936 }
6937
6938 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6939 /*
6940 * There is a problem on the handling of _private for entities
6941 * (bug 155816): Should we copy the content of the field from
6942 * the entity (possibly overwriting some value set by the user
6943 * when a copy is created), should we leave it alone, or should
6944 * we try to take care of different situations? The problem
6945 * is exacerbated by the usage of this field by the xmlReader.
6946 * To fix this bug, we look at _private on the created node
6947 * and, if it's NULL, we copy in whatever was in the entity.
6948 * If it's not NULL we leave it alone. This is somewhat of a
6949 * hack - maybe we should have further tests to determine
6950 * what to do.
6951 */
6952 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6953 /*
6954 * Seems we are generating the DOM content, do
6955 * a simple tree copy for all references except the first
6956 * In the first occurrence list contains the replacement.
6957 * progressive == 2 means we are operating on the Reader
6958 * and since nodes are discarded we must copy all the time.
6959 */
6960 if (((list == NULL) && (ent->owner == 0)) ||
6961 (ctxt->parseMode == XML_PARSE_READER)) {
6962 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6963
6964 /*
6965 * when operating on a reader, the entities definitions
6966 * are always owning the entities subtree.
6967 if (ctxt->parseMode == XML_PARSE_READER)
6968 ent->owner = 1;
6969 */
6970
6971 cur = ent->children;
6972 while (cur != NULL) {
6973 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6974 if (nw != NULL) {
6975 if (nw->_private == NULL)
6976 nw->_private = cur->_private;
6977 if (firstChild == NULL){
6978 firstChild = nw;
6979 }
6980 nw = xmlAddChild(ctxt->node, nw);
6981 }
6982 if (cur == ent->last) {
6983 /*
6984 * needed to detect some strange empty
6985 * node cases in the reader tests
6986 */
6987 if ((ctxt->parseMode == XML_PARSE_READER) &&
6988 (nw != NULL) &&
6989 (nw->type == XML_ELEMENT_NODE) &&
6990 (nw->children == NULL))
6991 nw->extra = 1;
6992
6993 break;
6994 }
6995 cur = cur->next;
6996 }
6997#ifdef LIBXML_LEGACY_ENABLED
6998 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6999 xmlAddEntityReference(ent, firstChild, nw);
7000#endif /* LIBXML_LEGACY_ENABLED */
7001 } else if (list == NULL) {
7002 xmlNodePtr nw = NULL, cur, next, last,
7003 firstChild = NULL;
7004 /*
7005 * Copy the entity child list and make it the new
7006 * entity child list. The goal is to make sure any
7007 * ID or REF referenced will be the one from the
7008 * document content and not the entity copy.
7009 */
7010 cur = ent->children;
7011 ent->children = NULL;
7012 last = ent->last;
7013 ent->last = NULL;
7014 while (cur != NULL) {
7015 next = cur->next;
7016 cur->next = NULL;
7017 cur->parent = NULL;
7018 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7019 if (nw != NULL) {
7020 if (nw->_private == NULL)
7021 nw->_private = cur->_private;
7022 if (firstChild == NULL){
7023 firstChild = cur;
7024 }
7025 xmlAddChild((xmlNodePtr) ent, nw);
7026 xmlAddChild(ctxt->node, cur);
7027 }
7028 if (cur == last)
7029 break;
7030 cur = next;
7031 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007032 if (ent->owner == 0)
7033 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007034#ifdef LIBXML_LEGACY_ENABLED
7035 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7036 xmlAddEntityReference(ent, firstChild, nw);
7037#endif /* LIBXML_LEGACY_ENABLED */
7038 } else {
7039 const xmlChar *nbktext;
7040
7041 /*
7042 * the name change is to avoid coalescing of the
7043 * node with a possible previous text one which
7044 * would make ent->children a dangling pointer
7045 */
7046 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7047 -1);
7048 if (ent->children->type == XML_TEXT_NODE)
7049 ent->children->name = nbktext;
7050 if ((ent->last != ent->children) &&
7051 (ent->last->type == XML_TEXT_NODE))
7052 ent->last->name = nbktext;
7053 xmlAddChildList(ctxt->node, ent->children);
7054 }
7055
7056 /*
7057 * This is to avoid a nasty side effect, see
7058 * characters() in SAX.c
7059 */
7060 ctxt->nodemem = 0;
7061 ctxt->nodelen = 0;
7062 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007063 }
7064 }
7065}
7066
7067/**
7068 * xmlParseEntityRef:
7069 * @ctxt: an XML parser context
7070 *
7071 * parse ENTITY references declarations
7072 *
7073 * [68] EntityRef ::= '&' Name ';'
7074 *
7075 * [ WFC: Entity Declared ]
7076 * In a document without any DTD, a document with only an internal DTD
7077 * subset which contains no parameter entity references, or a document
7078 * with "standalone='yes'", the Name given in the entity reference
7079 * must match that in an entity declaration, except that well-formed
7080 * documents need not declare any of the following entities: amp, lt,
7081 * gt, apos, quot. The declaration of a parameter entity must precede
7082 * any reference to it. Similarly, the declaration of a general entity
7083 * must precede any reference to it which appears in a default value in an
7084 * attribute-list declaration. Note that if entities are declared in the
7085 * external subset or in external parameter entities, a non-validating
7086 * processor is not obligated to read and process their declarations;
7087 * for such documents, the rule that an entity must be declared is a
7088 * well-formedness constraint only if standalone='yes'.
7089 *
7090 * [ WFC: Parsed Entity ]
7091 * An entity reference must not contain the name of an unparsed entity
7092 *
7093 * Returns the xmlEntityPtr if found, or NULL otherwise.
7094 */
7095xmlEntityPtr
7096xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007097 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007098 xmlEntityPtr ent = NULL;
7099
7100 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007101
Daniel Veillard0161e632008-08-28 15:36:32 +00007102 if (RAW != '&')
7103 return(NULL);
7104 NEXT;
7105 name = xmlParseName(ctxt);
7106 if (name == NULL) {
7107 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7108 "xmlParseEntityRef: no name\n");
7109 return(NULL);
7110 }
7111 if (RAW != ';') {
7112 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7113 return(NULL);
7114 }
7115 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007116
Daniel Veillard0161e632008-08-28 15:36:32 +00007117 /*
7118 * Predefined entites override any extra definition
7119 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007120 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7121 ent = xmlGetPredefinedEntity(name);
7122 if (ent != NULL)
7123 return(ent);
7124 }
Owen Taylor3473f882001-02-23 17:55:21 +00007125
Daniel Veillard0161e632008-08-28 15:36:32 +00007126 /*
7127 * Increate the number of entity references parsed
7128 */
7129 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007130
Daniel Veillard0161e632008-08-28 15:36:32 +00007131 /*
7132 * Ask first SAX for entity resolution, otherwise try the
7133 * entities which may have stored in the parser context.
7134 */
7135 if (ctxt->sax != NULL) {
7136 if (ctxt->sax->getEntity != NULL)
7137 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007138 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7139 (ctxt->options & XML_PARSE_OLDSAX))
7140 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007141 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7142 (ctxt->userData==ctxt)) {
7143 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007144 }
7145 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007146 /*
7147 * [ WFC: Entity Declared ]
7148 * In a document without any DTD, a document with only an
7149 * internal DTD subset which contains no parameter entity
7150 * references, or a document with "standalone='yes'", the
7151 * Name given in the entity reference must match that in an
7152 * entity declaration, except that well-formed documents
7153 * need not declare any of the following entities: amp, lt,
7154 * gt, apos, quot.
7155 * The declaration of a parameter entity must precede any
7156 * reference to it.
7157 * Similarly, the declaration of a general entity must
7158 * precede any reference to it which appears in a default
7159 * value in an attribute-list declaration. Note that if
7160 * entities are declared in the external subset or in
7161 * external parameter entities, a non-validating processor
7162 * is not obligated to read and process their declarations;
7163 * for such documents, the rule that an entity must be
7164 * declared is a well-formedness constraint only if
7165 * standalone='yes'.
7166 */
7167 if (ent == NULL) {
7168 if ((ctxt->standalone == 1) ||
7169 ((ctxt->hasExternalSubset == 0) &&
7170 (ctxt->hasPErefs == 0))) {
7171 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7172 "Entity '%s' not defined\n", name);
7173 } else {
7174 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7175 "Entity '%s' not defined\n", name);
7176 if ((ctxt->inSubset == 0) &&
7177 (ctxt->sax != NULL) &&
7178 (ctxt->sax->reference != NULL)) {
7179 ctxt->sax->reference(ctxt->userData, name);
7180 }
7181 }
7182 ctxt->valid = 0;
7183 }
7184
7185 /*
7186 * [ WFC: Parsed Entity ]
7187 * An entity reference must not contain the name of an
7188 * unparsed entity
7189 */
7190 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7191 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7192 "Entity reference to unparsed entity %s\n", name);
7193 }
7194
7195 /*
7196 * [ WFC: No External Entity References ]
7197 * Attribute values cannot contain direct or indirect
7198 * entity references to external entities.
7199 */
7200 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7201 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7202 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7203 "Attribute references external entity '%s'\n", name);
7204 }
7205 /*
7206 * [ WFC: No < in Attribute Values ]
7207 * The replacement text of any entity referred to directly or
7208 * indirectly in an attribute value (other than "&lt;") must
7209 * not contain a <.
7210 */
7211 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7212 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007213 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007214 (xmlStrchr(ent->content, '<'))) {
7215 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7216 "'<' in entity '%s' is not allowed in attributes values\n", name);
7217 }
7218
7219 /*
7220 * Internal check, no parameter entities here ...
7221 */
7222 else {
7223 switch (ent->etype) {
7224 case XML_INTERNAL_PARAMETER_ENTITY:
7225 case XML_EXTERNAL_PARAMETER_ENTITY:
7226 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7227 "Attempt to reference the parameter entity '%s'\n",
7228 name);
7229 break;
7230 default:
7231 break;
7232 }
7233 }
7234
7235 /*
7236 * [ WFC: No Recursion ]
7237 * A parsed entity must not contain a recursive reference
7238 * to itself, either directly or indirectly.
7239 * Done somewhere else
7240 */
Owen Taylor3473f882001-02-23 17:55:21 +00007241 return(ent);
7242}
7243
7244/**
7245 * xmlParseStringEntityRef:
7246 * @ctxt: an XML parser context
7247 * @str: a pointer to an index in the string
7248 *
7249 * parse ENTITY references declarations, but this version parses it from
7250 * a string value.
7251 *
7252 * [68] EntityRef ::= '&' Name ';'
7253 *
7254 * [ WFC: Entity Declared ]
7255 * In a document without any DTD, a document with only an internal DTD
7256 * subset which contains no parameter entity references, or a document
7257 * with "standalone='yes'", the Name given in the entity reference
7258 * must match that in an entity declaration, except that well-formed
7259 * documents need not declare any of the following entities: amp, lt,
7260 * gt, apos, quot. The declaration of a parameter entity must precede
7261 * any reference to it. Similarly, the declaration of a general entity
7262 * must precede any reference to it which appears in a default value in an
7263 * attribute-list declaration. Note that if entities are declared in the
7264 * external subset or in external parameter entities, a non-validating
7265 * processor is not obligated to read and process their declarations;
7266 * for such documents, the rule that an entity must be declared is a
7267 * well-formedness constraint only if standalone='yes'.
7268 *
7269 * [ WFC: Parsed Entity ]
7270 * An entity reference must not contain the name of an unparsed entity
7271 *
7272 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7273 * is updated to the current location in the string.
7274 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007275static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007276xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7277 xmlChar *name;
7278 const xmlChar *ptr;
7279 xmlChar cur;
7280 xmlEntityPtr ent = NULL;
7281
7282 if ((str == NULL) || (*str == NULL))
7283 return(NULL);
7284 ptr = *str;
7285 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007286 if (cur != '&')
7287 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007288
Daniel Veillard0161e632008-08-28 15:36:32 +00007289 ptr++;
7290 cur = *ptr;
7291 name = xmlParseStringName(ctxt, &ptr);
7292 if (name == NULL) {
7293 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7294 "xmlParseStringEntityRef: no name\n");
7295 *str = ptr;
7296 return(NULL);
7297 }
7298 if (*ptr != ';') {
7299 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007300 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007301 *str = ptr;
7302 return(NULL);
7303 }
7304 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007305
Owen Taylor3473f882001-02-23 17:55:21 +00007306
Daniel Veillard0161e632008-08-28 15:36:32 +00007307 /*
7308 * Predefined entites override any extra definition
7309 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007310 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7311 ent = xmlGetPredefinedEntity(name);
7312 if (ent != NULL) {
7313 xmlFree(name);
7314 *str = ptr;
7315 return(ent);
7316 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007317 }
Owen Taylor3473f882001-02-23 17:55:21 +00007318
Daniel Veillard0161e632008-08-28 15:36:32 +00007319 /*
7320 * Increate the number of entity references parsed
7321 */
7322 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007323
Daniel Veillard0161e632008-08-28 15:36:32 +00007324 /*
7325 * Ask first SAX for entity resolution, otherwise try the
7326 * entities which may have stored in the parser context.
7327 */
7328 if (ctxt->sax != NULL) {
7329 if (ctxt->sax->getEntity != NULL)
7330 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007331 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7332 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007333 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7334 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007335 }
7336 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007337
7338 /*
7339 * [ WFC: Entity Declared ]
7340 * In a document without any DTD, a document with only an
7341 * internal DTD subset which contains no parameter entity
7342 * references, or a document with "standalone='yes'", the
7343 * Name given in the entity reference must match that in an
7344 * entity declaration, except that well-formed documents
7345 * need not declare any of the following entities: amp, lt,
7346 * gt, apos, quot.
7347 * The declaration of a parameter entity must precede any
7348 * reference to it.
7349 * Similarly, the declaration of a general entity must
7350 * precede any reference to it which appears in a default
7351 * value in an attribute-list declaration. Note that if
7352 * entities are declared in the external subset or in
7353 * external parameter entities, a non-validating processor
7354 * is not obligated to read and process their declarations;
7355 * for such documents, the rule that an entity must be
7356 * declared is a well-formedness constraint only if
7357 * standalone='yes'.
7358 */
7359 if (ent == NULL) {
7360 if ((ctxt->standalone == 1) ||
7361 ((ctxt->hasExternalSubset == 0) &&
7362 (ctxt->hasPErefs == 0))) {
7363 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364 "Entity '%s' not defined\n", name);
7365 } else {
7366 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7367 "Entity '%s' not defined\n",
7368 name);
7369 }
7370 /* TODO ? check regressions ctxt->valid = 0; */
7371 }
7372
7373 /*
7374 * [ WFC: Parsed Entity ]
7375 * An entity reference must not contain the name of an
7376 * unparsed entity
7377 */
7378 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7379 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7380 "Entity reference to unparsed entity %s\n", name);
7381 }
7382
7383 /*
7384 * [ WFC: No External Entity References ]
7385 * Attribute values cannot contain direct or indirect
7386 * entity references to external entities.
7387 */
7388 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7389 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7390 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7391 "Attribute references external entity '%s'\n", name);
7392 }
7393 /*
7394 * [ WFC: No < in Attribute Values ]
7395 * The replacement text of any entity referred to directly or
7396 * indirectly in an attribute value (other than "&lt;") must
7397 * not contain a <.
7398 */
7399 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7400 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007401 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007402 (xmlStrchr(ent->content, '<'))) {
7403 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7404 "'<' in entity '%s' is not allowed in attributes values\n",
7405 name);
7406 }
7407
7408 /*
7409 * Internal check, no parameter entities here ...
7410 */
7411 else {
7412 switch (ent->etype) {
7413 case XML_INTERNAL_PARAMETER_ENTITY:
7414 case XML_EXTERNAL_PARAMETER_ENTITY:
7415 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7416 "Attempt to reference the parameter entity '%s'\n",
7417 name);
7418 break;
7419 default:
7420 break;
7421 }
7422 }
7423
7424 /*
7425 * [ WFC: No Recursion ]
7426 * A parsed entity must not contain a recursive reference
7427 * to itself, either directly or indirectly.
7428 * Done somewhere else
7429 */
7430
7431 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007432 *str = ptr;
7433 return(ent);
7434}
7435
7436/**
7437 * xmlParsePEReference:
7438 * @ctxt: an XML parser context
7439 *
7440 * parse PEReference declarations
7441 * The entity content is handled directly by pushing it's content as
7442 * a new input stream.
7443 *
7444 * [69] PEReference ::= '%' Name ';'
7445 *
7446 * [ WFC: No Recursion ]
7447 * A parsed entity must not contain a recursive
7448 * reference to itself, either directly or indirectly.
7449 *
7450 * [ WFC: Entity Declared ]
7451 * In a document without any DTD, a document with only an internal DTD
7452 * subset which contains no parameter entity references, or a document
7453 * with "standalone='yes'", ... ... The declaration of a parameter
7454 * entity must precede any reference to it...
7455 *
7456 * [ VC: Entity Declared ]
7457 * In a document with an external subset or external parameter entities
7458 * with "standalone='no'", ... ... The declaration of a parameter entity
7459 * must precede any reference to it...
7460 *
7461 * [ WFC: In DTD ]
7462 * Parameter-entity references may only appear in the DTD.
7463 * NOTE: misleading but this is handled.
7464 */
7465void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007466xmlParsePEReference(xmlParserCtxtPtr ctxt)
7467{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007468 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007469 xmlEntityPtr entity = NULL;
7470 xmlParserInputPtr input;
7471
Daniel Veillard0161e632008-08-28 15:36:32 +00007472 if (RAW != '%')
7473 return;
7474 NEXT;
7475 name = xmlParseName(ctxt);
7476 if (name == NULL) {
7477 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7478 "xmlParsePEReference: no name\n");
7479 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007480 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007481 if (RAW != ';') {
7482 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7483 return;
7484 }
7485
7486 NEXT;
7487
7488 /*
7489 * Increate the number of entity references parsed
7490 */
7491 ctxt->nbentities++;
7492
7493 /*
7494 * Request the entity from SAX
7495 */
7496 if ((ctxt->sax != NULL) &&
7497 (ctxt->sax->getParameterEntity != NULL))
7498 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7499 name);
7500 if (entity == NULL) {
7501 /*
7502 * [ WFC: Entity Declared ]
7503 * In a document without any DTD, a document with only an
7504 * internal DTD subset which contains no parameter entity
7505 * references, or a document with "standalone='yes'", ...
7506 * ... The declaration of a parameter entity must precede
7507 * any reference to it...
7508 */
7509 if ((ctxt->standalone == 1) ||
7510 ((ctxt->hasExternalSubset == 0) &&
7511 (ctxt->hasPErefs == 0))) {
7512 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7513 "PEReference: %%%s; not found\n",
7514 name);
7515 } else {
7516 /*
7517 * [ VC: Entity Declared ]
7518 * In a document with an external subset or external
7519 * parameter entities with "standalone='no'", ...
7520 * ... The declaration of a parameter entity must
7521 * precede any reference to it...
7522 */
7523 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7524 "PEReference: %%%s; not found\n",
7525 name, NULL);
7526 ctxt->valid = 0;
7527 }
7528 } else {
7529 /*
7530 * Internal checking in case the entity quest barfed
7531 */
7532 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7533 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7534 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7535 "Internal: %%%s; is not a parameter entity\n",
7536 name, NULL);
7537 } else if (ctxt->input->free != deallocblankswrapper) {
7538 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7539 if (xmlPushInput(ctxt, input) < 0)
7540 return;
7541 } else {
7542 /*
7543 * TODO !!!
7544 * handle the extra spaces added before and after
7545 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7546 */
7547 input = xmlNewEntityInputStream(ctxt, entity);
7548 if (xmlPushInput(ctxt, input) < 0)
7549 return;
7550 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7551 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7552 (IS_BLANK_CH(NXT(5)))) {
7553 xmlParseTextDecl(ctxt);
7554 if (ctxt->errNo ==
7555 XML_ERR_UNSUPPORTED_ENCODING) {
7556 /*
7557 * The XML REC instructs us to stop parsing
7558 * right here
7559 */
7560 ctxt->instate = XML_PARSER_EOF;
7561 return;
7562 }
7563 }
7564 }
7565 }
7566 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007567}
7568
7569/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007570 * xmlLoadEntityContent:
7571 * @ctxt: an XML parser context
7572 * @entity: an unloaded system entity
7573 *
7574 * Load the original content of the given system entity from the
7575 * ExternalID/SystemID given. This is to be used for Included in Literal
7576 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7577 *
7578 * Returns 0 in case of success and -1 in case of failure
7579 */
7580static int
7581xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7582 xmlParserInputPtr input;
7583 xmlBufferPtr buf;
7584 int l, c;
7585 int count = 0;
7586
7587 if ((ctxt == NULL) || (entity == NULL) ||
7588 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7589 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7590 (entity->content != NULL)) {
7591 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7592 "xmlLoadEntityContent parameter error");
7593 return(-1);
7594 }
7595
7596 if (xmlParserDebugEntities)
7597 xmlGenericError(xmlGenericErrorContext,
7598 "Reading %s entity content input\n", entity->name);
7599
7600 buf = xmlBufferCreate();
7601 if (buf == NULL) {
7602 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7603 "xmlLoadEntityContent parameter error");
7604 return(-1);
7605 }
7606
7607 input = xmlNewEntityInputStream(ctxt, entity);
7608 if (input == NULL) {
7609 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7610 "xmlLoadEntityContent input error");
7611 xmlBufferFree(buf);
7612 return(-1);
7613 }
7614
7615 /*
7616 * Push the entity as the current input, read char by char
7617 * saving to the buffer until the end of the entity or an error
7618 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007619 if (xmlPushInput(ctxt, input) < 0) {
7620 xmlBufferFree(buf);
7621 return(-1);
7622 }
7623
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007624 GROW;
7625 c = CUR_CHAR(l);
7626 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7627 (IS_CHAR(c))) {
7628 xmlBufferAdd(buf, ctxt->input->cur, l);
7629 if (count++ > 100) {
7630 count = 0;
7631 GROW;
7632 }
7633 NEXTL(l);
7634 c = CUR_CHAR(l);
7635 }
7636
7637 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7638 xmlPopInput(ctxt);
7639 } else if (!IS_CHAR(c)) {
7640 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7641 "xmlLoadEntityContent: invalid char value %d\n",
7642 c);
7643 xmlBufferFree(buf);
7644 return(-1);
7645 }
7646 entity->content = buf->content;
7647 buf->content = NULL;
7648 xmlBufferFree(buf);
7649
7650 return(0);
7651}
7652
7653/**
Owen Taylor3473f882001-02-23 17:55:21 +00007654 * xmlParseStringPEReference:
7655 * @ctxt: an XML parser context
7656 * @str: a pointer to an index in the string
7657 *
7658 * parse PEReference declarations
7659 *
7660 * [69] PEReference ::= '%' Name ';'
7661 *
7662 * [ WFC: No Recursion ]
7663 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007664 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007665 *
7666 * [ WFC: Entity Declared ]
7667 * In a document without any DTD, a document with only an internal DTD
7668 * subset which contains no parameter entity references, or a document
7669 * with "standalone='yes'", ... ... The declaration of a parameter
7670 * entity must precede any reference to it...
7671 *
7672 * [ VC: Entity Declared ]
7673 * In a document with an external subset or external parameter entities
7674 * with "standalone='no'", ... ... The declaration of a parameter entity
7675 * must precede any reference to it...
7676 *
7677 * [ WFC: In DTD ]
7678 * Parameter-entity references may only appear in the DTD.
7679 * NOTE: misleading but this is handled.
7680 *
7681 * Returns the string of the entity content.
7682 * str is updated to the current value of the index
7683 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007684static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007685xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7686 const xmlChar *ptr;
7687 xmlChar cur;
7688 xmlChar *name;
7689 xmlEntityPtr entity = NULL;
7690
7691 if ((str == NULL) || (*str == NULL)) return(NULL);
7692 ptr = *str;
7693 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007694 if (cur != '%')
7695 return(NULL);
7696 ptr++;
7697 cur = *ptr;
7698 name = xmlParseStringName(ctxt, &ptr);
7699 if (name == NULL) {
7700 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7701 "xmlParseStringPEReference: no name\n");
7702 *str = ptr;
7703 return(NULL);
7704 }
7705 cur = *ptr;
7706 if (cur != ';') {
7707 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7708 xmlFree(name);
7709 *str = ptr;
7710 return(NULL);
7711 }
7712 ptr++;
7713
7714 /*
7715 * Increate the number of entity references parsed
7716 */
7717 ctxt->nbentities++;
7718
7719 /*
7720 * Request the entity from SAX
7721 */
7722 if ((ctxt->sax != NULL) &&
7723 (ctxt->sax->getParameterEntity != NULL))
7724 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7725 name);
7726 if (entity == NULL) {
7727 /*
7728 * [ WFC: Entity Declared ]
7729 * In a document without any DTD, a document with only an
7730 * internal DTD subset which contains no parameter entity
7731 * references, or a document with "standalone='yes'", ...
7732 * ... The declaration of a parameter entity must precede
7733 * any reference to it...
7734 */
7735 if ((ctxt->standalone == 1) ||
7736 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7737 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7738 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007739 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007740 /*
7741 * [ VC: Entity Declared ]
7742 * In a document with an external subset or external
7743 * parameter entities with "standalone='no'", ...
7744 * ... The declaration of a parameter entity must
7745 * precede any reference to it...
7746 */
7747 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7748 "PEReference: %%%s; not found\n",
7749 name, NULL);
7750 ctxt->valid = 0;
7751 }
7752 } else {
7753 /*
7754 * Internal checking in case the entity quest barfed
7755 */
7756 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7757 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7758 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7759 "%%%s; is not a parameter entity\n",
7760 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007761 }
7762 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007763 ctxt->hasPErefs = 1;
7764 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007765 *str = ptr;
7766 return(entity);
7767}
7768
7769/**
7770 * xmlParseDocTypeDecl:
7771 * @ctxt: an XML parser context
7772 *
7773 * parse a DOCTYPE declaration
7774 *
7775 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7776 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7777 *
7778 * [ VC: Root Element Type ]
7779 * The Name in the document type declaration must match the element
7780 * type of the root element.
7781 */
7782
7783void
7784xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007785 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007786 xmlChar *ExternalID = NULL;
7787 xmlChar *URI = NULL;
7788
7789 /*
7790 * We know that '<!DOCTYPE' has been detected.
7791 */
7792 SKIP(9);
7793
7794 SKIP_BLANKS;
7795
7796 /*
7797 * Parse the DOCTYPE name.
7798 */
7799 name = xmlParseName(ctxt);
7800 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007801 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7802 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007803 }
7804 ctxt->intSubName = name;
7805
7806 SKIP_BLANKS;
7807
7808 /*
7809 * Check for SystemID and ExternalID
7810 */
7811 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7812
7813 if ((URI != NULL) || (ExternalID != NULL)) {
7814 ctxt->hasExternalSubset = 1;
7815 }
7816 ctxt->extSubURI = URI;
7817 ctxt->extSubSystem = ExternalID;
7818
7819 SKIP_BLANKS;
7820
7821 /*
7822 * Create and update the internal subset.
7823 */
7824 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7825 (!ctxt->disableSAX))
7826 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7827
7828 /*
7829 * Is there any internal subset declarations ?
7830 * they are handled separately in xmlParseInternalSubset()
7831 */
7832 if (RAW == '[')
7833 return;
7834
7835 /*
7836 * We should be at the end of the DOCTYPE declaration.
7837 */
7838 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007839 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007840 }
7841 NEXT;
7842}
7843
7844/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007845 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007846 * @ctxt: an XML parser context
7847 *
7848 * parse the internal subset declaration
7849 *
7850 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7851 */
7852
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007853static void
Owen Taylor3473f882001-02-23 17:55:21 +00007854xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7855 /*
7856 * Is there any DTD definition ?
7857 */
7858 if (RAW == '[') {
7859 ctxt->instate = XML_PARSER_DTD;
7860 NEXT;
7861 /*
7862 * Parse the succession of Markup declarations and
7863 * PEReferences.
7864 * Subsequence (markupdecl | PEReference | S)*
7865 */
7866 while (RAW != ']') {
7867 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007868 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007869
7870 SKIP_BLANKS;
7871 xmlParseMarkupDecl(ctxt);
7872 xmlParsePEReference(ctxt);
7873
7874 /*
7875 * Pop-up of finished entities.
7876 */
7877 while ((RAW == 0) && (ctxt->inputNr > 1))
7878 xmlPopInput(ctxt);
7879
7880 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007881 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007882 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007883 break;
7884 }
7885 }
7886 if (RAW == ']') {
7887 NEXT;
7888 SKIP_BLANKS;
7889 }
7890 }
7891
7892 /*
7893 * We should be at the end of the DOCTYPE declaration.
7894 */
7895 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007896 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007897 }
7898 NEXT;
7899}
7900
Daniel Veillard81273902003-09-30 00:43:48 +00007901#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007902/**
7903 * xmlParseAttribute:
7904 * @ctxt: an XML parser context
7905 * @value: a xmlChar ** used to store the value of the attribute
7906 *
7907 * parse an attribute
7908 *
7909 * [41] Attribute ::= Name Eq AttValue
7910 *
7911 * [ WFC: No External Entity References ]
7912 * Attribute values cannot contain direct or indirect entity references
7913 * to external entities.
7914 *
7915 * [ WFC: No < in Attribute Values ]
7916 * The replacement text of any entity referred to directly or indirectly in
7917 * an attribute value (other than "&lt;") must not contain a <.
7918 *
7919 * [ VC: Attribute Value Type ]
7920 * The attribute must have been declared; the value must be of the type
7921 * declared for it.
7922 *
7923 * [25] Eq ::= S? '=' S?
7924 *
7925 * With namespace:
7926 *
7927 * [NS 11] Attribute ::= QName Eq AttValue
7928 *
7929 * Also the case QName == xmlns:??? is handled independently as a namespace
7930 * definition.
7931 *
7932 * Returns the attribute name, and the value in *value.
7933 */
7934
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007935const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007936xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007937 const xmlChar *name;
7938 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007939
7940 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007941 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007942 name = xmlParseName(ctxt);
7943 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007944 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007945 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007946 return(NULL);
7947 }
7948
7949 /*
7950 * read the value
7951 */
7952 SKIP_BLANKS;
7953 if (RAW == '=') {
7954 NEXT;
7955 SKIP_BLANKS;
7956 val = xmlParseAttValue(ctxt);
7957 ctxt->instate = XML_PARSER_CONTENT;
7958 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007959 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007960 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007961 return(NULL);
7962 }
7963
7964 /*
7965 * Check that xml:lang conforms to the specification
7966 * No more registered as an error, just generate a warning now
7967 * since this was deprecated in XML second edition
7968 */
7969 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7970 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007971 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7972 "Malformed value for xml:lang : %s\n",
7973 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007974 }
7975 }
7976
7977 /*
7978 * Check that xml:space conforms to the specification
7979 */
7980 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7981 if (xmlStrEqual(val, BAD_CAST "default"))
7982 *(ctxt->space) = 0;
7983 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7984 *(ctxt->space) = 1;
7985 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007986 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007987"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007988 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007989 }
7990 }
7991
7992 *value = val;
7993 return(name);
7994}
7995
7996/**
7997 * xmlParseStartTag:
7998 * @ctxt: an XML parser context
7999 *
8000 * parse a start of tag either for rule element or
8001 * EmptyElement. In both case we don't parse the tag closing chars.
8002 *
8003 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8004 *
8005 * [ WFC: Unique Att Spec ]
8006 * No attribute name may appear more than once in the same start-tag or
8007 * empty-element tag.
8008 *
8009 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8010 *
8011 * [ WFC: Unique Att Spec ]
8012 * No attribute name may appear more than once in the same start-tag or
8013 * empty-element tag.
8014 *
8015 * With namespace:
8016 *
8017 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8018 *
8019 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8020 *
8021 * Returns the element name parsed
8022 */
8023
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008024const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008025xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008026 const xmlChar *name;
8027 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008028 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008029 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008030 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008031 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008032 int i;
8033
8034 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008035 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008036
8037 name = xmlParseName(ctxt);
8038 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008039 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008040 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008041 return(NULL);
8042 }
8043
8044 /*
8045 * Now parse the attributes, it ends up with the ending
8046 *
8047 * (S Attribute)* S?
8048 */
8049 SKIP_BLANKS;
8050 GROW;
8051
Daniel Veillard21a0f912001-02-25 19:54:14 +00008052 while ((RAW != '>') &&
8053 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008054 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008055 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008056 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008057
8058 attname = xmlParseAttribute(ctxt, &attvalue);
8059 if ((attname != NULL) && (attvalue != NULL)) {
8060 /*
8061 * [ WFC: Unique Att Spec ]
8062 * No attribute name may appear more than once in the same
8063 * start-tag or empty-element tag.
8064 */
8065 for (i = 0; i < nbatts;i += 2) {
8066 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008067 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008068 xmlFree(attvalue);
8069 goto failed;
8070 }
8071 }
Owen Taylor3473f882001-02-23 17:55:21 +00008072 /*
8073 * Add the pair to atts
8074 */
8075 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008076 maxatts = 22; /* allow for 10 attrs by default */
8077 atts = (const xmlChar **)
8078 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008079 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008080 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008081 if (attvalue != NULL)
8082 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008083 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008084 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008085 ctxt->atts = atts;
8086 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008087 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008088 const xmlChar **n;
8089
Owen Taylor3473f882001-02-23 17:55:21 +00008090 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008091 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008092 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008093 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008094 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008095 if (attvalue != NULL)
8096 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008097 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008098 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008099 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008100 ctxt->atts = atts;
8101 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008102 }
8103 atts[nbatts++] = attname;
8104 atts[nbatts++] = attvalue;
8105 atts[nbatts] = NULL;
8106 atts[nbatts + 1] = NULL;
8107 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008108 if (attvalue != NULL)
8109 xmlFree(attvalue);
8110 }
8111
8112failed:
8113
Daniel Veillard3772de32002-12-17 10:31:45 +00008114 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008115 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8116 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008117 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008118 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8119 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008120 }
8121 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008122 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8123 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008124 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8125 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008126 break;
8127 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008128 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008129 GROW;
8130 }
8131
8132 /*
8133 * SAX: Start of Element !
8134 */
8135 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008136 (!ctxt->disableSAX)) {
8137 if (nbatts > 0)
8138 ctxt->sax->startElement(ctxt->userData, name, atts);
8139 else
8140 ctxt->sax->startElement(ctxt->userData, name, NULL);
8141 }
Owen Taylor3473f882001-02-23 17:55:21 +00008142
8143 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008144 /* Free only the content strings */
8145 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008146 if (atts[i] != NULL)
8147 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008148 }
8149 return(name);
8150}
8151
8152/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008153 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008154 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008155 * @line: line of the start tag
8156 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008157 *
8158 * parse an end of tag
8159 *
8160 * [42] ETag ::= '</' Name S? '>'
8161 *
8162 * With namespace
8163 *
8164 * [NS 9] ETag ::= '</' QName S? '>'
8165 */
8166
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008167static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008168xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008169 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008170
8171 GROW;
8172 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008173 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008174 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008175 return;
8176 }
8177 SKIP(2);
8178
Daniel Veillard46de64e2002-05-29 08:21:33 +00008179 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008180
8181 /*
8182 * We should definitely be at the ending "S? '>'" part
8183 */
8184 GROW;
8185 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008186 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008187 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008188 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008189 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008190
8191 /*
8192 * [ WFC: Element Type Match ]
8193 * The Name in an element's end-tag must match the element type in the
8194 * start-tag.
8195 *
8196 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008197 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008198 if (name == NULL) name = BAD_CAST "unparseable";
8199 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008200 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008201 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008202 }
8203
8204 /*
8205 * SAX: End of Tag
8206 */
8207 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8208 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008209 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008210
Daniel Veillarde57ec792003-09-10 10:50:59 +00008211 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008212 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008213 return;
8214}
8215
8216/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008217 * xmlParseEndTag:
8218 * @ctxt: an XML parser context
8219 *
8220 * parse an end of tag
8221 *
8222 * [42] ETag ::= '</' Name S? '>'
8223 *
8224 * With namespace
8225 *
8226 * [NS 9] ETag ::= '</' QName S? '>'
8227 */
8228
8229void
8230xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008231 xmlParseEndTag1(ctxt, 0);
8232}
Daniel Veillard81273902003-09-30 00:43:48 +00008233#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008234
8235/************************************************************************
8236 * *
8237 * SAX 2 specific operations *
8238 * *
8239 ************************************************************************/
8240
Daniel Veillard0fb18932003-09-07 09:14:37 +00008241/*
8242 * xmlGetNamespace:
8243 * @ctxt: an XML parser context
8244 * @prefix: the prefix to lookup
8245 *
8246 * Lookup the namespace name for the @prefix (which ca be NULL)
8247 * The prefix must come from the @ctxt->dict dictionnary
8248 *
8249 * Returns the namespace name or NULL if not bound
8250 */
8251static const xmlChar *
8252xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8253 int i;
8254
Daniel Veillarde57ec792003-09-10 10:50:59 +00008255 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008256 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008257 if (ctxt->nsTab[i] == prefix) {
8258 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8259 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008260 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008261 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008262 return(NULL);
8263}
8264
8265/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008266 * xmlParseQName:
8267 * @ctxt: an XML parser context
8268 * @prefix: pointer to store the prefix part
8269 *
8270 * parse an XML Namespace QName
8271 *
8272 * [6] QName ::= (Prefix ':')? LocalPart
8273 * [7] Prefix ::= NCName
8274 * [8] LocalPart ::= NCName
8275 *
8276 * Returns the Name parsed or NULL
8277 */
8278
8279static const xmlChar *
8280xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8281 const xmlChar *l, *p;
8282
8283 GROW;
8284
8285 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008286 if (l == NULL) {
8287 if (CUR == ':') {
8288 l = xmlParseName(ctxt);
8289 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008290 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8291 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008292 *prefix = NULL;
8293 return(l);
8294 }
8295 }
8296 return(NULL);
8297 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008298 if (CUR == ':') {
8299 NEXT;
8300 p = l;
8301 l = xmlParseNCName(ctxt);
8302 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008303 xmlChar *tmp;
8304
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008305 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8306 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008307 l = xmlParseNmtoken(ctxt);
8308 if (l == NULL)
8309 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8310 else {
8311 tmp = xmlBuildQName(l, p, NULL, 0);
8312 xmlFree((char *)l);
8313 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008314 p = xmlDictLookup(ctxt->dict, tmp, -1);
8315 if (tmp != NULL) xmlFree(tmp);
8316 *prefix = NULL;
8317 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008318 }
8319 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008320 xmlChar *tmp;
8321
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008322 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8323 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008324 NEXT;
8325 tmp = (xmlChar *) xmlParseName(ctxt);
8326 if (tmp != NULL) {
8327 tmp = xmlBuildQName(tmp, l, NULL, 0);
8328 l = xmlDictLookup(ctxt->dict, tmp, -1);
8329 if (tmp != NULL) xmlFree(tmp);
8330 *prefix = p;
8331 return(l);
8332 }
8333 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8334 l = xmlDictLookup(ctxt->dict, tmp, -1);
8335 if (tmp != NULL) xmlFree(tmp);
8336 *prefix = p;
8337 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008338 }
8339 *prefix = p;
8340 } else
8341 *prefix = NULL;
8342 return(l);
8343}
8344
8345/**
8346 * xmlParseQNameAndCompare:
8347 * @ctxt: an XML parser context
8348 * @name: the localname
8349 * @prefix: the prefix, if any.
8350 *
8351 * parse an XML name and compares for match
8352 * (specialized for endtag parsing)
8353 *
8354 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8355 * and the name for mismatch
8356 */
8357
8358static const xmlChar *
8359xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8360 xmlChar const *prefix) {
8361 const xmlChar *cmp = name;
8362 const xmlChar *in;
8363 const xmlChar *ret;
8364 const xmlChar *prefix2;
8365
8366 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8367
8368 GROW;
8369 in = ctxt->input->cur;
8370
8371 cmp = prefix;
8372 while (*in != 0 && *in == *cmp) {
8373 ++in;
8374 ++cmp;
8375 }
8376 if ((*cmp == 0) && (*in == ':')) {
8377 in++;
8378 cmp = name;
8379 while (*in != 0 && *in == *cmp) {
8380 ++in;
8381 ++cmp;
8382 }
William M. Brack76e95df2003-10-18 16:20:14 +00008383 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008384 /* success */
8385 ctxt->input->cur = in;
8386 return((const xmlChar*) 1);
8387 }
8388 }
8389 /*
8390 * all strings coms from the dictionary, equality can be done directly
8391 */
8392 ret = xmlParseQName (ctxt, &prefix2);
8393 if ((ret == name) && (prefix == prefix2))
8394 return((const xmlChar*) 1);
8395 return ret;
8396}
8397
8398/**
8399 * xmlParseAttValueInternal:
8400 * @ctxt: an XML parser context
8401 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008402 * @alloc: whether the attribute was reallocated as a new string
8403 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008404 *
8405 * parse a value for an attribute.
8406 * NOTE: if no normalization is needed, the routine will return pointers
8407 * directly from the data buffer.
8408 *
8409 * 3.3.3 Attribute-Value Normalization:
8410 * Before the value of an attribute is passed to the application or
8411 * checked for validity, the XML processor must normalize it as follows:
8412 * - a character reference is processed by appending the referenced
8413 * character to the attribute value
8414 * - an entity reference is processed by recursively processing the
8415 * replacement text of the entity
8416 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8417 * appending #x20 to the normalized value, except that only a single
8418 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8419 * parsed entity or the literal entity value of an internal parsed entity
8420 * - other characters are processed by appending them to the normalized value
8421 * If the declared value is not CDATA, then the XML processor must further
8422 * process the normalized attribute value by discarding any leading and
8423 * trailing space (#x20) characters, and by replacing sequences of space
8424 * (#x20) characters by a single space (#x20) character.
8425 * All attributes for which no declaration has been read should be treated
8426 * by a non-validating parser as if declared CDATA.
8427 *
8428 * Returns the AttValue parsed or NULL. The value has to be freed by the
8429 * caller if it was copied, this can be detected by val[*len] == 0.
8430 */
8431
8432static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008433xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8434 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008435{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008436 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008437 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008438 xmlChar *ret = NULL;
8439
8440 GROW;
8441 in = (xmlChar *) CUR_PTR;
8442 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008443 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008444 return (NULL);
8445 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008446 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008447
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008448 /*
8449 * try to handle in this routine the most common case where no
8450 * allocation of a new string is required and where content is
8451 * pure ASCII.
8452 */
8453 limit = *in++;
8454 end = ctxt->input->end;
8455 start = in;
8456 if (in >= end) {
8457 const xmlChar *oldbase = ctxt->input->base;
8458 GROW;
8459 if (oldbase != ctxt->input->base) {
8460 long delta = ctxt->input->base - oldbase;
8461 start = start + delta;
8462 in = in + delta;
8463 }
8464 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008465 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008466 if (normalize) {
8467 /*
8468 * Skip any leading spaces
8469 */
8470 while ((in < end) && (*in != limit) &&
8471 ((*in == 0x20) || (*in == 0x9) ||
8472 (*in == 0xA) || (*in == 0xD))) {
8473 in++;
8474 start = in;
8475 if (in >= end) {
8476 const xmlChar *oldbase = ctxt->input->base;
8477 GROW;
8478 if (oldbase != ctxt->input->base) {
8479 long delta = ctxt->input->base - oldbase;
8480 start = start + delta;
8481 in = in + delta;
8482 }
8483 end = ctxt->input->end;
8484 }
8485 }
8486 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8487 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8488 if ((*in++ == 0x20) && (*in == 0x20)) break;
8489 if (in >= end) {
8490 const xmlChar *oldbase = ctxt->input->base;
8491 GROW;
8492 if (oldbase != ctxt->input->base) {
8493 long delta = ctxt->input->base - oldbase;
8494 start = start + delta;
8495 in = in + delta;
8496 }
8497 end = ctxt->input->end;
8498 }
8499 }
8500 last = in;
8501 /*
8502 * skip the trailing blanks
8503 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008504 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008505 while ((in < end) && (*in != limit) &&
8506 ((*in == 0x20) || (*in == 0x9) ||
8507 (*in == 0xA) || (*in == 0xD))) {
8508 in++;
8509 if (in >= end) {
8510 const xmlChar *oldbase = ctxt->input->base;
8511 GROW;
8512 if (oldbase != ctxt->input->base) {
8513 long delta = ctxt->input->base - oldbase;
8514 start = start + delta;
8515 in = in + delta;
8516 last = last + delta;
8517 }
8518 end = ctxt->input->end;
8519 }
8520 }
8521 if (*in != limit) goto need_complex;
8522 } else {
8523 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8524 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8525 in++;
8526 if (in >= end) {
8527 const xmlChar *oldbase = ctxt->input->base;
8528 GROW;
8529 if (oldbase != ctxt->input->base) {
8530 long delta = ctxt->input->base - oldbase;
8531 start = start + delta;
8532 in = in + delta;
8533 }
8534 end = ctxt->input->end;
8535 }
8536 }
8537 last = in;
8538 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008539 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008540 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008541 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008542 *len = last - start;
8543 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008544 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008545 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008546 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008547 }
8548 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008549 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008550 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008551need_complex:
8552 if (alloc) *alloc = 1;
8553 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008554}
8555
8556/**
8557 * xmlParseAttribute2:
8558 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008559 * @pref: the element prefix
8560 * @elem: the element name
8561 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008562 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008563 * @len: an int * to save the length of the attribute
8564 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008565 *
8566 * parse an attribute in the new SAX2 framework.
8567 *
8568 * Returns the attribute name, and the value in *value, .
8569 */
8570
8571static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008572xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008573 const xmlChar * pref, const xmlChar * elem,
8574 const xmlChar ** prefix, xmlChar ** value,
8575 int *len, int *alloc)
8576{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008577 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008578 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008579 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008580
8581 *value = NULL;
8582 GROW;
8583 name = xmlParseQName(ctxt, prefix);
8584 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008585 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8586 "error parsing attribute name\n");
8587 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008588 }
8589
8590 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008591 * get the type if needed
8592 */
8593 if (ctxt->attsSpecial != NULL) {
8594 int type;
8595
8596 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008597 pref, elem, *prefix, name);
8598 if (type != 0)
8599 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008600 }
8601
8602 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008603 * read the value
8604 */
8605 SKIP_BLANKS;
8606 if (RAW == '=') {
8607 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008608 SKIP_BLANKS;
8609 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8610 if (normalize) {
8611 /*
8612 * Sometimes a second normalisation pass for spaces is needed
8613 * but that only happens if charrefs or entities refernces
8614 * have been used in the attribute value, i.e. the attribute
8615 * value have been extracted in an allocated string already.
8616 */
8617 if (*alloc) {
8618 const xmlChar *val2;
8619
8620 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008621 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008622 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008623 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008624 }
8625 }
8626 }
8627 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008628 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008629 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8630 "Specification mandate value for attribute %s\n",
8631 name);
8632 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008633 }
8634
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008635 if (*prefix == ctxt->str_xml) {
8636 /*
8637 * Check that xml:lang conforms to the specification
8638 * No more registered as an error, just generate a warning now
8639 * since this was deprecated in XML second edition
8640 */
8641 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8642 internal_val = xmlStrndup(val, *len);
8643 if (!xmlCheckLanguageID(internal_val)) {
8644 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8645 "Malformed value for xml:lang : %s\n",
8646 internal_val, NULL);
8647 }
8648 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008649
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008650 /*
8651 * Check that xml:space conforms to the specification
8652 */
8653 if (xmlStrEqual(name, BAD_CAST "space")) {
8654 internal_val = xmlStrndup(val, *len);
8655 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8656 *(ctxt->space) = 0;
8657 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8658 *(ctxt->space) = 1;
8659 else {
8660 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8661 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8662 internal_val, NULL);
8663 }
8664 }
8665 if (internal_val) {
8666 xmlFree(internal_val);
8667 }
8668 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008669
8670 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008671 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008672}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008673/**
8674 * xmlParseStartTag2:
8675 * @ctxt: an XML parser context
8676 *
8677 * parse a start of tag either for rule element or
8678 * EmptyElement. In both case we don't parse the tag closing chars.
8679 * This routine is called when running SAX2 parsing
8680 *
8681 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8682 *
8683 * [ WFC: Unique Att Spec ]
8684 * No attribute name may appear more than once in the same start-tag or
8685 * empty-element tag.
8686 *
8687 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8688 *
8689 * [ WFC: Unique Att Spec ]
8690 * No attribute name may appear more than once in the same start-tag or
8691 * empty-element tag.
8692 *
8693 * With namespace:
8694 *
8695 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8696 *
8697 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8698 *
8699 * Returns the element name parsed
8700 */
8701
8702static const xmlChar *
8703xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008704 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008705 const xmlChar *localname;
8706 const xmlChar *prefix;
8707 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008708 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008709 const xmlChar *nsname;
8710 xmlChar *attvalue;
8711 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008712 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008713 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008714 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008715 const xmlChar *base;
8716 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008717 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008718
8719 if (RAW != '<') return(NULL);
8720 NEXT1;
8721
8722 /*
8723 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8724 * point since the attribute values may be stored as pointers to
8725 * the buffer and calling SHRINK would destroy them !
8726 * The Shrinking is only possible once the full set of attribute
8727 * callbacks have been done.
8728 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008729reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008730 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008731 base = ctxt->input->base;
8732 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008733 oldline = ctxt->input->line;
8734 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008735 nbatts = 0;
8736 nratts = 0;
8737 nbdef = 0;
8738 nbNs = 0;
8739 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008740 /* Forget any namespaces added during an earlier parse of this element. */
8741 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008742
8743 localname = xmlParseQName(ctxt, &prefix);
8744 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008745 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8746 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008747 return(NULL);
8748 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008749 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008750
8751 /*
8752 * Now parse the attributes, it ends up with the ending
8753 *
8754 * (S Attribute)* S?
8755 */
8756 SKIP_BLANKS;
8757 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008758 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008759
8760 while ((RAW != '>') &&
8761 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008762 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008763 const xmlChar *q = CUR_PTR;
8764 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008765 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008766
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008767 attname = xmlParseAttribute2(ctxt, prefix, localname,
8768 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008769 if (ctxt->input->base != base) {
8770 if ((attvalue != NULL) && (alloc != 0))
8771 xmlFree(attvalue);
8772 attvalue = NULL;
8773 goto base_changed;
8774 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008775 if ((attname != NULL) && (attvalue != NULL)) {
8776 if (len < 0) len = xmlStrlen(attvalue);
8777 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008778 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8779 xmlURIPtr uri;
8780
8781 if (*URL != 0) {
8782 uri = xmlParseURI((const char *) URL);
8783 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008784 xmlNsErr(ctxt, XML_WAR_NS_URI,
8785 "xmlns: '%s' is not a valid URI\n",
8786 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008787 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008788 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008789 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8790 "xmlns: URI %s is not absolute\n",
8791 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008792 }
8793 xmlFreeURI(uri);
8794 }
Daniel Veillard37334572008-07-31 08:20:02 +00008795 if (URL == ctxt->str_xml_ns) {
8796 if (attname != ctxt->str_xml) {
8797 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8798 "xml namespace URI cannot be the default namespace\n",
8799 NULL, NULL, NULL);
8800 }
8801 goto skip_default_ns;
8802 }
8803 if ((len == 29) &&
8804 (xmlStrEqual(URL,
8805 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8806 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8807 "reuse of the xmlns namespace name is forbidden\n",
8808 NULL, NULL, NULL);
8809 goto skip_default_ns;
8810 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008811 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008812 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008813 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008814 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008815 for (j = 1;j <= nbNs;j++)
8816 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8817 break;
8818 if (j <= nbNs)
8819 xmlErrAttributeDup(ctxt, NULL, attname);
8820 else
8821 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008822skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008823 if (alloc != 0) xmlFree(attvalue);
8824 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008825 continue;
8826 }
8827 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008828 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8829 xmlURIPtr uri;
8830
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008831 if (attname == ctxt->str_xml) {
8832 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008833 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8834 "xml namespace prefix mapped to wrong URI\n",
8835 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008836 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008837 /*
8838 * Do not keep a namespace definition node
8839 */
Daniel Veillard37334572008-07-31 08:20:02 +00008840 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008841 }
Daniel Veillard37334572008-07-31 08:20:02 +00008842 if (URL == ctxt->str_xml_ns) {
8843 if (attname != ctxt->str_xml) {
8844 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8845 "xml namespace URI mapped to wrong prefix\n",
8846 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008847 }
Daniel Veillard37334572008-07-31 08:20:02 +00008848 goto skip_ns;
8849 }
8850 if (attname == ctxt->str_xmlns) {
8851 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8852 "redefinition of the xmlns prefix is forbidden\n",
8853 NULL, NULL, NULL);
8854 goto skip_ns;
8855 }
8856 if ((len == 29) &&
8857 (xmlStrEqual(URL,
8858 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8859 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8860 "reuse of the xmlns namespace name is forbidden\n",
8861 NULL, NULL, NULL);
8862 goto skip_ns;
8863 }
8864 if ((URL == NULL) || (URL[0] == 0)) {
8865 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8866 "xmlns:%s: Empty XML namespace is not allowed\n",
8867 attname, NULL, NULL);
8868 goto skip_ns;
8869 } else {
8870 uri = xmlParseURI((const char *) URL);
8871 if (uri == NULL) {
8872 xmlNsErr(ctxt, XML_WAR_NS_URI,
8873 "xmlns:%s: '%s' is not a valid URI\n",
8874 attname, URL, NULL);
8875 } else {
8876 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8877 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8878 "xmlns:%s: URI %s is not absolute\n",
8879 attname, URL, NULL);
8880 }
8881 xmlFreeURI(uri);
8882 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008883 }
8884
Daniel Veillard0fb18932003-09-07 09:14:37 +00008885 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008886 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008887 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008888 for (j = 1;j <= nbNs;j++)
8889 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8890 break;
8891 if (j <= nbNs)
8892 xmlErrAttributeDup(ctxt, aprefix, attname);
8893 else
8894 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008895skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008896 if (alloc != 0) xmlFree(attvalue);
8897 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008898 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008899 continue;
8900 }
8901
8902 /*
8903 * Add the pair to atts
8904 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008905 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8906 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008907 if (attvalue[len] == 0)
8908 xmlFree(attvalue);
8909 goto failed;
8910 }
8911 maxatts = ctxt->maxatts;
8912 atts = ctxt->atts;
8913 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008914 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008915 atts[nbatts++] = attname;
8916 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008917 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008918 atts[nbatts++] = attvalue;
8919 attvalue += len;
8920 atts[nbatts++] = attvalue;
8921 /*
8922 * tag if some deallocation is needed
8923 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008924 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008925 } else {
8926 if ((attvalue != NULL) && (attvalue[len] == 0))
8927 xmlFree(attvalue);
8928 }
8929
Daniel Veillard37334572008-07-31 08:20:02 +00008930failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008931
8932 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008933 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008934 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8935 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008936 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008937 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8938 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008939 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008940 }
8941 SKIP_BLANKS;
8942 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8943 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008944 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008945 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008946 break;
8947 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008948 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008949 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008950 }
8951
Daniel Veillard0fb18932003-09-07 09:14:37 +00008952 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008953 * The attributes defaulting
8954 */
8955 if (ctxt->attsDefault != NULL) {
8956 xmlDefAttrsPtr defaults;
8957
8958 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8959 if (defaults != NULL) {
8960 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008961 attname = defaults->values[5 * i];
8962 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008963
8964 /*
8965 * special work for namespaces defaulted defs
8966 */
8967 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8968 /*
8969 * check that it's not a defined namespace
8970 */
8971 for (j = 1;j <= nbNs;j++)
8972 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8973 break;
8974 if (j <= nbNs) continue;
8975
8976 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008977 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008978 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008979 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008980 nbNs++;
8981 }
8982 } else if (aprefix == ctxt->str_xmlns) {
8983 /*
8984 * check that it's not a defined namespace
8985 */
8986 for (j = 1;j <= nbNs;j++)
8987 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8988 break;
8989 if (j <= nbNs) continue;
8990
8991 nsname = xmlGetNamespace(ctxt, attname);
8992 if (nsname != defaults->values[2]) {
8993 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008994 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008995 nbNs++;
8996 }
8997 } else {
8998 /*
8999 * check that it's not a defined attribute
9000 */
9001 for (j = 0;j < nbatts;j+=5) {
9002 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9003 break;
9004 }
9005 if (j < nbatts) continue;
9006
9007 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9008 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009009 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009010 }
9011 maxatts = ctxt->maxatts;
9012 atts = ctxt->atts;
9013 }
9014 atts[nbatts++] = attname;
9015 atts[nbatts++] = aprefix;
9016 if (aprefix == NULL)
9017 atts[nbatts++] = NULL;
9018 else
9019 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009020 atts[nbatts++] = defaults->values[5 * i + 2];
9021 atts[nbatts++] = defaults->values[5 * i + 3];
9022 if ((ctxt->standalone == 1) &&
9023 (defaults->values[5 * i + 4] != NULL)) {
9024 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9025 "standalone: attribute %s on %s defaulted from external subset\n",
9026 attname, localname);
9027 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009028 nbdef++;
9029 }
9030 }
9031 }
9032 }
9033
Daniel Veillarde70c8772003-11-25 07:21:18 +00009034 /*
9035 * The attributes checkings
9036 */
9037 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009038 /*
9039 * The default namespace does not apply to attribute names.
9040 */
9041 if (atts[i + 1] != NULL) {
9042 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9043 if (nsname == NULL) {
9044 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9045 "Namespace prefix %s for %s on %s is not defined\n",
9046 atts[i + 1], atts[i], localname);
9047 }
9048 atts[i + 2] = nsname;
9049 } else
9050 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009051 /*
9052 * [ WFC: Unique Att Spec ]
9053 * No attribute name may appear more than once in the same
9054 * start-tag or empty-element tag.
9055 * As extended by the Namespace in XML REC.
9056 */
9057 for (j = 0; j < i;j += 5) {
9058 if (atts[i] == atts[j]) {
9059 if (atts[i+1] == atts[j+1]) {
9060 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9061 break;
9062 }
9063 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9064 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9065 "Namespaced Attribute %s in '%s' redefined\n",
9066 atts[i], nsname, NULL);
9067 break;
9068 }
9069 }
9070 }
9071 }
9072
Daniel Veillarde57ec792003-09-10 10:50:59 +00009073 nsname = xmlGetNamespace(ctxt, prefix);
9074 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009075 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9076 "Namespace prefix %s on %s is not defined\n",
9077 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009078 }
9079 *pref = prefix;
9080 *URI = nsname;
9081
9082 /*
9083 * SAX: Start of Element !
9084 */
9085 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9086 (!ctxt->disableSAX)) {
9087 if (nbNs > 0)
9088 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9089 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9090 nbatts / 5, nbdef, atts);
9091 else
9092 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9093 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9094 }
9095
9096 /*
9097 * Free up attribute allocated strings if needed
9098 */
9099 if (attval != 0) {
9100 for (i = 3,j = 0; j < nratts;i += 5,j++)
9101 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9102 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009103 }
9104
9105 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009106
9107base_changed:
9108 /*
9109 * the attribute strings are valid iif the base didn't changed
9110 */
9111 if (attval != 0) {
9112 for (i = 3,j = 0; j < nratts;i += 5,j++)
9113 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9114 xmlFree((xmlChar *) atts[i]);
9115 }
9116 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009117 ctxt->input->line = oldline;
9118 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009119 if (ctxt->wellFormed == 1) {
9120 goto reparse;
9121 }
9122 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009123}
9124
9125/**
9126 * xmlParseEndTag2:
9127 * @ctxt: an XML parser context
9128 * @line: line of the start tag
9129 * @nsNr: number of namespaces on the start tag
9130 *
9131 * parse an end of tag
9132 *
9133 * [42] ETag ::= '</' Name S? '>'
9134 *
9135 * With namespace
9136 *
9137 * [NS 9] ETag ::= '</' QName S? '>'
9138 */
9139
9140static void
9141xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009142 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009143 const xmlChar *name;
9144
9145 GROW;
9146 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009147 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009148 return;
9149 }
9150 SKIP(2);
9151
William M. Brack13dfa872004-09-18 04:52:08 +00009152 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009153 if (ctxt->input->cur[tlen] == '>') {
9154 ctxt->input->cur += tlen + 1;
9155 goto done;
9156 }
9157 ctxt->input->cur += tlen;
9158 name = (xmlChar*)1;
9159 } else {
9160 if (prefix == NULL)
9161 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9162 else
9163 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9164 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009165
9166 /*
9167 * We should definitely be at the ending "S? '>'" part
9168 */
9169 GROW;
9170 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009171 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009172 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009173 } else
9174 NEXT1;
9175
9176 /*
9177 * [ WFC: Element Type Match ]
9178 * The Name in an element's end-tag must match the element type in the
9179 * start-tag.
9180 *
9181 */
9182 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009183 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009184 if ((line == 0) && (ctxt->node != NULL))
9185 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009186 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009187 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009188 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009189 }
9190
9191 /*
9192 * SAX: End of Tag
9193 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009194done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009195 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9196 (!ctxt->disableSAX))
9197 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9198
Daniel Veillard0fb18932003-09-07 09:14:37 +00009199 spacePop(ctxt);
9200 if (nsNr != 0)
9201 nsPop(ctxt, nsNr);
9202 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009203}
9204
9205/**
Owen Taylor3473f882001-02-23 17:55:21 +00009206 * xmlParseCDSect:
9207 * @ctxt: an XML parser context
9208 *
9209 * Parse escaped pure raw content.
9210 *
9211 * [18] CDSect ::= CDStart CData CDEnd
9212 *
9213 * [19] CDStart ::= '<![CDATA['
9214 *
9215 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9216 *
9217 * [21] CDEnd ::= ']]>'
9218 */
9219void
9220xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9221 xmlChar *buf = NULL;
9222 int len = 0;
9223 int size = XML_PARSER_BUFFER_SIZE;
9224 int r, rl;
9225 int s, sl;
9226 int cur, l;
9227 int count = 0;
9228
Daniel Veillard8f597c32003-10-06 08:19:27 +00009229 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009230 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009231 SKIP(9);
9232 } else
9233 return;
9234
9235 ctxt->instate = XML_PARSER_CDATA_SECTION;
9236 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009237 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009238 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009239 ctxt->instate = XML_PARSER_CONTENT;
9240 return;
9241 }
9242 NEXTL(rl);
9243 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009244 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009245 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009246 ctxt->instate = XML_PARSER_CONTENT;
9247 return;
9248 }
9249 NEXTL(sl);
9250 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009251 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009252 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009253 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009254 return;
9255 }
William M. Brack871611b2003-10-18 04:53:14 +00009256 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009257 ((r != ']') || (s != ']') || (cur != '>'))) {
9258 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009259 xmlChar *tmp;
9260
Owen Taylor3473f882001-02-23 17:55:21 +00009261 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009262 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9263 if (tmp == NULL) {
9264 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009265 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009266 return;
9267 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009268 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009269 }
9270 COPY_BUF(rl,buf,len,r);
9271 r = s;
9272 rl = sl;
9273 s = cur;
9274 sl = l;
9275 count++;
9276 if (count > 50) {
9277 GROW;
9278 count = 0;
9279 }
9280 NEXTL(l);
9281 cur = CUR_CHAR(l);
9282 }
9283 buf[len] = 0;
9284 ctxt->instate = XML_PARSER_CONTENT;
9285 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009286 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009287 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009288 xmlFree(buf);
9289 return;
9290 }
9291 NEXTL(l);
9292
9293 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009294 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009295 */
9296 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9297 if (ctxt->sax->cdataBlock != NULL)
9298 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009299 else if (ctxt->sax->characters != NULL)
9300 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009301 }
9302 xmlFree(buf);
9303}
9304
9305/**
9306 * xmlParseContent:
9307 * @ctxt: an XML parser context
9308 *
9309 * Parse a content:
9310 *
9311 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9312 */
9313
9314void
9315xmlParseContent(xmlParserCtxtPtr ctxt) {
9316 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009317 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009318 ((RAW != '<') || (NXT(1) != '/')) &&
9319 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009320 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009321 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009322 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009323
9324 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009325 * First case : a Processing Instruction.
9326 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009327 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009328 xmlParsePI(ctxt);
9329 }
9330
9331 /*
9332 * Second case : a CDSection
9333 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009334 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009335 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009336 xmlParseCDSect(ctxt);
9337 }
9338
9339 /*
9340 * Third case : a comment
9341 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009342 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009343 (NXT(2) == '-') && (NXT(3) == '-')) {
9344 xmlParseComment(ctxt);
9345 ctxt->instate = XML_PARSER_CONTENT;
9346 }
9347
9348 /*
9349 * Fourth case : a sub-element.
9350 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009351 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009352 xmlParseElement(ctxt);
9353 }
9354
9355 /*
9356 * Fifth case : a reference. If if has not been resolved,
9357 * parsing returns it's Name, create the node
9358 */
9359
Daniel Veillard21a0f912001-02-25 19:54:14 +00009360 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009361 xmlParseReference(ctxt);
9362 }
9363
9364 /*
9365 * Last case, text. Note that References are handled directly.
9366 */
9367 else {
9368 xmlParseCharData(ctxt, 0);
9369 }
9370
9371 GROW;
9372 /*
9373 * Pop-up of finished entities.
9374 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009375 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009376 xmlPopInput(ctxt);
9377 SHRINK;
9378
Daniel Veillardfdc91562002-07-01 21:52:03 +00009379 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009380 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9381 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009382 ctxt->instate = XML_PARSER_EOF;
9383 break;
9384 }
9385 }
9386}
9387
9388/**
9389 * xmlParseElement:
9390 * @ctxt: an XML parser context
9391 *
9392 * parse an XML element, this is highly recursive
9393 *
9394 * [39] element ::= EmptyElemTag | STag content ETag
9395 *
9396 * [ WFC: Element Type Match ]
9397 * The Name in an element's end-tag must match the element type in the
9398 * start-tag.
9399 *
Owen Taylor3473f882001-02-23 17:55:21 +00009400 */
9401
9402void
9403xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009404 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009405 const xmlChar *prefix;
9406 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00009407 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009408 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009409 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009410 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009411
Daniel Veillard8915c152008-08-26 13:05:34 +00009412 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9413 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9414 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9415 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9416 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009417 ctxt->instate = XML_PARSER_EOF;
9418 return;
9419 }
9420
Owen Taylor3473f882001-02-23 17:55:21 +00009421 /* Capture start position */
9422 if (ctxt->record_info) {
9423 node_info.begin_pos = ctxt->input->consumed +
9424 (CUR_PTR - ctxt->input->base);
9425 node_info.begin_line = ctxt->input->line;
9426 }
9427
9428 if (ctxt->spaceNr == 0)
9429 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009430 else if (*ctxt->space == -2)
9431 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009432 else
9433 spacePush(ctxt, *ctxt->space);
9434
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009435 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009436#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009437 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009438#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009439 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009440#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009441 else
9442 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009443#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009444 if (name == NULL) {
9445 spacePop(ctxt);
9446 return;
9447 }
9448 namePush(ctxt, name);
9449 ret = ctxt->node;
9450
Daniel Veillard4432df22003-09-28 18:58:27 +00009451#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009452 /*
9453 * [ VC: Root Element Type ]
9454 * The Name in the document type declaration must match the element
9455 * type of the root element.
9456 */
9457 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9458 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9459 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009460#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009461
9462 /*
9463 * Check for an Empty Element.
9464 */
9465 if ((RAW == '/') && (NXT(1) == '>')) {
9466 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009467 if (ctxt->sax2) {
9468 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9469 (!ctxt->disableSAX))
9470 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009471#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009472 } else {
9473 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9474 (!ctxt->disableSAX))
9475 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009476#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009477 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009478 namePop(ctxt);
9479 spacePop(ctxt);
9480 if (nsNr != ctxt->nsNr)
9481 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009482 if ( ret != NULL && ctxt->record_info ) {
9483 node_info.end_pos = ctxt->input->consumed +
9484 (CUR_PTR - ctxt->input->base);
9485 node_info.end_line = ctxt->input->line;
9486 node_info.node = ret;
9487 xmlParserAddNodeInfo(ctxt, &node_info);
9488 }
9489 return;
9490 }
9491 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009492 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009493 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009494 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9495 "Couldn't find end of Start Tag %s line %d\n",
9496 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009497
9498 /*
9499 * end of parsing of this node.
9500 */
9501 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009502 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009503 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009504 if (nsNr != ctxt->nsNr)
9505 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009506
9507 /*
9508 * Capture end position and add node
9509 */
9510 if ( ret != NULL && ctxt->record_info ) {
9511 node_info.end_pos = ctxt->input->consumed +
9512 (CUR_PTR - ctxt->input->base);
9513 node_info.end_line = ctxt->input->line;
9514 node_info.node = ret;
9515 xmlParserAddNodeInfo(ctxt, &node_info);
9516 }
9517 return;
9518 }
9519
9520 /*
9521 * Parse the content of the element:
9522 */
9523 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009524 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009525 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009526 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009527 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009528
9529 /*
9530 * end of parsing of this node.
9531 */
9532 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009533 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009534 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009535 if (nsNr != ctxt->nsNr)
9536 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009537 return;
9538 }
9539
9540 /*
9541 * parse the end of tag: '</' should be here.
9542 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009543 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009544 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009545 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009546 }
9547#ifdef LIBXML_SAX1_ENABLED
9548 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009549 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009550#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009551
9552 /*
9553 * Capture end position and add node
9554 */
9555 if ( ret != NULL && ctxt->record_info ) {
9556 node_info.end_pos = ctxt->input->consumed +
9557 (CUR_PTR - ctxt->input->base);
9558 node_info.end_line = ctxt->input->line;
9559 node_info.node = ret;
9560 xmlParserAddNodeInfo(ctxt, &node_info);
9561 }
9562}
9563
9564/**
9565 * xmlParseVersionNum:
9566 * @ctxt: an XML parser context
9567 *
9568 * parse the XML version value.
9569 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009570 * [26] VersionNum ::= '1.' [0-9]+
9571 *
9572 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009573 *
9574 * Returns the string giving the XML version number, or NULL
9575 */
9576xmlChar *
9577xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9578 xmlChar *buf = NULL;
9579 int len = 0;
9580 int size = 10;
9581 xmlChar cur;
9582
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009583 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009584 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009585 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009586 return(NULL);
9587 }
9588 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009589 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009590 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009591 return(NULL);
9592 }
9593 buf[len++] = cur;
9594 NEXT;
9595 cur=CUR;
9596 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009597 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009598 return(NULL);
9599 }
9600 buf[len++] = cur;
9601 NEXT;
9602 cur=CUR;
9603 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009604 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009605 xmlChar *tmp;
9606
Owen Taylor3473f882001-02-23 17:55:21 +00009607 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009608 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9609 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009610 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009611 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009612 return(NULL);
9613 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009614 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009615 }
9616 buf[len++] = cur;
9617 NEXT;
9618 cur=CUR;
9619 }
9620 buf[len] = 0;
9621 return(buf);
9622}
9623
9624/**
9625 * xmlParseVersionInfo:
9626 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009627 *
Owen Taylor3473f882001-02-23 17:55:21 +00009628 * parse the XML version.
9629 *
9630 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009631 *
Owen Taylor3473f882001-02-23 17:55:21 +00009632 * [25] Eq ::= S? '=' S?
9633 *
9634 * Returns the version string, e.g. "1.0"
9635 */
9636
9637xmlChar *
9638xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9639 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009640
Daniel Veillarda07050d2003-10-19 14:46:32 +00009641 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009642 SKIP(7);
9643 SKIP_BLANKS;
9644 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009645 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009646 return(NULL);
9647 }
9648 NEXT;
9649 SKIP_BLANKS;
9650 if (RAW == '"') {
9651 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009652 version = xmlParseVersionNum(ctxt);
9653 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009654 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009655 } else
9656 NEXT;
9657 } else if (RAW == '\''){
9658 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009659 version = xmlParseVersionNum(ctxt);
9660 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009661 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009662 } else
9663 NEXT;
9664 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009665 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009666 }
9667 }
9668 return(version);
9669}
9670
9671/**
9672 * xmlParseEncName:
9673 * @ctxt: an XML parser context
9674 *
9675 * parse the XML encoding name
9676 *
9677 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9678 *
9679 * Returns the encoding name value or NULL
9680 */
9681xmlChar *
9682xmlParseEncName(xmlParserCtxtPtr ctxt) {
9683 xmlChar *buf = NULL;
9684 int len = 0;
9685 int size = 10;
9686 xmlChar cur;
9687
9688 cur = CUR;
9689 if (((cur >= 'a') && (cur <= 'z')) ||
9690 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009691 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009692 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009693 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009694 return(NULL);
9695 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009696
Owen Taylor3473f882001-02-23 17:55:21 +00009697 buf[len++] = cur;
9698 NEXT;
9699 cur = CUR;
9700 while (((cur >= 'a') && (cur <= 'z')) ||
9701 ((cur >= 'A') && (cur <= 'Z')) ||
9702 ((cur >= '0') && (cur <= '9')) ||
9703 (cur == '.') || (cur == '_') ||
9704 (cur == '-')) {
9705 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009706 xmlChar *tmp;
9707
Owen Taylor3473f882001-02-23 17:55:21 +00009708 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009709 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9710 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009711 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009712 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009713 return(NULL);
9714 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009715 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009716 }
9717 buf[len++] = cur;
9718 NEXT;
9719 cur = CUR;
9720 if (cur == 0) {
9721 SHRINK;
9722 GROW;
9723 cur = CUR;
9724 }
9725 }
9726 buf[len] = 0;
9727 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009728 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009729 }
9730 return(buf);
9731}
9732
9733/**
9734 * xmlParseEncodingDecl:
9735 * @ctxt: an XML parser context
9736 *
9737 * parse the XML encoding declaration
9738 *
9739 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9740 *
9741 * this setups the conversion filters.
9742 *
9743 * Returns the encoding value or NULL
9744 */
9745
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009746const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009747xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9748 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009749
9750 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009751 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009752 SKIP(8);
9753 SKIP_BLANKS;
9754 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009755 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009756 return(NULL);
9757 }
9758 NEXT;
9759 SKIP_BLANKS;
9760 if (RAW == '"') {
9761 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009762 encoding = xmlParseEncName(ctxt);
9763 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009764 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009765 } else
9766 NEXT;
9767 } else if (RAW == '\''){
9768 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009769 encoding = xmlParseEncName(ctxt);
9770 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009771 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009772 } else
9773 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009774 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009775 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009776 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009777 /*
9778 * UTF-16 encoding stwich has already taken place at this stage,
9779 * more over the little-endian/big-endian selection is already done
9780 */
9781 if ((encoding != NULL) &&
9782 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9783 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009784 /*
9785 * If no encoding was passed to the parser, that we are
9786 * using UTF-16 and no decoder is present i.e. the
9787 * document is apparently UTF-8 compatible, then raise an
9788 * encoding mismatch fatal error
9789 */
9790 if ((ctxt->encoding == NULL) &&
9791 (ctxt->input->buf != NULL) &&
9792 (ctxt->input->buf->encoder == NULL)) {
9793 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9794 "Document labelled UTF-16 but has UTF-8 content\n");
9795 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009796 if (ctxt->encoding != NULL)
9797 xmlFree((xmlChar *) ctxt->encoding);
9798 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009799 }
9800 /*
9801 * UTF-8 encoding is handled natively
9802 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009803 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009804 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9805 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009806 if (ctxt->encoding != NULL)
9807 xmlFree((xmlChar *) ctxt->encoding);
9808 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009809 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009810 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009811 xmlCharEncodingHandlerPtr handler;
9812
9813 if (ctxt->input->encoding != NULL)
9814 xmlFree((xmlChar *) ctxt->input->encoding);
9815 ctxt->input->encoding = encoding;
9816
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009817 handler = xmlFindCharEncodingHandler((const char *) encoding);
9818 if (handler != NULL) {
9819 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009820 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009821 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009822 "Unsupported encoding %s\n", encoding);
9823 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009824 }
9825 }
9826 }
9827 return(encoding);
9828}
9829
9830/**
9831 * xmlParseSDDecl:
9832 * @ctxt: an XML parser context
9833 *
9834 * parse the XML standalone declaration
9835 *
9836 * [32] SDDecl ::= S 'standalone' Eq
9837 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9838 *
9839 * [ VC: Standalone Document Declaration ]
9840 * TODO The standalone document declaration must have the value "no"
9841 * if any external markup declarations contain declarations of:
9842 * - attributes with default values, if elements to which these
9843 * attributes apply appear in the document without specifications
9844 * of values for these attributes, or
9845 * - entities (other than amp, lt, gt, apos, quot), if references
9846 * to those entities appear in the document, or
9847 * - attributes with values subject to normalization, where the
9848 * attribute appears in the document with a value which will change
9849 * as a result of normalization, or
9850 * - element types with element content, if white space occurs directly
9851 * within any instance of those types.
9852 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009853 * Returns:
9854 * 1 if standalone="yes"
9855 * 0 if standalone="no"
9856 * -2 if standalone attribute is missing or invalid
9857 * (A standalone value of -2 means that the XML declaration was found,
9858 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009859 */
9860
9861int
9862xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009863 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009864
9865 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009866 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009867 SKIP(10);
9868 SKIP_BLANKS;
9869 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009870 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009871 return(standalone);
9872 }
9873 NEXT;
9874 SKIP_BLANKS;
9875 if (RAW == '\''){
9876 NEXT;
9877 if ((RAW == 'n') && (NXT(1) == 'o')) {
9878 standalone = 0;
9879 SKIP(2);
9880 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9881 (NXT(2) == 's')) {
9882 standalone = 1;
9883 SKIP(3);
9884 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009885 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009886 }
9887 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009888 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009889 } else
9890 NEXT;
9891 } else if (RAW == '"'){
9892 NEXT;
9893 if ((RAW == 'n') && (NXT(1) == 'o')) {
9894 standalone = 0;
9895 SKIP(2);
9896 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9897 (NXT(2) == 's')) {
9898 standalone = 1;
9899 SKIP(3);
9900 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009901 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009902 }
9903 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009904 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009905 } else
9906 NEXT;
9907 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009908 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009909 }
9910 }
9911 return(standalone);
9912}
9913
9914/**
9915 * xmlParseXMLDecl:
9916 * @ctxt: an XML parser context
9917 *
9918 * parse an XML declaration header
9919 *
9920 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9921 */
9922
9923void
9924xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9925 xmlChar *version;
9926
9927 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009928 * This value for standalone indicates that the document has an
9929 * XML declaration but it does not have a standalone attribute.
9930 * It will be overwritten later if a standalone attribute is found.
9931 */
9932 ctxt->input->standalone = -2;
9933
9934 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009935 * We know that '<?xml' is here.
9936 */
9937 SKIP(5);
9938
William M. Brack76e95df2003-10-18 16:20:14 +00009939 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009940 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9941 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009942 }
9943 SKIP_BLANKS;
9944
9945 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009946 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009947 */
9948 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009949 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009950 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009951 } else {
9952 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9953 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009954 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009955 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009956 if (ctxt->options & XML_PARSE_OLD10) {
9957 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9958 "Unsupported version '%s'\n",
9959 version);
9960 } else {
9961 if ((version[0] == '1') && ((version[1] == '.'))) {
9962 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9963 "Unsupported version '%s'\n",
9964 version, NULL);
9965 } else {
9966 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9967 "Unsupported version '%s'\n",
9968 version);
9969 }
9970 }
Daniel Veillard19840942001-11-29 16:11:38 +00009971 }
9972 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009973 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009974 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009975 }
Owen Taylor3473f882001-02-23 17:55:21 +00009976
9977 /*
9978 * We may have the encoding declaration
9979 */
William M. Brack76e95df2003-10-18 16:20:14 +00009980 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009981 if ((RAW == '?') && (NXT(1) == '>')) {
9982 SKIP(2);
9983 return;
9984 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009985 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009986 }
9987 xmlParseEncodingDecl(ctxt);
9988 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9989 /*
9990 * The XML REC instructs us to stop parsing right here
9991 */
9992 return;
9993 }
9994
9995 /*
9996 * We may have the standalone status.
9997 */
William M. Brack76e95df2003-10-18 16:20:14 +00009998 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009999 if ((RAW == '?') && (NXT(1) == '>')) {
10000 SKIP(2);
10001 return;
10002 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010003 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010004 }
10005 SKIP_BLANKS;
10006 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10007
10008 SKIP_BLANKS;
10009 if ((RAW == '?') && (NXT(1) == '>')) {
10010 SKIP(2);
10011 } else if (RAW == '>') {
10012 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010013 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010014 NEXT;
10015 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010016 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010017 MOVETO_ENDTAG(CUR_PTR);
10018 NEXT;
10019 }
10020}
10021
10022/**
10023 * xmlParseMisc:
10024 * @ctxt: an XML parser context
10025 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010026 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010027 *
10028 * [27] Misc ::= Comment | PI | S
10029 */
10030
10031void
10032xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010033 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010034 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010035 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010036 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010037 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010038 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010039 NEXT;
10040 } else
10041 xmlParseComment(ctxt);
10042 }
10043}
10044
10045/**
10046 * xmlParseDocument:
10047 * @ctxt: an XML parser context
10048 *
10049 * parse an XML document (and build a tree if using the standard SAX
10050 * interface).
10051 *
10052 * [1] document ::= prolog element Misc*
10053 *
10054 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10055 *
10056 * Returns 0, -1 in case of error. the parser context is augmented
10057 * as a result of the parsing.
10058 */
10059
10060int
10061xmlParseDocument(xmlParserCtxtPtr ctxt) {
10062 xmlChar start[4];
10063 xmlCharEncoding enc;
10064
10065 xmlInitParser();
10066
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010067 if ((ctxt == NULL) || (ctxt->input == NULL))
10068 return(-1);
10069
Owen Taylor3473f882001-02-23 17:55:21 +000010070 GROW;
10071
10072 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010073 * SAX: detecting the level.
10074 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010075 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010076
10077 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010078 * SAX: beginning of the document processing.
10079 */
10080 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10081 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10082
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010083 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10084 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010085 /*
10086 * Get the 4 first bytes and decode the charset
10087 * if enc != XML_CHAR_ENCODING_NONE
10088 * plug some encoding conversion routines.
10089 */
10090 start[0] = RAW;
10091 start[1] = NXT(1);
10092 start[2] = NXT(2);
10093 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010094 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010095 if (enc != XML_CHAR_ENCODING_NONE) {
10096 xmlSwitchEncoding(ctxt, enc);
10097 }
Owen Taylor3473f882001-02-23 17:55:21 +000010098 }
10099
10100
10101 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010102 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010103 }
10104
10105 /*
10106 * Check for the XMLDecl in the Prolog.
10107 */
10108 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010109 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010110
10111 /*
10112 * Note that we will switch encoding on the fly.
10113 */
10114 xmlParseXMLDecl(ctxt);
10115 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10116 /*
10117 * The XML REC instructs us to stop parsing right here
10118 */
10119 return(-1);
10120 }
10121 ctxt->standalone = ctxt->input->standalone;
10122 SKIP_BLANKS;
10123 } else {
10124 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10125 }
10126 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10127 ctxt->sax->startDocument(ctxt->userData);
10128
10129 /*
10130 * The Misc part of the Prolog
10131 */
10132 GROW;
10133 xmlParseMisc(ctxt);
10134
10135 /*
10136 * Then possibly doc type declaration(s) and more Misc
10137 * (doctypedecl Misc*)?
10138 */
10139 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010140 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010141
10142 ctxt->inSubset = 1;
10143 xmlParseDocTypeDecl(ctxt);
10144 if (RAW == '[') {
10145 ctxt->instate = XML_PARSER_DTD;
10146 xmlParseInternalSubset(ctxt);
10147 }
10148
10149 /*
10150 * Create and update the external subset.
10151 */
10152 ctxt->inSubset = 2;
10153 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10154 (!ctxt->disableSAX))
10155 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10156 ctxt->extSubSystem, ctxt->extSubURI);
10157 ctxt->inSubset = 0;
10158
Daniel Veillardac4118d2008-01-11 05:27:32 +000010159 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010160
10161 ctxt->instate = XML_PARSER_PROLOG;
10162 xmlParseMisc(ctxt);
10163 }
10164
10165 /*
10166 * Time to start parsing the tree itself
10167 */
10168 GROW;
10169 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010170 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10171 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010172 } else {
10173 ctxt->instate = XML_PARSER_CONTENT;
10174 xmlParseElement(ctxt);
10175 ctxt->instate = XML_PARSER_EPILOG;
10176
10177
10178 /*
10179 * The Misc part at the end
10180 */
10181 xmlParseMisc(ctxt);
10182
Daniel Veillard561b7f82002-03-20 21:55:57 +000010183 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010184 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010185 }
10186 ctxt->instate = XML_PARSER_EOF;
10187 }
10188
10189 /*
10190 * SAX: end of the document processing.
10191 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010192 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010193 ctxt->sax->endDocument(ctxt->userData);
10194
Daniel Veillard5997aca2002-03-18 18:36:20 +000010195 /*
10196 * Remove locally kept entity definitions if the tree was not built
10197 */
10198 if ((ctxt->myDoc != NULL) &&
10199 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10200 xmlFreeDoc(ctxt->myDoc);
10201 ctxt->myDoc = NULL;
10202 }
10203
Daniel Veillardae0765b2008-07-31 19:54:59 +000010204 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10205 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10206 if (ctxt->valid)
10207 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10208 if (ctxt->nsWellFormed)
10209 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10210 if (ctxt->options & XML_PARSE_OLD10)
10211 ctxt->myDoc->properties |= XML_DOC_OLD10;
10212 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010213 if (! ctxt->wellFormed) {
10214 ctxt->valid = 0;
10215 return(-1);
10216 }
Owen Taylor3473f882001-02-23 17:55:21 +000010217 return(0);
10218}
10219
10220/**
10221 * xmlParseExtParsedEnt:
10222 * @ctxt: an XML parser context
10223 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010224 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010225 * An external general parsed entity is well-formed if it matches the
10226 * production labeled extParsedEnt.
10227 *
10228 * [78] extParsedEnt ::= TextDecl? content
10229 *
10230 * Returns 0, -1 in case of error. the parser context is augmented
10231 * as a result of the parsing.
10232 */
10233
10234int
10235xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10236 xmlChar start[4];
10237 xmlCharEncoding enc;
10238
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010239 if ((ctxt == NULL) || (ctxt->input == NULL))
10240 return(-1);
10241
Owen Taylor3473f882001-02-23 17:55:21 +000010242 xmlDefaultSAXHandlerInit();
10243
Daniel Veillard309f81d2003-09-23 09:02:53 +000010244 xmlDetectSAX2(ctxt);
10245
Owen Taylor3473f882001-02-23 17:55:21 +000010246 GROW;
10247
10248 /*
10249 * SAX: beginning of the document processing.
10250 */
10251 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10252 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10253
10254 /*
10255 * Get the 4 first bytes and decode the charset
10256 * if enc != XML_CHAR_ENCODING_NONE
10257 * plug some encoding conversion routines.
10258 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010259 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10260 start[0] = RAW;
10261 start[1] = NXT(1);
10262 start[2] = NXT(2);
10263 start[3] = NXT(3);
10264 enc = xmlDetectCharEncoding(start, 4);
10265 if (enc != XML_CHAR_ENCODING_NONE) {
10266 xmlSwitchEncoding(ctxt, enc);
10267 }
Owen Taylor3473f882001-02-23 17:55:21 +000010268 }
10269
10270
10271 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010272 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010273 }
10274
10275 /*
10276 * Check for the XMLDecl in the Prolog.
10277 */
10278 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010279 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010280
10281 /*
10282 * Note that we will switch encoding on the fly.
10283 */
10284 xmlParseXMLDecl(ctxt);
10285 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10286 /*
10287 * The XML REC instructs us to stop parsing right here
10288 */
10289 return(-1);
10290 }
10291 SKIP_BLANKS;
10292 } else {
10293 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10294 }
10295 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10296 ctxt->sax->startDocument(ctxt->userData);
10297
10298 /*
10299 * Doing validity checking on chunk doesn't make sense
10300 */
10301 ctxt->instate = XML_PARSER_CONTENT;
10302 ctxt->validate = 0;
10303 ctxt->loadsubset = 0;
10304 ctxt->depth = 0;
10305
10306 xmlParseContent(ctxt);
10307
10308 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010309 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010310 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010311 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010312 }
10313
10314 /*
10315 * SAX: end of the document processing.
10316 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010317 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010318 ctxt->sax->endDocument(ctxt->userData);
10319
10320 if (! ctxt->wellFormed) return(-1);
10321 return(0);
10322}
10323
Daniel Veillard73b013f2003-09-30 12:36:01 +000010324#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010325/************************************************************************
10326 * *
10327 * Progressive parsing interfaces *
10328 * *
10329 ************************************************************************/
10330
10331/**
10332 * xmlParseLookupSequence:
10333 * @ctxt: an XML parser context
10334 * @first: the first char to lookup
10335 * @next: the next char to lookup or zero
10336 * @third: the next char to lookup or zero
10337 *
10338 * Try to find if a sequence (first, next, third) or just (first next) or
10339 * (first) is available in the input stream.
10340 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10341 * to avoid rescanning sequences of bytes, it DOES change the state of the
10342 * parser, do not use liberally.
10343 *
10344 * Returns the index to the current parsing point if the full sequence
10345 * is available, -1 otherwise.
10346 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010347static int
Owen Taylor3473f882001-02-23 17:55:21 +000010348xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10349 xmlChar next, xmlChar third) {
10350 int base, len;
10351 xmlParserInputPtr in;
10352 const xmlChar *buf;
10353
10354 in = ctxt->input;
10355 if (in == NULL) return(-1);
10356 base = in->cur - in->base;
10357 if (base < 0) return(-1);
10358 if (ctxt->checkIndex > base)
10359 base = ctxt->checkIndex;
10360 if (in->buf == NULL) {
10361 buf = in->base;
10362 len = in->length;
10363 } else {
10364 buf = in->buf->buffer->content;
10365 len = in->buf->buffer->use;
10366 }
10367 /* take into account the sequence length */
10368 if (third) len -= 2;
10369 else if (next) len --;
10370 for (;base < len;base++) {
10371 if (buf[base] == first) {
10372 if (third != 0) {
10373 if ((buf[base + 1] != next) ||
10374 (buf[base + 2] != third)) continue;
10375 } else if (next != 0) {
10376 if (buf[base + 1] != next) continue;
10377 }
10378 ctxt->checkIndex = 0;
10379#ifdef DEBUG_PUSH
10380 if (next == 0)
10381 xmlGenericError(xmlGenericErrorContext,
10382 "PP: lookup '%c' found at %d\n",
10383 first, base);
10384 else if (third == 0)
10385 xmlGenericError(xmlGenericErrorContext,
10386 "PP: lookup '%c%c' found at %d\n",
10387 first, next, base);
10388 else
10389 xmlGenericError(xmlGenericErrorContext,
10390 "PP: lookup '%c%c%c' found at %d\n",
10391 first, next, third, base);
10392#endif
10393 return(base - (in->cur - in->base));
10394 }
10395 }
10396 ctxt->checkIndex = base;
10397#ifdef DEBUG_PUSH
10398 if (next == 0)
10399 xmlGenericError(xmlGenericErrorContext,
10400 "PP: lookup '%c' failed\n", first);
10401 else if (third == 0)
10402 xmlGenericError(xmlGenericErrorContext,
10403 "PP: lookup '%c%c' failed\n", first, next);
10404 else
10405 xmlGenericError(xmlGenericErrorContext,
10406 "PP: lookup '%c%c%c' failed\n", first, next, third);
10407#endif
10408 return(-1);
10409}
10410
10411/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010412 * xmlParseGetLasts:
10413 * @ctxt: an XML parser context
10414 * @lastlt: pointer to store the last '<' from the input
10415 * @lastgt: pointer to store the last '>' from the input
10416 *
10417 * Lookup the last < and > in the current chunk
10418 */
10419static void
10420xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10421 const xmlChar **lastgt) {
10422 const xmlChar *tmp;
10423
10424 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10425 xmlGenericError(xmlGenericErrorContext,
10426 "Internal error: xmlParseGetLasts\n");
10427 return;
10428 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010429 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010430 tmp = ctxt->input->end;
10431 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010432 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010433 if (tmp < ctxt->input->base) {
10434 *lastlt = NULL;
10435 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010436 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010437 *lastlt = tmp;
10438 tmp++;
10439 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10440 if (*tmp == '\'') {
10441 tmp++;
10442 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10443 if (tmp < ctxt->input->end) tmp++;
10444 } else if (*tmp == '"') {
10445 tmp++;
10446 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10447 if (tmp < ctxt->input->end) tmp++;
10448 } else
10449 tmp++;
10450 }
10451 if (tmp < ctxt->input->end)
10452 *lastgt = tmp;
10453 else {
10454 tmp = *lastlt;
10455 tmp--;
10456 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10457 if (tmp >= ctxt->input->base)
10458 *lastgt = tmp;
10459 else
10460 *lastgt = NULL;
10461 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010462 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010463 } else {
10464 *lastlt = NULL;
10465 *lastgt = NULL;
10466 }
10467}
10468/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010469 * xmlCheckCdataPush:
10470 * @cur: pointer to the bock of characters
10471 * @len: length of the block in bytes
10472 *
10473 * Check that the block of characters is okay as SCdata content [20]
10474 *
10475 * Returns the number of bytes to pass if okay, a negative index where an
10476 * UTF-8 error occured otherwise
10477 */
10478static int
10479xmlCheckCdataPush(const xmlChar *utf, int len) {
10480 int ix;
10481 unsigned char c;
10482 int codepoint;
10483
10484 if ((utf == NULL) || (len <= 0))
10485 return(0);
10486
10487 for (ix = 0; ix < len;) { /* string is 0-terminated */
10488 c = utf[ix];
10489 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10490 if (c >= 0x20)
10491 ix++;
10492 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10493 ix++;
10494 else
10495 return(-ix);
10496 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10497 if (ix + 2 > len) return(ix);
10498 if ((utf[ix+1] & 0xc0 ) != 0x80)
10499 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010500 codepoint = (utf[ix] & 0x1f) << 6;
10501 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010502 if (!xmlIsCharQ(codepoint))
10503 return(-ix);
10504 ix += 2;
10505 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10506 if (ix + 3 > len) return(ix);
10507 if (((utf[ix+1] & 0xc0) != 0x80) ||
10508 ((utf[ix+2] & 0xc0) != 0x80))
10509 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010510 codepoint = (utf[ix] & 0xf) << 12;
10511 codepoint |= (utf[ix+1] & 0x3f) << 6;
10512 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010513 if (!xmlIsCharQ(codepoint))
10514 return(-ix);
10515 ix += 3;
10516 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10517 if (ix + 4 > len) return(ix);
10518 if (((utf[ix+1] & 0xc0) != 0x80) ||
10519 ((utf[ix+2] & 0xc0) != 0x80) ||
10520 ((utf[ix+3] & 0xc0) != 0x80))
10521 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010522 codepoint = (utf[ix] & 0x7) << 18;
10523 codepoint |= (utf[ix+1] & 0x3f) << 12;
10524 codepoint |= (utf[ix+2] & 0x3f) << 6;
10525 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010526 if (!xmlIsCharQ(codepoint))
10527 return(-ix);
10528 ix += 4;
10529 } else /* unknown encoding */
10530 return(-ix);
10531 }
10532 return(ix);
10533}
10534
10535/**
Owen Taylor3473f882001-02-23 17:55:21 +000010536 * xmlParseTryOrFinish:
10537 * @ctxt: an XML parser context
10538 * @terminate: last chunk indicator
10539 *
10540 * Try to progress on parsing
10541 *
10542 * Returns zero if no parsing was possible
10543 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010544static int
Owen Taylor3473f882001-02-23 17:55:21 +000010545xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10546 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010547 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010548 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010549 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010550
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010551 if (ctxt->input == NULL)
10552 return(0);
10553
Owen Taylor3473f882001-02-23 17:55:21 +000010554#ifdef DEBUG_PUSH
10555 switch (ctxt->instate) {
10556 case XML_PARSER_EOF:
10557 xmlGenericError(xmlGenericErrorContext,
10558 "PP: try EOF\n"); break;
10559 case XML_PARSER_START:
10560 xmlGenericError(xmlGenericErrorContext,
10561 "PP: try START\n"); break;
10562 case XML_PARSER_MISC:
10563 xmlGenericError(xmlGenericErrorContext,
10564 "PP: try MISC\n");break;
10565 case XML_PARSER_COMMENT:
10566 xmlGenericError(xmlGenericErrorContext,
10567 "PP: try COMMENT\n");break;
10568 case XML_PARSER_PROLOG:
10569 xmlGenericError(xmlGenericErrorContext,
10570 "PP: try PROLOG\n");break;
10571 case XML_PARSER_START_TAG:
10572 xmlGenericError(xmlGenericErrorContext,
10573 "PP: try START_TAG\n");break;
10574 case XML_PARSER_CONTENT:
10575 xmlGenericError(xmlGenericErrorContext,
10576 "PP: try CONTENT\n");break;
10577 case XML_PARSER_CDATA_SECTION:
10578 xmlGenericError(xmlGenericErrorContext,
10579 "PP: try CDATA_SECTION\n");break;
10580 case XML_PARSER_END_TAG:
10581 xmlGenericError(xmlGenericErrorContext,
10582 "PP: try END_TAG\n");break;
10583 case XML_PARSER_ENTITY_DECL:
10584 xmlGenericError(xmlGenericErrorContext,
10585 "PP: try ENTITY_DECL\n");break;
10586 case XML_PARSER_ENTITY_VALUE:
10587 xmlGenericError(xmlGenericErrorContext,
10588 "PP: try ENTITY_VALUE\n");break;
10589 case XML_PARSER_ATTRIBUTE_VALUE:
10590 xmlGenericError(xmlGenericErrorContext,
10591 "PP: try ATTRIBUTE_VALUE\n");break;
10592 case XML_PARSER_DTD:
10593 xmlGenericError(xmlGenericErrorContext,
10594 "PP: try DTD\n");break;
10595 case XML_PARSER_EPILOG:
10596 xmlGenericError(xmlGenericErrorContext,
10597 "PP: try EPILOG\n");break;
10598 case XML_PARSER_PI:
10599 xmlGenericError(xmlGenericErrorContext,
10600 "PP: try PI\n");break;
10601 case XML_PARSER_IGNORE:
10602 xmlGenericError(xmlGenericErrorContext,
10603 "PP: try IGNORE\n");break;
10604 }
10605#endif
10606
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010607 if ((ctxt->input != NULL) &&
10608 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010609 xmlSHRINK(ctxt);
10610 ctxt->checkIndex = 0;
10611 }
10612 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010613
Daniel Veillarda880b122003-04-21 21:36:41 +000010614 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010615 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010616 return(0);
10617
10618
Owen Taylor3473f882001-02-23 17:55:21 +000010619 /*
10620 * Pop-up of finished entities.
10621 */
10622 while ((RAW == 0) && (ctxt->inputNr > 1))
10623 xmlPopInput(ctxt);
10624
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010625 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010626 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010627 avail = ctxt->input->length -
10628 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010629 else {
10630 /*
10631 * If we are operating on converted input, try to flush
10632 * remainng chars to avoid them stalling in the non-converted
10633 * buffer.
10634 */
10635 if ((ctxt->input->buf->raw != NULL) &&
10636 (ctxt->input->buf->raw->use > 0)) {
10637 int base = ctxt->input->base -
10638 ctxt->input->buf->buffer->content;
10639 int current = ctxt->input->cur - ctxt->input->base;
10640
10641 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10642 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10643 ctxt->input->cur = ctxt->input->base + current;
10644 ctxt->input->end =
10645 &ctxt->input->buf->buffer->content[
10646 ctxt->input->buf->buffer->use];
10647 }
10648 avail = ctxt->input->buf->buffer->use -
10649 (ctxt->input->cur - ctxt->input->base);
10650 }
Owen Taylor3473f882001-02-23 17:55:21 +000010651 if (avail < 1)
10652 goto done;
10653 switch (ctxt->instate) {
10654 case XML_PARSER_EOF:
10655 /*
10656 * Document parsing is done !
10657 */
10658 goto done;
10659 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010660 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10661 xmlChar start[4];
10662 xmlCharEncoding enc;
10663
10664 /*
10665 * Very first chars read from the document flow.
10666 */
10667 if (avail < 4)
10668 goto done;
10669
10670 /*
10671 * Get the 4 first bytes and decode the charset
10672 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010673 * plug some encoding conversion routines,
10674 * else xmlSwitchEncoding will set to (default)
10675 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010676 */
10677 start[0] = RAW;
10678 start[1] = NXT(1);
10679 start[2] = NXT(2);
10680 start[3] = NXT(3);
10681 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010682 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010683 break;
10684 }
Owen Taylor3473f882001-02-23 17:55:21 +000010685
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010686 if (avail < 2)
10687 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010688 cur = ctxt->input->cur[0];
10689 next = ctxt->input->cur[1];
10690 if (cur == 0) {
10691 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10692 ctxt->sax->setDocumentLocator(ctxt->userData,
10693 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010694 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010695 ctxt->instate = XML_PARSER_EOF;
10696#ifdef DEBUG_PUSH
10697 xmlGenericError(xmlGenericErrorContext,
10698 "PP: entering EOF\n");
10699#endif
10700 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10701 ctxt->sax->endDocument(ctxt->userData);
10702 goto done;
10703 }
10704 if ((cur == '<') && (next == '?')) {
10705 /* PI or XML decl */
10706 if (avail < 5) return(ret);
10707 if ((!terminate) &&
10708 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10709 return(ret);
10710 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10711 ctxt->sax->setDocumentLocator(ctxt->userData,
10712 &xmlDefaultSAXLocator);
10713 if ((ctxt->input->cur[2] == 'x') &&
10714 (ctxt->input->cur[3] == 'm') &&
10715 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010716 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010717 ret += 5;
10718#ifdef DEBUG_PUSH
10719 xmlGenericError(xmlGenericErrorContext,
10720 "PP: Parsing XML Decl\n");
10721#endif
10722 xmlParseXMLDecl(ctxt);
10723 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10724 /*
10725 * The XML REC instructs us to stop parsing right
10726 * here
10727 */
10728 ctxt->instate = XML_PARSER_EOF;
10729 return(0);
10730 }
10731 ctxt->standalone = ctxt->input->standalone;
10732 if ((ctxt->encoding == NULL) &&
10733 (ctxt->input->encoding != NULL))
10734 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10735 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10736 (!ctxt->disableSAX))
10737 ctxt->sax->startDocument(ctxt->userData);
10738 ctxt->instate = XML_PARSER_MISC;
10739#ifdef DEBUG_PUSH
10740 xmlGenericError(xmlGenericErrorContext,
10741 "PP: entering MISC\n");
10742#endif
10743 } else {
10744 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10745 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10746 (!ctxt->disableSAX))
10747 ctxt->sax->startDocument(ctxt->userData);
10748 ctxt->instate = XML_PARSER_MISC;
10749#ifdef DEBUG_PUSH
10750 xmlGenericError(xmlGenericErrorContext,
10751 "PP: entering MISC\n");
10752#endif
10753 }
10754 } else {
10755 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10756 ctxt->sax->setDocumentLocator(ctxt->userData,
10757 &xmlDefaultSAXLocator);
10758 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010759 if (ctxt->version == NULL) {
10760 xmlErrMemory(ctxt, NULL);
10761 break;
10762 }
Owen Taylor3473f882001-02-23 17:55:21 +000010763 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10764 (!ctxt->disableSAX))
10765 ctxt->sax->startDocument(ctxt->userData);
10766 ctxt->instate = XML_PARSER_MISC;
10767#ifdef DEBUG_PUSH
10768 xmlGenericError(xmlGenericErrorContext,
10769 "PP: entering MISC\n");
10770#endif
10771 }
10772 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010773 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010774 const xmlChar *name;
10775 const xmlChar *prefix;
10776 const xmlChar *URI;
10777 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010778
10779 if ((avail < 2) && (ctxt->inputNr == 1))
10780 goto done;
10781 cur = ctxt->input->cur[0];
10782 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010783 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010784 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010785 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10786 ctxt->sax->endDocument(ctxt->userData);
10787 goto done;
10788 }
10789 if (!terminate) {
10790 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010791 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010792 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010793 goto done;
10794 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10795 goto done;
10796 }
10797 }
10798 if (ctxt->spaceNr == 0)
10799 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010800 else if (*ctxt->space == -2)
10801 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010802 else
10803 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010804#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010805 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010806#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010807 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010808#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010809 else
10810 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010811#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010812 if (name == NULL) {
10813 spacePop(ctxt);
10814 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010815 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10816 ctxt->sax->endDocument(ctxt->userData);
10817 goto done;
10818 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010819#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010820 /*
10821 * [ VC: Root Element Type ]
10822 * The Name in the document type declaration must match
10823 * the element type of the root element.
10824 */
10825 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10826 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10827 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010828#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010829
10830 /*
10831 * Check for an Empty Element.
10832 */
10833 if ((RAW == '/') && (NXT(1) == '>')) {
10834 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010835
10836 if (ctxt->sax2) {
10837 if ((ctxt->sax != NULL) &&
10838 (ctxt->sax->endElementNs != NULL) &&
10839 (!ctxt->disableSAX))
10840 ctxt->sax->endElementNs(ctxt->userData, name,
10841 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010842 if (ctxt->nsNr - nsNr > 0)
10843 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010844#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010845 } else {
10846 if ((ctxt->sax != NULL) &&
10847 (ctxt->sax->endElement != NULL) &&
10848 (!ctxt->disableSAX))
10849 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010850#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010851 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010852 spacePop(ctxt);
10853 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010854 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010855 } else {
10856 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010857 }
10858 break;
10859 }
10860 if (RAW == '>') {
10861 NEXT;
10862 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010863 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010864 "Couldn't find end of Start Tag %s\n",
10865 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010866 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010867 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010868 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010869 if (ctxt->sax2)
10870 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010871#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010872 else
10873 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010874#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010875
Daniel Veillarda880b122003-04-21 21:36:41 +000010876 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010877 break;
10878 }
10879 case XML_PARSER_CONTENT: {
10880 const xmlChar *test;
10881 unsigned int cons;
10882 if ((avail < 2) && (ctxt->inputNr == 1))
10883 goto done;
10884 cur = ctxt->input->cur[0];
10885 next = ctxt->input->cur[1];
10886
10887 test = CUR_PTR;
10888 cons = ctxt->input->consumed;
10889 if ((cur == '<') && (next == '/')) {
10890 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010891 break;
10892 } else if ((cur == '<') && (next == '?')) {
10893 if ((!terminate) &&
10894 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10895 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010896 xmlParsePI(ctxt);
10897 } else if ((cur == '<') && (next != '!')) {
10898 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010899 break;
10900 } else if ((cur == '<') && (next == '!') &&
10901 (ctxt->input->cur[2] == '-') &&
10902 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010903 int term;
10904
10905 if (avail < 4)
10906 goto done;
10907 ctxt->input->cur += 4;
10908 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10909 ctxt->input->cur -= 4;
10910 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010911 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010912 xmlParseComment(ctxt);
10913 ctxt->instate = XML_PARSER_CONTENT;
10914 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10915 (ctxt->input->cur[2] == '[') &&
10916 (ctxt->input->cur[3] == 'C') &&
10917 (ctxt->input->cur[4] == 'D') &&
10918 (ctxt->input->cur[5] == 'A') &&
10919 (ctxt->input->cur[6] == 'T') &&
10920 (ctxt->input->cur[7] == 'A') &&
10921 (ctxt->input->cur[8] == '[')) {
10922 SKIP(9);
10923 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010924 break;
10925 } else if ((cur == '<') && (next == '!') &&
10926 (avail < 9)) {
10927 goto done;
10928 } else if (cur == '&') {
10929 if ((!terminate) &&
10930 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10931 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010932 xmlParseReference(ctxt);
10933 } else {
10934 /* TODO Avoid the extra copy, handle directly !!! */
10935 /*
10936 * Goal of the following test is:
10937 * - minimize calls to the SAX 'character' callback
10938 * when they are mergeable
10939 * - handle an problem for isBlank when we only parse
10940 * a sequence of blank chars and the next one is
10941 * not available to check against '<' presence.
10942 * - tries to homogenize the differences in SAX
10943 * callbacks between the push and pull versions
10944 * of the parser.
10945 */
10946 if ((ctxt->inputNr == 1) &&
10947 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10948 if (!terminate) {
10949 if (ctxt->progressive) {
10950 if ((lastlt == NULL) ||
10951 (ctxt->input->cur > lastlt))
10952 goto done;
10953 } else if (xmlParseLookupSequence(ctxt,
10954 '<', 0, 0) < 0) {
10955 goto done;
10956 }
10957 }
10958 }
10959 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010960 xmlParseCharData(ctxt, 0);
10961 }
10962 /*
10963 * Pop-up of finished entities.
10964 */
10965 while ((RAW == 0) && (ctxt->inputNr > 1))
10966 xmlPopInput(ctxt);
10967 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010968 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10969 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010970 ctxt->instate = XML_PARSER_EOF;
10971 break;
10972 }
10973 break;
10974 }
10975 case XML_PARSER_END_TAG:
10976 if (avail < 2)
10977 goto done;
10978 if (!terminate) {
10979 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010980 /* > can be found unescaped in attribute values */
10981 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010982 goto done;
10983 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10984 goto done;
10985 }
10986 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010987 if (ctxt->sax2) {
10988 xmlParseEndTag2(ctxt,
10989 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10990 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010991 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010992 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010993 }
10994#ifdef LIBXML_SAX1_ENABLED
10995 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010996 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010997#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010998 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010999 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011000 } else {
11001 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011002 }
11003 break;
11004 case XML_PARSER_CDATA_SECTION: {
11005 /*
11006 * The Push mode need to have the SAX callback for
11007 * cdataBlock merge back contiguous callbacks.
11008 */
11009 int base;
11010
11011 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11012 if (base < 0) {
11013 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011014 int tmp;
11015
11016 tmp = xmlCheckCdataPush(ctxt->input->cur,
11017 XML_PARSER_BIG_BUFFER_SIZE);
11018 if (tmp < 0) {
11019 tmp = -tmp;
11020 ctxt->input->cur += tmp;
11021 goto encoding_error;
11022 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011023 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11024 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011025 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011026 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011027 else if (ctxt->sax->characters != NULL)
11028 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011029 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011030 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011031 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011032 ctxt->checkIndex = 0;
11033 }
11034 goto done;
11035 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011036 int tmp;
11037
11038 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11039 if ((tmp < 0) || (tmp != base)) {
11040 tmp = -tmp;
11041 ctxt->input->cur += tmp;
11042 goto encoding_error;
11043 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011044 if ((ctxt->sax != NULL) && (base == 0) &&
11045 (ctxt->sax->cdataBlock != NULL) &&
11046 (!ctxt->disableSAX)) {
11047 /*
11048 * Special case to provide identical behaviour
11049 * between pull and push parsers on enpty CDATA
11050 * sections
11051 */
11052 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11053 (!strncmp((const char *)&ctxt->input->cur[-9],
11054 "<![CDATA[", 9)))
11055 ctxt->sax->cdataBlock(ctxt->userData,
11056 BAD_CAST "", 0);
11057 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011058 (!ctxt->disableSAX)) {
11059 if (ctxt->sax->cdataBlock != NULL)
11060 ctxt->sax->cdataBlock(ctxt->userData,
11061 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011062 else if (ctxt->sax->characters != NULL)
11063 ctxt->sax->characters(ctxt->userData,
11064 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011065 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011066 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011067 ctxt->checkIndex = 0;
11068 ctxt->instate = XML_PARSER_CONTENT;
11069#ifdef DEBUG_PUSH
11070 xmlGenericError(xmlGenericErrorContext,
11071 "PP: entering CONTENT\n");
11072#endif
11073 }
11074 break;
11075 }
Owen Taylor3473f882001-02-23 17:55:21 +000011076 case XML_PARSER_MISC:
11077 SKIP_BLANKS;
11078 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011079 avail = ctxt->input->length -
11080 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011081 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011082 avail = ctxt->input->buf->buffer->use -
11083 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011084 if (avail < 2)
11085 goto done;
11086 cur = ctxt->input->cur[0];
11087 next = ctxt->input->cur[1];
11088 if ((cur == '<') && (next == '?')) {
11089 if ((!terminate) &&
11090 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11091 goto done;
11092#ifdef DEBUG_PUSH
11093 xmlGenericError(xmlGenericErrorContext,
11094 "PP: Parsing PI\n");
11095#endif
11096 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011097 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011098 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011099 (ctxt->input->cur[2] == '-') &&
11100 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011101 if ((!terminate) &&
11102 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11103 goto done;
11104#ifdef DEBUG_PUSH
11105 xmlGenericError(xmlGenericErrorContext,
11106 "PP: Parsing Comment\n");
11107#endif
11108 xmlParseComment(ctxt);
11109 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011110 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011111 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011112 (ctxt->input->cur[2] == 'D') &&
11113 (ctxt->input->cur[3] == 'O') &&
11114 (ctxt->input->cur[4] == 'C') &&
11115 (ctxt->input->cur[5] == 'T') &&
11116 (ctxt->input->cur[6] == 'Y') &&
11117 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011118 (ctxt->input->cur[8] == 'E')) {
11119 if ((!terminate) &&
11120 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11121 goto done;
11122#ifdef DEBUG_PUSH
11123 xmlGenericError(xmlGenericErrorContext,
11124 "PP: Parsing internal subset\n");
11125#endif
11126 ctxt->inSubset = 1;
11127 xmlParseDocTypeDecl(ctxt);
11128 if (RAW == '[') {
11129 ctxt->instate = XML_PARSER_DTD;
11130#ifdef DEBUG_PUSH
11131 xmlGenericError(xmlGenericErrorContext,
11132 "PP: entering DTD\n");
11133#endif
11134 } else {
11135 /*
11136 * Create and update the external subset.
11137 */
11138 ctxt->inSubset = 2;
11139 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11140 (ctxt->sax->externalSubset != NULL))
11141 ctxt->sax->externalSubset(ctxt->userData,
11142 ctxt->intSubName, ctxt->extSubSystem,
11143 ctxt->extSubURI);
11144 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011145 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011146 ctxt->instate = XML_PARSER_PROLOG;
11147#ifdef DEBUG_PUSH
11148 xmlGenericError(xmlGenericErrorContext,
11149 "PP: entering PROLOG\n");
11150#endif
11151 }
11152 } else if ((cur == '<') && (next == '!') &&
11153 (avail < 9)) {
11154 goto done;
11155 } else {
11156 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011157 ctxt->progressive = 1;
11158 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011159#ifdef DEBUG_PUSH
11160 xmlGenericError(xmlGenericErrorContext,
11161 "PP: entering START_TAG\n");
11162#endif
11163 }
11164 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011165 case XML_PARSER_PROLOG:
11166 SKIP_BLANKS;
11167 if (ctxt->input->buf == NULL)
11168 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11169 else
11170 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11171 if (avail < 2)
11172 goto done;
11173 cur = ctxt->input->cur[0];
11174 next = ctxt->input->cur[1];
11175 if ((cur == '<') && (next == '?')) {
11176 if ((!terminate) &&
11177 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11178 goto done;
11179#ifdef DEBUG_PUSH
11180 xmlGenericError(xmlGenericErrorContext,
11181 "PP: Parsing PI\n");
11182#endif
11183 xmlParsePI(ctxt);
11184 } else if ((cur == '<') && (next == '!') &&
11185 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11186 if ((!terminate) &&
11187 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11188 goto done;
11189#ifdef DEBUG_PUSH
11190 xmlGenericError(xmlGenericErrorContext,
11191 "PP: Parsing Comment\n");
11192#endif
11193 xmlParseComment(ctxt);
11194 ctxt->instate = XML_PARSER_PROLOG;
11195 } else if ((cur == '<') && (next == '!') &&
11196 (avail < 4)) {
11197 goto done;
11198 } else {
11199 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011200 if (ctxt->progressive == 0)
11201 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011202 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011203#ifdef DEBUG_PUSH
11204 xmlGenericError(xmlGenericErrorContext,
11205 "PP: entering START_TAG\n");
11206#endif
11207 }
11208 break;
11209 case XML_PARSER_EPILOG:
11210 SKIP_BLANKS;
11211 if (ctxt->input->buf == NULL)
11212 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11213 else
11214 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11215 if (avail < 2)
11216 goto done;
11217 cur = ctxt->input->cur[0];
11218 next = ctxt->input->cur[1];
11219 if ((cur == '<') && (next == '?')) {
11220 if ((!terminate) &&
11221 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11222 goto done;
11223#ifdef DEBUG_PUSH
11224 xmlGenericError(xmlGenericErrorContext,
11225 "PP: Parsing PI\n");
11226#endif
11227 xmlParsePI(ctxt);
11228 ctxt->instate = XML_PARSER_EPILOG;
11229 } else if ((cur == '<') && (next == '!') &&
11230 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11231 if ((!terminate) &&
11232 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11233 goto done;
11234#ifdef DEBUG_PUSH
11235 xmlGenericError(xmlGenericErrorContext,
11236 "PP: Parsing Comment\n");
11237#endif
11238 xmlParseComment(ctxt);
11239 ctxt->instate = XML_PARSER_EPILOG;
11240 } else if ((cur == '<') && (next == '!') &&
11241 (avail < 4)) {
11242 goto done;
11243 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011244 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011245 ctxt->instate = XML_PARSER_EOF;
11246#ifdef DEBUG_PUSH
11247 xmlGenericError(xmlGenericErrorContext,
11248 "PP: entering EOF\n");
11249#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011250 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011251 ctxt->sax->endDocument(ctxt->userData);
11252 goto done;
11253 }
11254 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011255 case XML_PARSER_DTD: {
11256 /*
11257 * Sorry but progressive parsing of the internal subset
11258 * is not expected to be supported. We first check that
11259 * the full content of the internal subset is available and
11260 * the parsing is launched only at that point.
11261 * Internal subset ends up with "']' S? '>'" in an unescaped
11262 * section and not in a ']]>' sequence which are conditional
11263 * sections (whoever argued to keep that crap in XML deserve
11264 * a place in hell !).
11265 */
11266 int base, i;
11267 xmlChar *buf;
11268 xmlChar quote = 0;
11269
11270 base = ctxt->input->cur - ctxt->input->base;
11271 if (base < 0) return(0);
11272 if (ctxt->checkIndex > base)
11273 base = ctxt->checkIndex;
11274 buf = ctxt->input->buf->buffer->content;
11275 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11276 base++) {
11277 if (quote != 0) {
11278 if (buf[base] == quote)
11279 quote = 0;
11280 continue;
11281 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011282 if ((quote == 0) && (buf[base] == '<')) {
11283 int found = 0;
11284 /* special handling of comments */
11285 if (((unsigned int) base + 4 <
11286 ctxt->input->buf->buffer->use) &&
11287 (buf[base + 1] == '!') &&
11288 (buf[base + 2] == '-') &&
11289 (buf[base + 3] == '-')) {
11290 for (;(unsigned int) base + 3 <
11291 ctxt->input->buf->buffer->use; base++) {
11292 if ((buf[base] == '-') &&
11293 (buf[base + 1] == '-') &&
11294 (buf[base + 2] == '>')) {
11295 found = 1;
11296 base += 2;
11297 break;
11298 }
11299 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011300 if (!found) {
11301#if 0
11302 fprintf(stderr, "unfinished comment\n");
11303#endif
11304 break; /* for */
11305 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011306 continue;
11307 }
11308 }
Owen Taylor3473f882001-02-23 17:55:21 +000011309 if (buf[base] == '"') {
11310 quote = '"';
11311 continue;
11312 }
11313 if (buf[base] == '\'') {
11314 quote = '\'';
11315 continue;
11316 }
11317 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011318#if 0
11319 fprintf(stderr, "%c%c%c%c: ", buf[base],
11320 buf[base + 1], buf[base + 2], buf[base + 3]);
11321#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011322 if ((unsigned int) base +1 >=
11323 ctxt->input->buf->buffer->use)
11324 break;
11325 if (buf[base + 1] == ']') {
11326 /* conditional crap, skip both ']' ! */
11327 base++;
11328 continue;
11329 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011330 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011331 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11332 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011333 if (buf[base + i] == '>') {
11334#if 0
11335 fprintf(stderr, "found\n");
11336#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011337 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011338 }
11339 if (!IS_BLANK_CH(buf[base + i])) {
11340#if 0
11341 fprintf(stderr, "not found\n");
11342#endif
11343 goto not_end_of_int_subset;
11344 }
Owen Taylor3473f882001-02-23 17:55:21 +000011345 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011346#if 0
11347 fprintf(stderr, "end of stream\n");
11348#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011349 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011350
Owen Taylor3473f882001-02-23 17:55:21 +000011351 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011352not_end_of_int_subset:
11353 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011354 }
11355 /*
11356 * We didn't found the end of the Internal subset
11357 */
Owen Taylor3473f882001-02-23 17:55:21 +000011358#ifdef DEBUG_PUSH
11359 if (next == 0)
11360 xmlGenericError(xmlGenericErrorContext,
11361 "PP: lookup of int subset end filed\n");
11362#endif
11363 goto done;
11364
11365found_end_int_subset:
11366 xmlParseInternalSubset(ctxt);
11367 ctxt->inSubset = 2;
11368 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11369 (ctxt->sax->externalSubset != NULL))
11370 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11371 ctxt->extSubSystem, ctxt->extSubURI);
11372 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011373 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011374 ctxt->instate = XML_PARSER_PROLOG;
11375 ctxt->checkIndex = 0;
11376#ifdef DEBUG_PUSH
11377 xmlGenericError(xmlGenericErrorContext,
11378 "PP: entering PROLOG\n");
11379#endif
11380 break;
11381 }
11382 case XML_PARSER_COMMENT:
11383 xmlGenericError(xmlGenericErrorContext,
11384 "PP: internal error, state == COMMENT\n");
11385 ctxt->instate = XML_PARSER_CONTENT;
11386#ifdef DEBUG_PUSH
11387 xmlGenericError(xmlGenericErrorContext,
11388 "PP: entering CONTENT\n");
11389#endif
11390 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011391 case XML_PARSER_IGNORE:
11392 xmlGenericError(xmlGenericErrorContext,
11393 "PP: internal error, state == IGNORE");
11394 ctxt->instate = XML_PARSER_DTD;
11395#ifdef DEBUG_PUSH
11396 xmlGenericError(xmlGenericErrorContext,
11397 "PP: entering DTD\n");
11398#endif
11399 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011400 case XML_PARSER_PI:
11401 xmlGenericError(xmlGenericErrorContext,
11402 "PP: internal error, state == PI\n");
11403 ctxt->instate = XML_PARSER_CONTENT;
11404#ifdef DEBUG_PUSH
11405 xmlGenericError(xmlGenericErrorContext,
11406 "PP: entering CONTENT\n");
11407#endif
11408 break;
11409 case XML_PARSER_ENTITY_DECL:
11410 xmlGenericError(xmlGenericErrorContext,
11411 "PP: internal error, state == ENTITY_DECL\n");
11412 ctxt->instate = XML_PARSER_DTD;
11413#ifdef DEBUG_PUSH
11414 xmlGenericError(xmlGenericErrorContext,
11415 "PP: entering DTD\n");
11416#endif
11417 break;
11418 case XML_PARSER_ENTITY_VALUE:
11419 xmlGenericError(xmlGenericErrorContext,
11420 "PP: internal error, state == ENTITY_VALUE\n");
11421 ctxt->instate = XML_PARSER_CONTENT;
11422#ifdef DEBUG_PUSH
11423 xmlGenericError(xmlGenericErrorContext,
11424 "PP: entering DTD\n");
11425#endif
11426 break;
11427 case XML_PARSER_ATTRIBUTE_VALUE:
11428 xmlGenericError(xmlGenericErrorContext,
11429 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11430 ctxt->instate = XML_PARSER_START_TAG;
11431#ifdef DEBUG_PUSH
11432 xmlGenericError(xmlGenericErrorContext,
11433 "PP: entering START_TAG\n");
11434#endif
11435 break;
11436 case XML_PARSER_SYSTEM_LITERAL:
11437 xmlGenericError(xmlGenericErrorContext,
11438 "PP: internal error, state == SYSTEM_LITERAL\n");
11439 ctxt->instate = XML_PARSER_START_TAG;
11440#ifdef DEBUG_PUSH
11441 xmlGenericError(xmlGenericErrorContext,
11442 "PP: entering START_TAG\n");
11443#endif
11444 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011445 case XML_PARSER_PUBLIC_LITERAL:
11446 xmlGenericError(xmlGenericErrorContext,
11447 "PP: internal error, state == PUBLIC_LITERAL\n");
11448 ctxt->instate = XML_PARSER_START_TAG;
11449#ifdef DEBUG_PUSH
11450 xmlGenericError(xmlGenericErrorContext,
11451 "PP: entering START_TAG\n");
11452#endif
11453 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011454 }
11455 }
11456done:
11457#ifdef DEBUG_PUSH
11458 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11459#endif
11460 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011461encoding_error:
11462 {
11463 char buffer[150];
11464
11465 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11466 ctxt->input->cur[0], ctxt->input->cur[1],
11467 ctxt->input->cur[2], ctxt->input->cur[3]);
11468 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11469 "Input is not proper UTF-8, indicate encoding !\n%s",
11470 BAD_CAST buffer, NULL);
11471 }
11472 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011473}
11474
11475/**
Owen Taylor3473f882001-02-23 17:55:21 +000011476 * xmlParseChunk:
11477 * @ctxt: an XML parser context
11478 * @chunk: an char array
11479 * @size: the size in byte of the chunk
11480 * @terminate: last chunk indicator
11481 *
11482 * Parse a Chunk of memory
11483 *
11484 * Returns zero if no error, the xmlParserErrors otherwise.
11485 */
11486int
11487xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11488 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011489 int end_in_lf = 0;
11490
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011491 if (ctxt == NULL)
11492 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011493 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011494 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011495 if (ctxt->instate == XML_PARSER_START)
11496 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011497 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11498 (chunk[size - 1] == '\r')) {
11499 end_in_lf = 1;
11500 size--;
11501 }
Owen Taylor3473f882001-02-23 17:55:21 +000011502 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11503 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11504 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11505 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011506 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000011507
William M. Bracka3215c72004-07-31 16:24:01 +000011508 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11509 if (res < 0) {
11510 ctxt->errNo = XML_PARSER_EOF;
11511 ctxt->disableSAX = 1;
11512 return (XML_PARSER_EOF);
11513 }
Owen Taylor3473f882001-02-23 17:55:21 +000011514 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11515 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011516 ctxt->input->end =
11517 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011518#ifdef DEBUG_PUSH
11519 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11520#endif
11521
Owen Taylor3473f882001-02-23 17:55:21 +000011522 } else if (ctxt->instate != XML_PARSER_EOF) {
11523 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11524 xmlParserInputBufferPtr in = ctxt->input->buf;
11525 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11526 (in->raw != NULL)) {
11527 int nbchars;
11528
11529 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11530 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011531 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011532 xmlGenericError(xmlGenericErrorContext,
11533 "xmlParseChunk: encoder error\n");
11534 return(XML_ERR_INVALID_ENCODING);
11535 }
11536 }
11537 }
11538 }
11539 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000011540 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11541 (ctxt->input->buf != NULL)) {
11542 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11543 }
Daniel Veillard14412512005-01-21 23:53:26 +000011544 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011545 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011546 if (terminate) {
11547 /*
11548 * Check for termination
11549 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011550 int avail = 0;
11551
11552 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011553 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011554 avail = ctxt->input->length -
11555 (ctxt->input->cur - ctxt->input->base);
11556 else
11557 avail = ctxt->input->buf->buffer->use -
11558 (ctxt->input->cur - ctxt->input->base);
11559 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011560
Owen Taylor3473f882001-02-23 17:55:21 +000011561 if ((ctxt->instate != XML_PARSER_EOF) &&
11562 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011563 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011564 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011565 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011566 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011567 }
Owen Taylor3473f882001-02-23 17:55:21 +000011568 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011569 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011570 ctxt->sax->endDocument(ctxt->userData);
11571 }
11572 ctxt->instate = XML_PARSER_EOF;
11573 }
11574 return((xmlParserErrors) ctxt->errNo);
11575}
11576
11577/************************************************************************
11578 * *
11579 * I/O front end functions to the parser *
11580 * *
11581 ************************************************************************/
11582
11583/**
Owen Taylor3473f882001-02-23 17:55:21 +000011584 * xmlCreatePushParserCtxt:
11585 * @sax: a SAX handler
11586 * @user_data: The user data returned on SAX callbacks
11587 * @chunk: a pointer to an array of chars
11588 * @size: number of chars in the array
11589 * @filename: an optional file name or URI
11590 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011591 * Create a parser context for using the XML parser in push mode.
11592 * If @buffer and @size are non-NULL, the data is used to detect
11593 * the encoding. The remaining characters will be parsed so they
11594 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011595 * To allow content encoding detection, @size should be >= 4
11596 * The value of @filename is used for fetching external entities
11597 * and error/warning reports.
11598 *
11599 * Returns the new parser context or NULL
11600 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011601
Owen Taylor3473f882001-02-23 17:55:21 +000011602xmlParserCtxtPtr
11603xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11604 const char *chunk, int size, const char *filename) {
11605 xmlParserCtxtPtr ctxt;
11606 xmlParserInputPtr inputStream;
11607 xmlParserInputBufferPtr buf;
11608 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11609
11610 /*
11611 * plug some encoding conversion routines
11612 */
11613 if ((chunk != NULL) && (size >= 4))
11614 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11615
11616 buf = xmlAllocParserInputBuffer(enc);
11617 if (buf == NULL) return(NULL);
11618
11619 ctxt = xmlNewParserCtxt();
11620 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011621 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011622 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011623 return(NULL);
11624 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011625 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011626 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11627 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011628 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011629 xmlFreeParserInputBuffer(buf);
11630 xmlFreeParserCtxt(ctxt);
11631 return(NULL);
11632 }
Owen Taylor3473f882001-02-23 17:55:21 +000011633 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011634#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011635 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011636#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011637 xmlFree(ctxt->sax);
11638 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11639 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011640 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011641 xmlFreeParserInputBuffer(buf);
11642 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011643 return(NULL);
11644 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011645 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11646 if (sax->initialized == XML_SAX2_MAGIC)
11647 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11648 else
11649 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011650 if (user_data != NULL)
11651 ctxt->userData = user_data;
11652 }
11653 if (filename == NULL) {
11654 ctxt->directory = NULL;
11655 } else {
11656 ctxt->directory = xmlParserGetDirectory(filename);
11657 }
11658
11659 inputStream = xmlNewInputStream(ctxt);
11660 if (inputStream == NULL) {
11661 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011662 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011663 return(NULL);
11664 }
11665
11666 if (filename == NULL)
11667 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011668 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011669 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011670 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011671 if (inputStream->filename == NULL) {
11672 xmlFreeParserCtxt(ctxt);
11673 xmlFreeParserInputBuffer(buf);
11674 return(NULL);
11675 }
11676 }
Owen Taylor3473f882001-02-23 17:55:21 +000011677 inputStream->buf = buf;
11678 inputStream->base = inputStream->buf->buffer->content;
11679 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011680 inputStream->end =
11681 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011682
11683 inputPush(ctxt, inputStream);
11684
William M. Brack3a1cd212005-02-11 14:35:54 +000011685 /*
11686 * If the caller didn't provide an initial 'chunk' for determining
11687 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11688 * that it can be automatically determined later
11689 */
11690 if ((size == 0) || (chunk == NULL)) {
11691 ctxt->charset = XML_CHAR_ENCODING_NONE;
11692 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011693 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11694 int cur = ctxt->input->cur - ctxt->input->base;
11695
Owen Taylor3473f882001-02-23 17:55:21 +000011696 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011697
11698 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11699 ctxt->input->cur = ctxt->input->base + cur;
11700 ctxt->input->end =
11701 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011702#ifdef DEBUG_PUSH
11703 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11704#endif
11705 }
11706
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011707 if (enc != XML_CHAR_ENCODING_NONE) {
11708 xmlSwitchEncoding(ctxt, enc);
11709 }
11710
Owen Taylor3473f882001-02-23 17:55:21 +000011711 return(ctxt);
11712}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011713#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011714
11715/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011716 * xmlStopParser:
11717 * @ctxt: an XML parser context
11718 *
11719 * Blocks further parser processing
11720 */
11721void
11722xmlStopParser(xmlParserCtxtPtr ctxt) {
11723 if (ctxt == NULL)
11724 return;
11725 ctxt->instate = XML_PARSER_EOF;
11726 ctxt->disableSAX = 1;
11727 if (ctxt->input != NULL) {
11728 ctxt->input->cur = BAD_CAST"";
11729 ctxt->input->base = ctxt->input->cur;
11730 }
11731}
11732
11733/**
Owen Taylor3473f882001-02-23 17:55:21 +000011734 * xmlCreateIOParserCtxt:
11735 * @sax: a SAX handler
11736 * @user_data: The user data returned on SAX callbacks
11737 * @ioread: an I/O read function
11738 * @ioclose: an I/O close function
11739 * @ioctx: an I/O handler
11740 * @enc: the charset encoding if known
11741 *
11742 * Create a parser context for using the XML parser with an existing
11743 * I/O stream
11744 *
11745 * Returns the new parser context or NULL
11746 */
11747xmlParserCtxtPtr
11748xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11749 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11750 void *ioctx, xmlCharEncoding enc) {
11751 xmlParserCtxtPtr ctxt;
11752 xmlParserInputPtr inputStream;
11753 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011754
11755 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011756
11757 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11758 if (buf == NULL) return(NULL);
11759
11760 ctxt = xmlNewParserCtxt();
11761 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011762 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011763 return(NULL);
11764 }
11765 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011766#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011767 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011768#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011769 xmlFree(ctxt->sax);
11770 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11771 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011772 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011773 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011774 return(NULL);
11775 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011776 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11777 if (sax->initialized == XML_SAX2_MAGIC)
11778 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11779 else
11780 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011781 if (user_data != NULL)
11782 ctxt->userData = user_data;
11783 }
11784
11785 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11786 if (inputStream == NULL) {
11787 xmlFreeParserCtxt(ctxt);
11788 return(NULL);
11789 }
11790 inputPush(ctxt, inputStream);
11791
11792 return(ctxt);
11793}
11794
Daniel Veillard4432df22003-09-28 18:58:27 +000011795#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011796/************************************************************************
11797 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011798 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011799 * *
11800 ************************************************************************/
11801
11802/**
11803 * xmlIOParseDTD:
11804 * @sax: the SAX handler block or NULL
11805 * @input: an Input Buffer
11806 * @enc: the charset encoding if known
11807 *
11808 * Load and parse a DTD
11809 *
11810 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011811 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011812 */
11813
11814xmlDtdPtr
11815xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11816 xmlCharEncoding enc) {
11817 xmlDtdPtr ret = NULL;
11818 xmlParserCtxtPtr ctxt;
11819 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011820 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011821
11822 if (input == NULL)
11823 return(NULL);
11824
11825 ctxt = xmlNewParserCtxt();
11826 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011827 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011828 return(NULL);
11829 }
11830
11831 /*
11832 * Set-up the SAX context
11833 */
11834 if (sax != NULL) {
11835 if (ctxt->sax != NULL)
11836 xmlFree(ctxt->sax);
11837 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011838 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011839 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011840 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011841
11842 /*
11843 * generate a parser input from the I/O handler
11844 */
11845
Daniel Veillard43caefb2003-12-07 19:32:22 +000011846 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011847 if (pinput == NULL) {
11848 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011849 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011850 xmlFreeParserCtxt(ctxt);
11851 return(NULL);
11852 }
11853
11854 /*
11855 * plug some encoding conversion routines here.
11856 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011857 if (xmlPushInput(ctxt, pinput) < 0) {
11858 if (sax != NULL) ctxt->sax = NULL;
11859 xmlFreeParserCtxt(ctxt);
11860 return(NULL);
11861 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000011862 if (enc != XML_CHAR_ENCODING_NONE) {
11863 xmlSwitchEncoding(ctxt, enc);
11864 }
Owen Taylor3473f882001-02-23 17:55:21 +000011865
11866 pinput->filename = NULL;
11867 pinput->line = 1;
11868 pinput->col = 1;
11869 pinput->base = ctxt->input->cur;
11870 pinput->cur = ctxt->input->cur;
11871 pinput->free = NULL;
11872
11873 /*
11874 * let's parse that entity knowing it's an external subset.
11875 */
11876 ctxt->inSubset = 2;
11877 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011878 if (ctxt->myDoc == NULL) {
11879 xmlErrMemory(ctxt, "New Doc failed");
11880 return(NULL);
11881 }
11882 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011883 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11884 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011885
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011886 if ((enc == XML_CHAR_ENCODING_NONE) &&
11887 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011888 /*
11889 * Get the 4 first bytes and decode the charset
11890 * if enc != XML_CHAR_ENCODING_NONE
11891 * plug some encoding conversion routines.
11892 */
11893 start[0] = RAW;
11894 start[1] = NXT(1);
11895 start[2] = NXT(2);
11896 start[3] = NXT(3);
11897 enc = xmlDetectCharEncoding(start, 4);
11898 if (enc != XML_CHAR_ENCODING_NONE) {
11899 xmlSwitchEncoding(ctxt, enc);
11900 }
11901 }
11902
Owen Taylor3473f882001-02-23 17:55:21 +000011903 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11904
11905 if (ctxt->myDoc != NULL) {
11906 if (ctxt->wellFormed) {
11907 ret = ctxt->myDoc->extSubset;
11908 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011909 if (ret != NULL) {
11910 xmlNodePtr tmp;
11911
11912 ret->doc = NULL;
11913 tmp = ret->children;
11914 while (tmp != NULL) {
11915 tmp->doc = NULL;
11916 tmp = tmp->next;
11917 }
11918 }
Owen Taylor3473f882001-02-23 17:55:21 +000011919 } else {
11920 ret = NULL;
11921 }
11922 xmlFreeDoc(ctxt->myDoc);
11923 ctxt->myDoc = NULL;
11924 }
11925 if (sax != NULL) ctxt->sax = NULL;
11926 xmlFreeParserCtxt(ctxt);
11927
11928 return(ret);
11929}
11930
11931/**
11932 * xmlSAXParseDTD:
11933 * @sax: the SAX handler block
11934 * @ExternalID: a NAME* containing the External ID of the DTD
11935 * @SystemID: a NAME* containing the URL to the DTD
11936 *
11937 * Load and parse an external subset.
11938 *
11939 * Returns the resulting xmlDtdPtr or NULL in case of error.
11940 */
11941
11942xmlDtdPtr
11943xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11944 const xmlChar *SystemID) {
11945 xmlDtdPtr ret = NULL;
11946 xmlParserCtxtPtr ctxt;
11947 xmlParserInputPtr input = NULL;
11948 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011949 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011950
11951 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11952
11953 ctxt = xmlNewParserCtxt();
11954 if (ctxt == NULL) {
11955 return(NULL);
11956 }
11957
11958 /*
11959 * Set-up the SAX context
11960 */
11961 if (sax != NULL) {
11962 if (ctxt->sax != NULL)
11963 xmlFree(ctxt->sax);
11964 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011965 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011966 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011967
11968 /*
11969 * Canonicalise the system ID
11970 */
11971 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011972 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011973 xmlFreeParserCtxt(ctxt);
11974 return(NULL);
11975 }
Owen Taylor3473f882001-02-23 17:55:21 +000011976
11977 /*
11978 * Ask the Entity resolver to load the damn thing
11979 */
11980
11981 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011982 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11983 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011984 if (input == NULL) {
11985 if (sax != NULL) ctxt->sax = NULL;
11986 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011987 if (systemIdCanonic != NULL)
11988 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011989 return(NULL);
11990 }
11991
11992 /*
11993 * plug some encoding conversion routines here.
11994 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011995 if (xmlPushInput(ctxt, input) < 0) {
11996 if (sax != NULL) ctxt->sax = NULL;
11997 xmlFreeParserCtxt(ctxt);
11998 if (systemIdCanonic != NULL)
11999 xmlFree(systemIdCanonic);
12000 return(NULL);
12001 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012002 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12003 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12004 xmlSwitchEncoding(ctxt, enc);
12005 }
Owen Taylor3473f882001-02-23 17:55:21 +000012006
12007 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012008 input->filename = (char *) systemIdCanonic;
12009 else
12010 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012011 input->line = 1;
12012 input->col = 1;
12013 input->base = ctxt->input->cur;
12014 input->cur = ctxt->input->cur;
12015 input->free = NULL;
12016
12017 /*
12018 * let's parse that entity knowing it's an external subset.
12019 */
12020 ctxt->inSubset = 2;
12021 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012022 if (ctxt->myDoc == NULL) {
12023 xmlErrMemory(ctxt, "New Doc failed");
12024 if (sax != NULL) ctxt->sax = NULL;
12025 xmlFreeParserCtxt(ctxt);
12026 return(NULL);
12027 }
12028 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012029 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12030 ExternalID, SystemID);
12031 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12032
12033 if (ctxt->myDoc != NULL) {
12034 if (ctxt->wellFormed) {
12035 ret = ctxt->myDoc->extSubset;
12036 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012037 if (ret != NULL) {
12038 xmlNodePtr tmp;
12039
12040 ret->doc = NULL;
12041 tmp = ret->children;
12042 while (tmp != NULL) {
12043 tmp->doc = NULL;
12044 tmp = tmp->next;
12045 }
12046 }
Owen Taylor3473f882001-02-23 17:55:21 +000012047 } else {
12048 ret = NULL;
12049 }
12050 xmlFreeDoc(ctxt->myDoc);
12051 ctxt->myDoc = NULL;
12052 }
12053 if (sax != NULL) ctxt->sax = NULL;
12054 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012055
Owen Taylor3473f882001-02-23 17:55:21 +000012056 return(ret);
12057}
12058
Daniel Veillard4432df22003-09-28 18:58:27 +000012059
Owen Taylor3473f882001-02-23 17:55:21 +000012060/**
12061 * xmlParseDTD:
12062 * @ExternalID: a NAME* containing the External ID of the DTD
12063 * @SystemID: a NAME* containing the URL to the DTD
12064 *
12065 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012066 *
Owen Taylor3473f882001-02-23 17:55:21 +000012067 * Returns the resulting xmlDtdPtr or NULL in case of error.
12068 */
12069
12070xmlDtdPtr
12071xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12072 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12073}
Daniel Veillard4432df22003-09-28 18:58:27 +000012074#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012075
12076/************************************************************************
12077 * *
12078 * Front ends when parsing an Entity *
12079 * *
12080 ************************************************************************/
12081
12082/**
Owen Taylor3473f882001-02-23 17:55:21 +000012083 * xmlParseCtxtExternalEntity:
12084 * @ctx: the existing parsing context
12085 * @URL: the URL for the entity to load
12086 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012087 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012088 *
12089 * Parse an external general entity within an existing parsing context
12090 * An external general parsed entity is well-formed if it matches the
12091 * production labeled extParsedEnt.
12092 *
12093 * [78] extParsedEnt ::= TextDecl? content
12094 *
12095 * Returns 0 if the entity is well formed, -1 in case of args problem and
12096 * the parser error code otherwise
12097 */
12098
12099int
12100xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012101 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012102 xmlParserCtxtPtr ctxt;
12103 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012104 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012105 xmlSAXHandlerPtr oldsax = NULL;
12106 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012107 xmlChar start[4];
12108 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012109
Daniel Veillardce682bc2004-11-05 17:22:25 +000012110 if (ctx == NULL) return(-1);
12111
Daniel Veillard0161e632008-08-28 15:36:32 +000012112 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12113 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012114 return(XML_ERR_ENTITY_LOOP);
12115 }
12116
Daniel Veillardcda96922001-08-21 10:56:31 +000012117 if (lst != NULL)
12118 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012119 if ((URL == NULL) && (ID == NULL))
12120 return(-1);
12121 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12122 return(-1);
12123
Rob Richards798743a2009-06-19 13:54:25 -040012124 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012125 if (ctxt == NULL) {
12126 return(-1);
12127 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012128
Owen Taylor3473f882001-02-23 17:55:21 +000012129 oldsax = ctxt->sax;
12130 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012131 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012132 newDoc = xmlNewDoc(BAD_CAST "1.0");
12133 if (newDoc == NULL) {
12134 xmlFreeParserCtxt(ctxt);
12135 return(-1);
12136 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012137 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012138 if (ctx->myDoc->dict) {
12139 newDoc->dict = ctx->myDoc->dict;
12140 xmlDictReference(newDoc->dict);
12141 }
Owen Taylor3473f882001-02-23 17:55:21 +000012142 if (ctx->myDoc != NULL) {
12143 newDoc->intSubset = ctx->myDoc->intSubset;
12144 newDoc->extSubset = ctx->myDoc->extSubset;
12145 }
12146 if (ctx->myDoc->URL != NULL) {
12147 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12148 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012149 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12150 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012151 ctxt->sax = oldsax;
12152 xmlFreeParserCtxt(ctxt);
12153 newDoc->intSubset = NULL;
12154 newDoc->extSubset = NULL;
12155 xmlFreeDoc(newDoc);
12156 return(-1);
12157 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012158 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012159 nodePush(ctxt, newDoc->children);
12160 if (ctx->myDoc == NULL) {
12161 ctxt->myDoc = newDoc;
12162 } else {
12163 ctxt->myDoc = ctx->myDoc;
12164 newDoc->children->doc = ctx->myDoc;
12165 }
12166
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012167 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012168 * Get the 4 first bytes and decode the charset
12169 * if enc != XML_CHAR_ENCODING_NONE
12170 * plug some encoding conversion routines.
12171 */
12172 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012173 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12174 start[0] = RAW;
12175 start[1] = NXT(1);
12176 start[2] = NXT(2);
12177 start[3] = NXT(3);
12178 enc = xmlDetectCharEncoding(start, 4);
12179 if (enc != XML_CHAR_ENCODING_NONE) {
12180 xmlSwitchEncoding(ctxt, enc);
12181 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012182 }
12183
Owen Taylor3473f882001-02-23 17:55:21 +000012184 /*
12185 * Parse a possible text declaration first
12186 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012187 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012188 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012189 /*
12190 * An XML-1.0 document can't reference an entity not XML-1.0
12191 */
12192 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12193 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12194 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12195 "Version mismatch between document and entity\n");
12196 }
Owen Taylor3473f882001-02-23 17:55:21 +000012197 }
12198
12199 /*
12200 * Doing validity checking on chunk doesn't make sense
12201 */
12202 ctxt->instate = XML_PARSER_CONTENT;
12203 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012204 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012205 ctxt->loadsubset = ctx->loadsubset;
12206 ctxt->depth = ctx->depth + 1;
12207 ctxt->replaceEntities = ctx->replaceEntities;
12208 if (ctxt->validate) {
12209 ctxt->vctxt.error = ctx->vctxt.error;
12210 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012211 } else {
12212 ctxt->vctxt.error = NULL;
12213 ctxt->vctxt.warning = NULL;
12214 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012215 ctxt->vctxt.nodeTab = NULL;
12216 ctxt->vctxt.nodeNr = 0;
12217 ctxt->vctxt.nodeMax = 0;
12218 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012219 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12220 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012221 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12222 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12223 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012224 ctxt->dictNames = ctx->dictNames;
12225 ctxt->attsDefault = ctx->attsDefault;
12226 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012227 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012228
12229 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012230
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012231 ctx->validate = ctxt->validate;
12232 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012233 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012234 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012235 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012236 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012237 }
12238 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012239 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012240 }
12241
12242 if (!ctxt->wellFormed) {
12243 if (ctxt->errNo == 0)
12244 ret = 1;
12245 else
12246 ret = ctxt->errNo;
12247 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012248 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012249 xmlNodePtr cur;
12250
12251 /*
12252 * Return the newly created nodeset after unlinking it from
12253 * they pseudo parent.
12254 */
12255 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012256 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012257 while (cur != NULL) {
12258 cur->parent = NULL;
12259 cur = cur->next;
12260 }
12261 newDoc->children->children = NULL;
12262 }
12263 ret = 0;
12264 }
12265 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012266 ctxt->dict = NULL;
12267 ctxt->attsDefault = NULL;
12268 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012269 xmlFreeParserCtxt(ctxt);
12270 newDoc->intSubset = NULL;
12271 newDoc->extSubset = NULL;
12272 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012273
Owen Taylor3473f882001-02-23 17:55:21 +000012274 return(ret);
12275}
12276
12277/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012278 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012279 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012280 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012281 * @sax: the SAX handler bloc (possibly NULL)
12282 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12283 * @depth: Used for loop detection, use 0
12284 * @URL: the URL for the entity to load
12285 * @ID: the System ID for the entity to load
12286 * @list: the return value for the set of parsed nodes
12287 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012288 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012289 *
12290 * Returns 0 if the entity is well formed, -1 in case of args problem and
12291 * the parser error code otherwise
12292 */
12293
Daniel Veillard7d515752003-09-26 19:12:37 +000012294static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012295xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12296 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012297 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012298 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012299 xmlParserCtxtPtr ctxt;
12300 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012301 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012302 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012303 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012304 xmlChar start[4];
12305 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012306
Daniel Veillard0161e632008-08-28 15:36:32 +000012307 if (((depth > 40) &&
12308 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12309 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012310 return(XML_ERR_ENTITY_LOOP);
12311 }
12312
Owen Taylor3473f882001-02-23 17:55:21 +000012313 if (list != NULL)
12314 *list = NULL;
12315 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012316 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012317 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012318 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012319
12320
Rob Richards9c0aa472009-03-26 18:10:19 +000012321 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012322 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012323 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012324 if (oldctxt != NULL) {
12325 ctxt->_private = oldctxt->_private;
12326 ctxt->loadsubset = oldctxt->loadsubset;
12327 ctxt->validate = oldctxt->validate;
12328 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012329 ctxt->record_info = oldctxt->record_info;
12330 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12331 ctxt->node_seq.length = oldctxt->node_seq.length;
12332 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012333 } else {
12334 /*
12335 * Doing validity checking on chunk without context
12336 * doesn't make sense
12337 */
12338 ctxt->_private = NULL;
12339 ctxt->validate = 0;
12340 ctxt->external = 2;
12341 ctxt->loadsubset = 0;
12342 }
Owen Taylor3473f882001-02-23 17:55:21 +000012343 if (sax != NULL) {
12344 oldsax = ctxt->sax;
12345 ctxt->sax = sax;
12346 if (user_data != NULL)
12347 ctxt->userData = user_data;
12348 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012349 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012350 newDoc = xmlNewDoc(BAD_CAST "1.0");
12351 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012352 ctxt->node_seq.maximum = 0;
12353 ctxt->node_seq.length = 0;
12354 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012355 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012356 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012357 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012358 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012359 newDoc->intSubset = doc->intSubset;
12360 newDoc->extSubset = doc->extSubset;
12361 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012362 xmlDictReference(newDoc->dict);
12363
Owen Taylor3473f882001-02-23 17:55:21 +000012364 if (doc->URL != NULL) {
12365 newDoc->URL = xmlStrdup(doc->URL);
12366 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012367 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12368 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012369 if (sax != NULL)
12370 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012371 ctxt->node_seq.maximum = 0;
12372 ctxt->node_seq.length = 0;
12373 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012374 xmlFreeParserCtxt(ctxt);
12375 newDoc->intSubset = NULL;
12376 newDoc->extSubset = NULL;
12377 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012378 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012379 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012380 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012381 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012382 ctxt->myDoc = doc;
12383 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012384
Daniel Veillard0161e632008-08-28 15:36:32 +000012385 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012386 * Get the 4 first bytes and decode the charset
12387 * if enc != XML_CHAR_ENCODING_NONE
12388 * plug some encoding conversion routines.
12389 */
12390 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012391 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12392 start[0] = RAW;
12393 start[1] = NXT(1);
12394 start[2] = NXT(2);
12395 start[3] = NXT(3);
12396 enc = xmlDetectCharEncoding(start, 4);
12397 if (enc != XML_CHAR_ENCODING_NONE) {
12398 xmlSwitchEncoding(ctxt, enc);
12399 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012400 }
12401
Owen Taylor3473f882001-02-23 17:55:21 +000012402 /*
12403 * Parse a possible text declaration first
12404 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012405 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012406 xmlParseTextDecl(ctxt);
12407 }
12408
Owen Taylor3473f882001-02-23 17:55:21 +000012409 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012410 ctxt->depth = depth;
12411
12412 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012413
Daniel Veillard561b7f82002-03-20 21:55:57 +000012414 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012415 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012416 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012417 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012418 }
12419 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012420 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012421 }
12422
12423 if (!ctxt->wellFormed) {
12424 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012425 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012426 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012427 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012428 } else {
12429 if (list != NULL) {
12430 xmlNodePtr cur;
12431
12432 /*
12433 * Return the newly created nodeset after unlinking it from
12434 * they pseudo parent.
12435 */
12436 cur = newDoc->children->children;
12437 *list = cur;
12438 while (cur != NULL) {
12439 cur->parent = NULL;
12440 cur = cur->next;
12441 }
12442 newDoc->children->children = NULL;
12443 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012444 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012445 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012446
12447 /*
12448 * Record in the parent context the number of entities replacement
12449 * done when parsing that reference.
12450 */
12451 oldctxt->nbentities += ctxt->nbentities;
12452 /*
12453 * Also record the size of the entity parsed
12454 */
12455 if (ctxt->input != NULL) {
12456 oldctxt->sizeentities += ctxt->input->consumed;
12457 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12458 }
12459 /*
12460 * And record the last error if any
12461 */
12462 if (ctxt->lastError.code != XML_ERR_OK)
12463 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12464
Owen Taylor3473f882001-02-23 17:55:21 +000012465 if (sax != NULL)
12466 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012467 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12468 oldctxt->node_seq.length = ctxt->node_seq.length;
12469 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012470 ctxt->node_seq.maximum = 0;
12471 ctxt->node_seq.length = 0;
12472 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012473 xmlFreeParserCtxt(ctxt);
12474 newDoc->intSubset = NULL;
12475 newDoc->extSubset = NULL;
12476 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012477
Owen Taylor3473f882001-02-23 17:55:21 +000012478 return(ret);
12479}
12480
Daniel Veillard81273902003-09-30 00:43:48 +000012481#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012482/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012483 * xmlParseExternalEntity:
12484 * @doc: the document the chunk pertains to
12485 * @sax: the SAX handler bloc (possibly NULL)
12486 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12487 * @depth: Used for loop detection, use 0
12488 * @URL: the URL for the entity to load
12489 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012490 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012491 *
12492 * Parse an external general entity
12493 * An external general parsed entity is well-formed if it matches the
12494 * production labeled extParsedEnt.
12495 *
12496 * [78] extParsedEnt ::= TextDecl? content
12497 *
12498 * Returns 0 if the entity is well formed, -1 in case of args problem and
12499 * the parser error code otherwise
12500 */
12501
12502int
12503xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012504 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012505 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012506 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012507}
12508
12509/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012510 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012511 * @doc: the document the chunk pertains to
12512 * @sax: the SAX handler bloc (possibly NULL)
12513 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12514 * @depth: Used for loop detection, use 0
12515 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012516 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012517 *
12518 * Parse a well-balanced chunk of an XML document
12519 * called by the parser
12520 * The allowed sequence for the Well Balanced Chunk is the one defined by
12521 * the content production in the XML grammar:
12522 *
12523 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12524 *
12525 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12526 * the parser error code otherwise
12527 */
12528
12529int
12530xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012531 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012532 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12533 depth, string, lst, 0 );
12534}
Daniel Veillard81273902003-09-30 00:43:48 +000012535#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012536
12537/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012538 * xmlParseBalancedChunkMemoryInternal:
12539 * @oldctxt: the existing parsing context
12540 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12541 * @user_data: the user data field for the parser context
12542 * @lst: the return value for the set of parsed nodes
12543 *
12544 *
12545 * Parse a well-balanced chunk of an XML document
12546 * called by the parser
12547 * The allowed sequence for the Well Balanced Chunk is the one defined by
12548 * the content production in the XML grammar:
12549 *
12550 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12551 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012552 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12553 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012554 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012555 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012556 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012557 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012558static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012559xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12560 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12561 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012562 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012563 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012564 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012565 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012566 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012567 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012568 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012569
Daniel Veillard0161e632008-08-28 15:36:32 +000012570 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12571 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012572 return(XML_ERR_ENTITY_LOOP);
12573 }
12574
12575
12576 if (lst != NULL)
12577 *lst = NULL;
12578 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012579 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012580
12581 size = xmlStrlen(string);
12582
12583 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012584 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012585 if (user_data != NULL)
12586 ctxt->userData = user_data;
12587 else
12588 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012589 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12590 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012591 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12592 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12593 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012594
12595 oldsax = ctxt->sax;
12596 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012597 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012598 ctxt->replaceEntities = oldctxt->replaceEntities;
12599 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012600
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012601 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012602 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012603 newDoc = xmlNewDoc(BAD_CAST "1.0");
12604 if (newDoc == NULL) {
12605 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012606 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012607 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012608 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012609 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012610 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012611 newDoc->dict = ctxt->dict;
12612 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012613 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012614 } else {
12615 ctxt->myDoc = oldctxt->myDoc;
12616 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012617 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012618 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012619 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12620 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012621 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012622 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012623 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012624 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012625 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012626 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012627 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012628 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012629 ctxt->myDoc->children = NULL;
12630 ctxt->myDoc->last = NULL;
12631 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012632 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012633 ctxt->instate = XML_PARSER_CONTENT;
12634 ctxt->depth = oldctxt->depth + 1;
12635
Daniel Veillard328f48c2002-11-15 15:24:34 +000012636 ctxt->validate = 0;
12637 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012638 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12639 /*
12640 * ID/IDREF registration will be done in xmlValidateElement below
12641 */
12642 ctxt->loadsubset |= XML_SKIP_IDS;
12643 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012644 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012645 ctxt->attsDefault = oldctxt->attsDefault;
12646 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012647
Daniel Veillard68e9e742002-11-16 15:35:11 +000012648 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012649 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012650 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012651 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012652 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012653 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012654 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012655 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012656 }
12657
12658 if (!ctxt->wellFormed) {
12659 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012660 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012661 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012662 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012663 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012664 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012665 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012666
William M. Brack7b9154b2003-09-27 19:23:50 +000012667 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012668 xmlNodePtr cur;
12669
12670 /*
12671 * Return the newly created nodeset after unlinking it from
12672 * they pseudo parent.
12673 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012674 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012675 *lst = cur;
12676 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012677#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012678 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12679 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12680 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012681 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12682 oldctxt->myDoc, cur);
12683 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012684#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012685 cur->parent = NULL;
12686 cur = cur->next;
12687 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012688 ctxt->myDoc->children->children = NULL;
12689 }
12690 if (ctxt->myDoc != NULL) {
12691 xmlFreeNode(ctxt->myDoc->children);
12692 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012693 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012694 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012695
12696 /*
12697 * Record in the parent context the number of entities replacement
12698 * done when parsing that reference.
12699 */
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012700 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard0161e632008-08-28 15:36:32 +000012701 /*
12702 * Also record the last error if any
12703 */
12704 if (ctxt->lastError.code != XML_ERR_OK)
12705 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12706
Daniel Veillard328f48c2002-11-15 15:24:34 +000012707 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012708 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012709 ctxt->attsDefault = NULL;
12710 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012711 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012712 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012713 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012714 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012715
Daniel Veillard328f48c2002-11-15 15:24:34 +000012716 return(ret);
12717}
12718
Daniel Veillard29b17482004-08-16 00:39:03 +000012719/**
12720 * xmlParseInNodeContext:
12721 * @node: the context node
12722 * @data: the input string
12723 * @datalen: the input string length in bytes
12724 * @options: a combination of xmlParserOption
12725 * @lst: the return value for the set of parsed nodes
12726 *
12727 * Parse a well-balanced chunk of an XML document
12728 * within the context (DTD, namespaces, etc ...) of the given node.
12729 *
12730 * The allowed sequence for the data is a Well Balanced Chunk defined by
12731 * the content production in the XML grammar:
12732 *
12733 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12734 *
12735 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12736 * error code otherwise
12737 */
12738xmlParserErrors
12739xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12740 int options, xmlNodePtr *lst) {
12741#ifdef SAX2
12742 xmlParserCtxtPtr ctxt;
12743 xmlDocPtr doc = NULL;
12744 xmlNodePtr fake, cur;
12745 int nsnr = 0;
12746
12747 xmlParserErrors ret = XML_ERR_OK;
12748
12749 /*
12750 * check all input parameters, grab the document
12751 */
12752 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12753 return(XML_ERR_INTERNAL_ERROR);
12754 switch (node->type) {
12755 case XML_ELEMENT_NODE:
12756 case XML_ATTRIBUTE_NODE:
12757 case XML_TEXT_NODE:
12758 case XML_CDATA_SECTION_NODE:
12759 case XML_ENTITY_REF_NODE:
12760 case XML_PI_NODE:
12761 case XML_COMMENT_NODE:
12762 case XML_DOCUMENT_NODE:
12763 case XML_HTML_DOCUMENT_NODE:
12764 break;
12765 default:
12766 return(XML_ERR_INTERNAL_ERROR);
12767
12768 }
12769 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12770 (node->type != XML_DOCUMENT_NODE) &&
12771 (node->type != XML_HTML_DOCUMENT_NODE))
12772 node = node->parent;
12773 if (node == NULL)
12774 return(XML_ERR_INTERNAL_ERROR);
12775 if (node->type == XML_ELEMENT_NODE)
12776 doc = node->doc;
12777 else
12778 doc = (xmlDocPtr) node;
12779 if (doc == NULL)
12780 return(XML_ERR_INTERNAL_ERROR);
12781
12782 /*
12783 * allocate a context and set-up everything not related to the
12784 * node position in the tree
12785 */
12786 if (doc->type == XML_DOCUMENT_NODE)
12787 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12788#ifdef LIBXML_HTML_ENABLED
12789 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12790 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12791#endif
12792 else
12793 return(XML_ERR_INTERNAL_ERROR);
12794
12795 if (ctxt == NULL)
12796 return(XML_ERR_NO_MEMORY);
12797 fake = xmlNewComment(NULL);
12798 if (fake == NULL) {
12799 xmlFreeParserCtxt(ctxt);
12800 return(XML_ERR_NO_MEMORY);
12801 }
12802 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012803
12804 /*
12805 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12806 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12807 * we must wait until the last moment to free the original one.
12808 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012809 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012810 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012811 xmlDictFree(ctxt->dict);
12812 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012813 } else
12814 options |= XML_PARSE_NODICT;
12815
Daniel Veillard37334572008-07-31 08:20:02 +000012816 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012817 xmlDetectSAX2(ctxt);
12818 ctxt->myDoc = doc;
12819
12820 if (node->type == XML_ELEMENT_NODE) {
12821 nodePush(ctxt, node);
12822 /*
12823 * initialize the SAX2 namespaces stack
12824 */
12825 cur = node;
12826 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12827 xmlNsPtr ns = cur->nsDef;
12828 const xmlChar *iprefix, *ihref;
12829
12830 while (ns != NULL) {
12831 if (ctxt->dict) {
12832 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12833 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12834 } else {
12835 iprefix = ns->prefix;
12836 ihref = ns->href;
12837 }
12838
12839 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12840 nsPush(ctxt, iprefix, ihref);
12841 nsnr++;
12842 }
12843 ns = ns->next;
12844 }
12845 cur = cur->parent;
12846 }
12847 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000012848 }
Daniel Veillard29b17482004-08-16 00:39:03 +000012849
12850 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12851 /*
12852 * ID/IDREF registration will be done in xmlValidateElement below
12853 */
12854 ctxt->loadsubset |= XML_SKIP_IDS;
12855 }
12856
Daniel Veillard499cc922006-01-18 17:22:35 +000012857#ifdef LIBXML_HTML_ENABLED
12858 if (doc->type == XML_HTML_DOCUMENT_NODE)
12859 __htmlParseContent(ctxt);
12860 else
12861#endif
12862 xmlParseContent(ctxt);
12863
Daniel Veillard29b17482004-08-16 00:39:03 +000012864 nsPop(ctxt, nsnr);
12865 if ((RAW == '<') && (NXT(1) == '/')) {
12866 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12867 } else if (RAW != 0) {
12868 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12869 }
12870 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12871 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12872 ctxt->wellFormed = 0;
12873 }
12874
12875 if (!ctxt->wellFormed) {
12876 if (ctxt->errNo == 0)
12877 ret = XML_ERR_INTERNAL_ERROR;
12878 else
12879 ret = (xmlParserErrors)ctxt->errNo;
12880 } else {
12881 ret = XML_ERR_OK;
12882 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012883
Daniel Veillard29b17482004-08-16 00:39:03 +000012884 /*
12885 * Return the newly created nodeset after unlinking it from
12886 * the pseudo sibling.
12887 */
Daniel Veillard0161e632008-08-28 15:36:32 +000012888
Daniel Veillard29b17482004-08-16 00:39:03 +000012889 cur = fake->next;
12890 fake->next = NULL;
12891 node->last = fake;
12892
12893 if (cur != NULL) {
12894 cur->prev = NULL;
12895 }
12896
12897 *lst = cur;
12898
12899 while (cur != NULL) {
12900 cur->parent = NULL;
12901 cur = cur->next;
12902 }
12903
12904 xmlUnlinkNode(fake);
12905 xmlFreeNode(fake);
12906
12907
12908 if (ret != XML_ERR_OK) {
12909 xmlFreeNodeList(*lst);
12910 *lst = NULL;
12911 }
William M. Brackc3f81342004-10-03 01:22:44 +000012912
William M. Brackb7b54de2004-10-06 16:38:01 +000012913 if (doc->dict != NULL)
12914 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012915 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012916
Daniel Veillard29b17482004-08-16 00:39:03 +000012917 return(ret);
12918#else /* !SAX2 */
12919 return(XML_ERR_INTERNAL_ERROR);
12920#endif
12921}
12922
Daniel Veillard81273902003-09-30 00:43:48 +000012923#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012924/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012925 * xmlParseBalancedChunkMemoryRecover:
12926 * @doc: the document the chunk pertains to
12927 * @sax: the SAX handler bloc (possibly NULL)
12928 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12929 * @depth: Used for loop detection, use 0
12930 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12931 * @lst: the return value for the set of parsed nodes
12932 * @recover: return nodes even if the data is broken (use 0)
12933 *
12934 *
12935 * Parse a well-balanced chunk of an XML document
12936 * called by the parser
12937 * The allowed sequence for the Well Balanced Chunk is the one defined by
12938 * the content production in the XML grammar:
12939 *
12940 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12941 *
12942 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12943 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000012944 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000012945 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000012946 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12947 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000012948 */
12949int
12950xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000012951 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000012952 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012953 xmlParserCtxtPtr ctxt;
12954 xmlDocPtr newDoc;
12955 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012956 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012957 int size;
12958 int ret = 0;
12959
Daniel Veillard0161e632008-08-28 15:36:32 +000012960 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000012961 return(XML_ERR_ENTITY_LOOP);
12962 }
12963
12964
Daniel Veillardcda96922001-08-21 10:56:31 +000012965 if (lst != NULL)
12966 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012967 if (string == NULL)
12968 return(-1);
12969
12970 size = xmlStrlen(string);
12971
12972 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12973 if (ctxt == NULL) return(-1);
12974 ctxt->userData = ctxt;
12975 if (sax != NULL) {
12976 oldsax = ctxt->sax;
12977 ctxt->sax = sax;
12978 if (user_data != NULL)
12979 ctxt->userData = user_data;
12980 }
12981 newDoc = xmlNewDoc(BAD_CAST "1.0");
12982 if (newDoc == NULL) {
12983 xmlFreeParserCtxt(ctxt);
12984 return(-1);
12985 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012986 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012987 if ((doc != NULL) && (doc->dict != NULL)) {
12988 xmlDictFree(ctxt->dict);
12989 ctxt->dict = doc->dict;
12990 xmlDictReference(ctxt->dict);
12991 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12992 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12993 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12994 ctxt->dictNames = 1;
12995 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000012996 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012997 }
Owen Taylor3473f882001-02-23 17:55:21 +000012998 if (doc != NULL) {
12999 newDoc->intSubset = doc->intSubset;
13000 newDoc->extSubset = doc->extSubset;
13001 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013002 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13003 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013004 if (sax != NULL)
13005 ctxt->sax = oldsax;
13006 xmlFreeParserCtxt(ctxt);
13007 newDoc->intSubset = NULL;
13008 newDoc->extSubset = NULL;
13009 xmlFreeDoc(newDoc);
13010 return(-1);
13011 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013012 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13013 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013014 if (doc == NULL) {
13015 ctxt->myDoc = newDoc;
13016 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013017 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013018 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013019 /* Ensure that doc has XML spec namespace */
13020 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13021 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013022 }
13023 ctxt->instate = XML_PARSER_CONTENT;
13024 ctxt->depth = depth;
13025
13026 /*
13027 * Doing validity checking on chunk doesn't make sense
13028 */
13029 ctxt->validate = 0;
13030 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013031 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013032
Daniel Veillardb39bc392002-10-26 19:29:51 +000013033 if ( doc != NULL ){
13034 content = doc->children;
13035 doc->children = NULL;
13036 xmlParseContent(ctxt);
13037 doc->children = content;
13038 }
13039 else {
13040 xmlParseContent(ctxt);
13041 }
Owen Taylor3473f882001-02-23 17:55:21 +000013042 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013043 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013044 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013045 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013046 }
13047 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013048 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013049 }
13050
13051 if (!ctxt->wellFormed) {
13052 if (ctxt->errNo == 0)
13053 ret = 1;
13054 else
13055 ret = ctxt->errNo;
13056 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013057 ret = 0;
13058 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013059
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013060 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13061 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013062
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013063 /*
13064 * Return the newly created nodeset after unlinking it from
13065 * they pseudo parent.
13066 */
13067 cur = newDoc->children->children;
13068 *lst = cur;
13069 while (cur != NULL) {
13070 xmlSetTreeDoc(cur, doc);
13071 cur->parent = NULL;
13072 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013073 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013074 newDoc->children->children = NULL;
13075 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013076
13077 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013078 ctxt->sax = oldsax;
13079 xmlFreeParserCtxt(ctxt);
13080 newDoc->intSubset = NULL;
13081 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013082 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013083 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013084
Owen Taylor3473f882001-02-23 17:55:21 +000013085 return(ret);
13086}
13087
13088/**
13089 * xmlSAXParseEntity:
13090 * @sax: the SAX handler block
13091 * @filename: the filename
13092 *
13093 * parse an XML external entity out of context and build a tree.
13094 * It use the given SAX function block to handle the parsing callback.
13095 * If sax is NULL, fallback to the default DOM tree building routines.
13096 *
13097 * [78] extParsedEnt ::= TextDecl? content
13098 *
13099 * This correspond to a "Well Balanced" chunk
13100 *
13101 * Returns the resulting document tree
13102 */
13103
13104xmlDocPtr
13105xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13106 xmlDocPtr ret;
13107 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013108
13109 ctxt = xmlCreateFileParserCtxt(filename);
13110 if (ctxt == NULL) {
13111 return(NULL);
13112 }
13113 if (sax != NULL) {
13114 if (ctxt->sax != NULL)
13115 xmlFree(ctxt->sax);
13116 ctxt->sax = sax;
13117 ctxt->userData = NULL;
13118 }
13119
Owen Taylor3473f882001-02-23 17:55:21 +000013120 xmlParseExtParsedEnt(ctxt);
13121
13122 if (ctxt->wellFormed)
13123 ret = ctxt->myDoc;
13124 else {
13125 ret = NULL;
13126 xmlFreeDoc(ctxt->myDoc);
13127 ctxt->myDoc = NULL;
13128 }
13129 if (sax != NULL)
13130 ctxt->sax = NULL;
13131 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013132
Owen Taylor3473f882001-02-23 17:55:21 +000013133 return(ret);
13134}
13135
13136/**
13137 * xmlParseEntity:
13138 * @filename: the filename
13139 *
13140 * parse an XML external entity out of context and build a tree.
13141 *
13142 * [78] extParsedEnt ::= TextDecl? content
13143 *
13144 * This correspond to a "Well Balanced" chunk
13145 *
13146 * Returns the resulting document tree
13147 */
13148
13149xmlDocPtr
13150xmlParseEntity(const char *filename) {
13151 return(xmlSAXParseEntity(NULL, filename));
13152}
Daniel Veillard81273902003-09-30 00:43:48 +000013153#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013154
13155/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013156 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013157 * @URL: the entity URL
13158 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013159 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013160 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013161 *
13162 * Create a parser context for an external entity
13163 * Automatic support for ZLIB/Compress compressed document is provided
13164 * by default if found at compile-time.
13165 *
13166 * Returns the new parser context or NULL
13167 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013168static xmlParserCtxtPtr
13169xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13170 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013171 xmlParserCtxtPtr ctxt;
13172 xmlParserInputPtr inputStream;
13173 char *directory = NULL;
13174 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013175
Owen Taylor3473f882001-02-23 17:55:21 +000013176 ctxt = xmlNewParserCtxt();
13177 if (ctxt == NULL) {
13178 return(NULL);
13179 }
13180
Daniel Veillard48247b42009-07-10 16:12:46 +020013181 if (pctx != NULL) {
13182 ctxt->options = pctx->options;
13183 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013184 }
13185
Owen Taylor3473f882001-02-23 17:55:21 +000013186 uri = xmlBuildURI(URL, base);
13187
13188 if (uri == NULL) {
13189 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13190 if (inputStream == NULL) {
13191 xmlFreeParserCtxt(ctxt);
13192 return(NULL);
13193 }
13194
13195 inputPush(ctxt, inputStream);
13196
13197 if ((ctxt->directory == NULL) && (directory == NULL))
13198 directory = xmlParserGetDirectory((char *)URL);
13199 if ((ctxt->directory == NULL) && (directory != NULL))
13200 ctxt->directory = directory;
13201 } else {
13202 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13203 if (inputStream == NULL) {
13204 xmlFree(uri);
13205 xmlFreeParserCtxt(ctxt);
13206 return(NULL);
13207 }
13208
13209 inputPush(ctxt, inputStream);
13210
13211 if ((ctxt->directory == NULL) && (directory == NULL))
13212 directory = xmlParserGetDirectory((char *)uri);
13213 if ((ctxt->directory == NULL) && (directory != NULL))
13214 ctxt->directory = directory;
13215 xmlFree(uri);
13216 }
Owen Taylor3473f882001-02-23 17:55:21 +000013217 return(ctxt);
13218}
13219
Rob Richards9c0aa472009-03-26 18:10:19 +000013220/**
13221 * xmlCreateEntityParserCtxt:
13222 * @URL: the entity URL
13223 * @ID: the entity PUBLIC ID
13224 * @base: a possible base for the target URI
13225 *
13226 * Create a parser context for an external entity
13227 * Automatic support for ZLIB/Compress compressed document is provided
13228 * by default if found at compile-time.
13229 *
13230 * Returns the new parser context or NULL
13231 */
13232xmlParserCtxtPtr
13233xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13234 const xmlChar *base) {
13235 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13236
13237}
13238
Owen Taylor3473f882001-02-23 17:55:21 +000013239/************************************************************************
13240 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013241 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013242 * *
13243 ************************************************************************/
13244
13245/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013246 * xmlCreateURLParserCtxt:
13247 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013248 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013249 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013250 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013251 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013252 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013253 *
13254 * Returns the new parser context or NULL
13255 */
13256xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013257xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013258{
13259 xmlParserCtxtPtr ctxt;
13260 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013261 char *directory = NULL;
13262
Owen Taylor3473f882001-02-23 17:55:21 +000013263 ctxt = xmlNewParserCtxt();
13264 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013265 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013266 return(NULL);
13267 }
13268
Daniel Veillarddf292f72005-01-16 19:00:15 +000013269 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013270 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013271 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013272
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013273 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013274 if (inputStream == NULL) {
13275 xmlFreeParserCtxt(ctxt);
13276 return(NULL);
13277 }
13278
Owen Taylor3473f882001-02-23 17:55:21 +000013279 inputPush(ctxt, inputStream);
13280 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013281 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013282 if ((ctxt->directory == NULL) && (directory != NULL))
13283 ctxt->directory = directory;
13284
13285 return(ctxt);
13286}
13287
Daniel Veillard61b93382003-11-03 14:28:31 +000013288/**
13289 * xmlCreateFileParserCtxt:
13290 * @filename: the filename
13291 *
13292 * Create a parser context for a file content.
13293 * Automatic support for ZLIB/Compress compressed document is provided
13294 * by default if found at compile-time.
13295 *
13296 * Returns the new parser context or NULL
13297 */
13298xmlParserCtxtPtr
13299xmlCreateFileParserCtxt(const char *filename)
13300{
13301 return(xmlCreateURLParserCtxt(filename, 0));
13302}
13303
Daniel Veillard81273902003-09-30 00:43:48 +000013304#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013305/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013306 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013307 * @sax: the SAX handler block
13308 * @filename: the filename
13309 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13310 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013311 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013312 *
13313 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13314 * compressed document is provided by default if found at compile-time.
13315 * It use the given SAX function block to handle the parsing callback.
13316 * If sax is NULL, fallback to the default DOM tree building routines.
13317 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013318 * User data (void *) is stored within the parser context in the
13319 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013320 *
Owen Taylor3473f882001-02-23 17:55:21 +000013321 * Returns the resulting document tree
13322 */
13323
13324xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013325xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13326 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013327 xmlDocPtr ret;
13328 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013329
Daniel Veillard635ef722001-10-29 11:48:19 +000013330 xmlInitParser();
13331
Owen Taylor3473f882001-02-23 17:55:21 +000013332 ctxt = xmlCreateFileParserCtxt(filename);
13333 if (ctxt == NULL) {
13334 return(NULL);
13335 }
13336 if (sax != NULL) {
13337 if (ctxt->sax != NULL)
13338 xmlFree(ctxt->sax);
13339 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013340 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013341 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013342 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013343 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013344 }
Owen Taylor3473f882001-02-23 17:55:21 +000013345
Daniel Veillard37d2d162008-03-14 10:54:00 +000013346 if (ctxt->directory == NULL)
13347 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013348
Daniel Veillarddad3f682002-11-17 16:47:27 +000013349 ctxt->recovery = recovery;
13350
Owen Taylor3473f882001-02-23 17:55:21 +000013351 xmlParseDocument(ctxt);
13352
William M. Brackc07329e2003-09-08 01:57:30 +000013353 if ((ctxt->wellFormed) || recovery) {
13354 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013355 if (ret != NULL) {
13356 if (ctxt->input->buf->compressed > 0)
13357 ret->compression = 9;
13358 else
13359 ret->compression = ctxt->input->buf->compressed;
13360 }
William M. Brackc07329e2003-09-08 01:57:30 +000013361 }
Owen Taylor3473f882001-02-23 17:55:21 +000013362 else {
13363 ret = NULL;
13364 xmlFreeDoc(ctxt->myDoc);
13365 ctxt->myDoc = NULL;
13366 }
13367 if (sax != NULL)
13368 ctxt->sax = NULL;
13369 xmlFreeParserCtxt(ctxt);
13370
13371 return(ret);
13372}
13373
13374/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013375 * xmlSAXParseFile:
13376 * @sax: the SAX handler block
13377 * @filename: the filename
13378 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13379 * documents
13380 *
13381 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13382 * compressed document is provided by default if found at compile-time.
13383 * It use the given SAX function block to handle the parsing callback.
13384 * If sax is NULL, fallback to the default DOM tree building routines.
13385 *
13386 * Returns the resulting document tree
13387 */
13388
13389xmlDocPtr
13390xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13391 int recovery) {
13392 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13393}
13394
13395/**
Owen Taylor3473f882001-02-23 17:55:21 +000013396 * xmlRecoverDoc:
13397 * @cur: a pointer to an array of xmlChar
13398 *
13399 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013400 * In the case the document is not Well Formed, a attempt to build a
13401 * tree is tried anyway
13402 *
13403 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013404 */
13405
13406xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020013407xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013408 return(xmlSAXParseDoc(NULL, cur, 1));
13409}
13410
13411/**
13412 * xmlParseFile:
13413 * @filename: the filename
13414 *
13415 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13416 * compressed document is provided by default if found at compile-time.
13417 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013418 * Returns the resulting document tree if the file was wellformed,
13419 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013420 */
13421
13422xmlDocPtr
13423xmlParseFile(const char *filename) {
13424 return(xmlSAXParseFile(NULL, filename, 0));
13425}
13426
13427/**
13428 * xmlRecoverFile:
13429 * @filename: the filename
13430 *
13431 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13432 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013433 * In the case the document is not Well Formed, it attempts to build
13434 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013435 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013436 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013437 */
13438
13439xmlDocPtr
13440xmlRecoverFile(const char *filename) {
13441 return(xmlSAXParseFile(NULL, filename, 1));
13442}
13443
13444
13445/**
13446 * xmlSetupParserForBuffer:
13447 * @ctxt: an XML parser context
13448 * @buffer: a xmlChar * buffer
13449 * @filename: a file name
13450 *
13451 * Setup the parser context to parse a new buffer; Clears any prior
13452 * contents from the parser context. The buffer parameter must not be
13453 * NULL, but the filename parameter can be
13454 */
13455void
13456xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13457 const char* filename)
13458{
13459 xmlParserInputPtr input;
13460
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013461 if ((ctxt == NULL) || (buffer == NULL))
13462 return;
13463
Owen Taylor3473f882001-02-23 17:55:21 +000013464 input = xmlNewInputStream(ctxt);
13465 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013466 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013467 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013468 return;
13469 }
13470
13471 xmlClearParserCtxt(ctxt);
13472 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013473 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013474 input->base = buffer;
13475 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013476 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013477 inputPush(ctxt, input);
13478}
13479
13480/**
13481 * xmlSAXUserParseFile:
13482 * @sax: a SAX handler
13483 * @user_data: The user data returned on SAX callbacks
13484 * @filename: a file name
13485 *
13486 * parse an XML file and call the given SAX handler routines.
13487 * Automatic support for ZLIB/Compress compressed document is provided
13488 *
13489 * Returns 0 in case of success or a error number otherwise
13490 */
13491int
13492xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13493 const char *filename) {
13494 int ret = 0;
13495 xmlParserCtxtPtr ctxt;
13496
13497 ctxt = xmlCreateFileParserCtxt(filename);
13498 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013499 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013500 xmlFree(ctxt->sax);
13501 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013502 xmlDetectSAX2(ctxt);
13503
Owen Taylor3473f882001-02-23 17:55:21 +000013504 if (user_data != NULL)
13505 ctxt->userData = user_data;
13506
13507 xmlParseDocument(ctxt);
13508
13509 if (ctxt->wellFormed)
13510 ret = 0;
13511 else {
13512 if (ctxt->errNo != 0)
13513 ret = ctxt->errNo;
13514 else
13515 ret = -1;
13516 }
13517 if (sax != NULL)
13518 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013519 if (ctxt->myDoc != NULL) {
13520 xmlFreeDoc(ctxt->myDoc);
13521 ctxt->myDoc = NULL;
13522 }
Owen Taylor3473f882001-02-23 17:55:21 +000013523 xmlFreeParserCtxt(ctxt);
13524
13525 return ret;
13526}
Daniel Veillard81273902003-09-30 00:43:48 +000013527#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013528
13529/************************************************************************
13530 * *
13531 * Front ends when parsing from memory *
13532 * *
13533 ************************************************************************/
13534
13535/**
13536 * xmlCreateMemoryParserCtxt:
13537 * @buffer: a pointer to a char array
13538 * @size: the size of the array
13539 *
13540 * Create a parser context for an XML in-memory document.
13541 *
13542 * Returns the new parser context or NULL
13543 */
13544xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013545xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013546 xmlParserCtxtPtr ctxt;
13547 xmlParserInputPtr input;
13548 xmlParserInputBufferPtr buf;
13549
13550 if (buffer == NULL)
13551 return(NULL);
13552 if (size <= 0)
13553 return(NULL);
13554
13555 ctxt = xmlNewParserCtxt();
13556 if (ctxt == NULL)
13557 return(NULL);
13558
Daniel Veillard53350552003-09-18 13:35:51 +000013559 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013560 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013561 if (buf == NULL) {
13562 xmlFreeParserCtxt(ctxt);
13563 return(NULL);
13564 }
Owen Taylor3473f882001-02-23 17:55:21 +000013565
13566 input = xmlNewInputStream(ctxt);
13567 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013568 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013569 xmlFreeParserCtxt(ctxt);
13570 return(NULL);
13571 }
13572
13573 input->filename = NULL;
13574 input->buf = buf;
13575 input->base = input->buf->buffer->content;
13576 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013577 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013578
13579 inputPush(ctxt, input);
13580 return(ctxt);
13581}
13582
Daniel Veillard81273902003-09-30 00:43:48 +000013583#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013584/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013585 * xmlSAXParseMemoryWithData:
13586 * @sax: the SAX handler block
13587 * @buffer: an pointer to a char array
13588 * @size: the size of the array
13589 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13590 * documents
13591 * @data: the userdata
13592 *
13593 * parse an XML in-memory block and use the given SAX function block
13594 * to handle the parsing callback. If sax is NULL, fallback to the default
13595 * DOM tree building routines.
13596 *
13597 * User data (void *) is stored within the parser context in the
13598 * context's _private member, so it is available nearly everywhere in libxml
13599 *
13600 * Returns the resulting document tree
13601 */
13602
13603xmlDocPtr
13604xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13605 int size, int recovery, void *data) {
13606 xmlDocPtr ret;
13607 xmlParserCtxtPtr ctxt;
13608
Daniel Veillardab2a7632009-07-09 08:45:03 +020013609 xmlInitParser();
13610
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013611 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13612 if (ctxt == NULL) return(NULL);
13613 if (sax != NULL) {
13614 if (ctxt->sax != NULL)
13615 xmlFree(ctxt->sax);
13616 ctxt->sax = sax;
13617 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013618 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013619 if (data!=NULL) {
13620 ctxt->_private=data;
13621 }
13622
Daniel Veillardadba5f12003-04-04 16:09:01 +000013623 ctxt->recovery = recovery;
13624
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013625 xmlParseDocument(ctxt);
13626
13627 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13628 else {
13629 ret = NULL;
13630 xmlFreeDoc(ctxt->myDoc);
13631 ctxt->myDoc = NULL;
13632 }
13633 if (sax != NULL)
13634 ctxt->sax = NULL;
13635 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013636
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013637 return(ret);
13638}
13639
13640/**
Owen Taylor3473f882001-02-23 17:55:21 +000013641 * xmlSAXParseMemory:
13642 * @sax: the SAX handler block
13643 * @buffer: an pointer to a char array
13644 * @size: the size of the array
13645 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13646 * documents
13647 *
13648 * parse an XML in-memory block and use the given SAX function block
13649 * to handle the parsing callback. If sax is NULL, fallback to the default
13650 * DOM tree building routines.
13651 *
13652 * Returns the resulting document tree
13653 */
13654xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013655xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13656 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013657 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013658}
13659
13660/**
13661 * xmlParseMemory:
13662 * @buffer: an pointer to a char array
13663 * @size: the size of the array
13664 *
13665 * parse an XML in-memory block and build a tree.
13666 *
13667 * Returns the resulting document tree
13668 */
13669
Daniel Veillard50822cb2001-07-26 20:05:51 +000013670xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013671 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13672}
13673
13674/**
13675 * xmlRecoverMemory:
13676 * @buffer: an pointer to a char array
13677 * @size: the size of the array
13678 *
13679 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013680 * In the case the document is not Well Formed, an attempt to
13681 * build a tree is tried anyway
13682 *
13683 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013684 */
13685
Daniel Veillard50822cb2001-07-26 20:05:51 +000013686xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013687 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13688}
13689
13690/**
13691 * xmlSAXUserParseMemory:
13692 * @sax: a SAX handler
13693 * @user_data: The user data returned on SAX callbacks
13694 * @buffer: an in-memory XML document input
13695 * @size: the length of the XML document in bytes
13696 *
13697 * A better SAX parsing routine.
13698 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020013699 *
Owen Taylor3473f882001-02-23 17:55:21 +000013700 * Returns 0 in case of success or a error number otherwise
13701 */
13702int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013703 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013704 int ret = 0;
13705 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013706
13707 xmlInitParser();
13708
Owen Taylor3473f882001-02-23 17:55:21 +000013709 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13710 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013711 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13712 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013713 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013714 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013715
Daniel Veillard30211a02001-04-26 09:33:18 +000013716 if (user_data != NULL)
13717 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013718
Owen Taylor3473f882001-02-23 17:55:21 +000013719 xmlParseDocument(ctxt);
13720
13721 if (ctxt->wellFormed)
13722 ret = 0;
13723 else {
13724 if (ctxt->errNo != 0)
13725 ret = ctxt->errNo;
13726 else
13727 ret = -1;
13728 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013729 if (sax != NULL)
13730 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013731 if (ctxt->myDoc != NULL) {
13732 xmlFreeDoc(ctxt->myDoc);
13733 ctxt->myDoc = NULL;
13734 }
Owen Taylor3473f882001-02-23 17:55:21 +000013735 xmlFreeParserCtxt(ctxt);
13736
13737 return ret;
13738}
Daniel Veillard81273902003-09-30 00:43:48 +000013739#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013740
13741/**
13742 * xmlCreateDocParserCtxt:
13743 * @cur: a pointer to an array of xmlChar
13744 *
13745 * Creates a parser context for an XML in-memory document.
13746 *
13747 * Returns the new parser context or NULL
13748 */
13749xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013750xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013751 int len;
13752
13753 if (cur == NULL)
13754 return(NULL);
13755 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013756 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013757}
13758
Daniel Veillard81273902003-09-30 00:43:48 +000013759#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013760/**
13761 * xmlSAXParseDoc:
13762 * @sax: the SAX handler block
13763 * @cur: a pointer to an array of xmlChar
13764 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13765 * documents
13766 *
13767 * parse an XML in-memory document and build a tree.
13768 * It use the given SAX function block to handle the parsing callback.
13769 * If sax is NULL, fallback to the default DOM tree building routines.
13770 *
13771 * Returns the resulting document tree
13772 */
13773
13774xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013775xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013776 xmlDocPtr ret;
13777 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013778 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013779
Daniel Veillard38936062004-11-04 17:45:11 +000013780 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013781
13782
13783 ctxt = xmlCreateDocParserCtxt(cur);
13784 if (ctxt == NULL) return(NULL);
13785 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013786 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013787 ctxt->sax = sax;
13788 ctxt->userData = NULL;
13789 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013790 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013791
13792 xmlParseDocument(ctxt);
13793 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13794 else {
13795 ret = NULL;
13796 xmlFreeDoc(ctxt->myDoc);
13797 ctxt->myDoc = NULL;
13798 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013799 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013800 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013801 xmlFreeParserCtxt(ctxt);
13802
13803 return(ret);
13804}
13805
13806/**
13807 * xmlParseDoc:
13808 * @cur: a pointer to an array of xmlChar
13809 *
13810 * parse an XML in-memory document and build a tree.
13811 *
13812 * Returns the resulting document tree
13813 */
13814
13815xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013816xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013817 return(xmlSAXParseDoc(NULL, cur, 0));
13818}
Daniel Veillard81273902003-09-30 00:43:48 +000013819#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013820
Daniel Veillard81273902003-09-30 00:43:48 +000013821#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013822/************************************************************************
13823 * *
13824 * Specific function to keep track of entities references *
13825 * and used by the XSLT debugger *
13826 * *
13827 ************************************************************************/
13828
13829static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13830
13831/**
13832 * xmlAddEntityReference:
13833 * @ent : A valid entity
13834 * @firstNode : A valid first node for children of entity
13835 * @lastNode : A valid last node of children entity
13836 *
13837 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13838 */
13839static void
13840xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13841 xmlNodePtr lastNode)
13842{
13843 if (xmlEntityRefFunc != NULL) {
13844 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13845 }
13846}
13847
13848
13849/**
13850 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013851 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013852 *
13853 * Set the function to call call back when a xml reference has been made
13854 */
13855void
13856xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13857{
13858 xmlEntityRefFunc = func;
13859}
Daniel Veillard81273902003-09-30 00:43:48 +000013860#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013861
13862/************************************************************************
13863 * *
13864 * Miscellaneous *
13865 * *
13866 ************************************************************************/
13867
13868#ifdef LIBXML_XPATH_ENABLED
13869#include <libxml/xpath.h>
13870#endif
13871
Daniel Veillardffa3c742005-07-21 13:24:09 +000013872extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013873static int xmlParserInitialized = 0;
13874
13875/**
13876 * xmlInitParser:
13877 *
13878 * Initialization function for the XML parser.
13879 * This is not reentrant. Call once before processing in case of
13880 * use in multithreaded programs.
13881 */
13882
13883void
13884xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013885 if (xmlParserInitialized != 0)
13886 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013887
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013888#ifdef LIBXML_THREAD_ENABLED
13889 __xmlGlobalInitMutexLock();
13890 if (xmlParserInitialized == 0) {
13891#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020013892 xmlInitGlobals();
13893 xmlInitThreads();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013894 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13895 (xmlGenericError == NULL))
13896 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013897 xmlInitMemory();
13898 xmlInitCharEncodingHandlers();
13899 xmlDefaultSAXHandlerInit();
13900 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013901#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013902 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013903#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013904#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013905 htmlInitAutoClose();
13906 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013907#endif
13908#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013909 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013910#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013911 xmlParserInitialized = 1;
13912#ifdef LIBXML_THREAD_ENABLED
13913 }
13914 __xmlGlobalInitMutexUnlock();
13915#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013916}
13917
13918/**
13919 * xmlCleanupParser:
13920 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013921 * This function name is somewhat misleading. It does not clean up
13922 * parser state, it cleans up memory allocated by the library itself.
13923 * It is a cleanup function for the XML library. It tries to reclaim all
13924 * related global memory allocated for the library processing.
13925 * It doesn't deallocate any document related memory. One should
13926 * call xmlCleanupParser() only when the process has finished using
13927 * the library and all XML/HTML documents built with it.
13928 * See also xmlInitParser() which has the opposite function of preparing
13929 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000013930 *
13931 * WARNING: if your application is multithreaded or has plugin support
13932 * calling this may crash the application if another thread or
13933 * a plugin is still using libxml2. It's sometimes very hard to
13934 * guess if libxml2 is in use in the application, some libraries
13935 * or plugins may use it without notice. In case of doubt abstain
13936 * from calling this function or do it just before calling exit()
13937 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000013938 */
13939
13940void
13941xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013942 if (!xmlParserInitialized)
13943 return;
13944
Owen Taylor3473f882001-02-23 17:55:21 +000013945 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013946#ifdef LIBXML_CATALOG_ENABLED
13947 xmlCatalogCleanup();
13948#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013949 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013950 xmlCleanupInputCallbacks();
13951#ifdef LIBXML_OUTPUT_ENABLED
13952 xmlCleanupOutputCallbacks();
13953#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013954#ifdef LIBXML_SCHEMAS_ENABLED
13955 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013956 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013957#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013958 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013959 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013960 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013961 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013962 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013963}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013964
13965/************************************************************************
13966 * *
13967 * New set (2.6.0) of simpler and more flexible APIs *
13968 * *
13969 ************************************************************************/
13970
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the
 * macro is expanded; it may be NULL, in which case any non-NULL @str
 * is freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement and must be followed by a semicolon, which keeps
 * it safe inside unbraced if/else bodies.
 */
#define DICT_FREE(str)						\
	do {							\
	    if ((str) && ((!dict) ||				\
	        (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
	        xmlFree((char *)(str));				\
	} while (0)
13982
13983/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013984 * xmlCtxtReset:
13985 * @ctxt: an XML parser context
13986 *
13987 * Reset a parser context
13988 */
13989void
13990xmlCtxtReset(xmlParserCtxtPtr ctxt)
13991{
13992 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013993 xmlDictPtr dict;
13994
13995 if (ctxt == NULL)
13996 return;
13997
13998 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013999
14000 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14001 xmlFreeInputStream(input);
14002 }
14003 ctxt->inputNr = 0;
14004 ctxt->input = NULL;
14005
14006 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014007 if (ctxt->spaceTab != NULL) {
14008 ctxt->spaceTab[0] = -1;
14009 ctxt->space = &ctxt->spaceTab[0];
14010 } else {
14011 ctxt->space = NULL;
14012 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014013
14014
14015 ctxt->nodeNr = 0;
14016 ctxt->node = NULL;
14017
14018 ctxt->nameNr = 0;
14019 ctxt->name = NULL;
14020
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014021 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014022 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014023 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014024 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014025 DICT_FREE(ctxt->directory);
14026 ctxt->directory = NULL;
14027 DICT_FREE(ctxt->extSubURI);
14028 ctxt->extSubURI = NULL;
14029 DICT_FREE(ctxt->extSubSystem);
14030 ctxt->extSubSystem = NULL;
14031 if (ctxt->myDoc != NULL)
14032 xmlFreeDoc(ctxt->myDoc);
14033 ctxt->myDoc = NULL;
14034
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014035 ctxt->standalone = -1;
14036 ctxt->hasExternalSubset = 0;
14037 ctxt->hasPErefs = 0;
14038 ctxt->html = 0;
14039 ctxt->external = 0;
14040 ctxt->instate = XML_PARSER_START;
14041 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014042
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014043 ctxt->wellFormed = 1;
14044 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014045 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014046 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014047#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014048 ctxt->vctxt.userData = ctxt;
14049 ctxt->vctxt.error = xmlParserValidityError;
14050 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014051#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014052 ctxt->record_info = 0;
14053 ctxt->nbChars = 0;
14054 ctxt->checkIndex = 0;
14055 ctxt->inSubset = 0;
14056 ctxt->errNo = XML_ERR_OK;
14057 ctxt->depth = 0;
14058 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14059 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014060 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014061 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014062 xmlInitNodeInfoSeq(&ctxt->node_seq);
14063
14064 if (ctxt->attsDefault != NULL) {
14065 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14066 ctxt->attsDefault = NULL;
14067 }
14068 if (ctxt->attsSpecial != NULL) {
14069 xmlHashFree(ctxt->attsSpecial, NULL);
14070 ctxt->attsSpecial = NULL;
14071 }
14072
Daniel Veillard4432df22003-09-28 18:58:27 +000014073#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014074 if (ctxt->catalogs != NULL)
14075 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014076#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014077 if (ctxt->lastError.code != XML_ERR_OK)
14078 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014079}
14080
14081/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014082 * xmlCtxtResetPush:
14083 * @ctxt: an XML parser context
14084 * @chunk: a pointer to an array of chars
14085 * @size: number of chars in the array
14086 * @filename: an optional file name or URI
14087 * @encoding: the document encoding, or NULL
14088 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014089 * Reset a push parser context
14090 *
14091 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014092 */
14093int
14094xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14095 int size, const char *filename, const char *encoding)
14096{
14097 xmlParserInputPtr inputStream;
14098 xmlParserInputBufferPtr buf;
14099 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14100
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014101 if (ctxt == NULL)
14102 return(1);
14103
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014104 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14105 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14106
14107 buf = xmlAllocParserInputBuffer(enc);
14108 if (buf == NULL)
14109 return(1);
14110
14111 if (ctxt == NULL) {
14112 xmlFreeParserInputBuffer(buf);
14113 return(1);
14114 }
14115
14116 xmlCtxtReset(ctxt);
14117
14118 if (ctxt->pushTab == NULL) {
14119 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14120 sizeof(xmlChar *));
14121 if (ctxt->pushTab == NULL) {
14122 xmlErrMemory(ctxt, NULL);
14123 xmlFreeParserInputBuffer(buf);
14124 return(1);
14125 }
14126 }
14127
14128 if (filename == NULL) {
14129 ctxt->directory = NULL;
14130 } else {
14131 ctxt->directory = xmlParserGetDirectory(filename);
14132 }
14133
14134 inputStream = xmlNewInputStream(ctxt);
14135 if (inputStream == NULL) {
14136 xmlFreeParserInputBuffer(buf);
14137 return(1);
14138 }
14139
14140 if (filename == NULL)
14141 inputStream->filename = NULL;
14142 else
14143 inputStream->filename = (char *)
14144 xmlCanonicPath((const xmlChar *) filename);
14145 inputStream->buf = buf;
14146 inputStream->base = inputStream->buf->buffer->content;
14147 inputStream->cur = inputStream->buf->buffer->content;
14148 inputStream->end =
14149 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14150
14151 inputPush(ctxt, inputStream);
14152
14153 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14154 (ctxt->input->buf != NULL)) {
14155 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14156 int cur = ctxt->input->cur - ctxt->input->base;
14157
14158 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14159
14160 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14161 ctxt->input->cur = ctxt->input->base + cur;
14162 ctxt->input->end =
14163 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14164 use];
14165#ifdef DEBUG_PUSH
14166 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14167#endif
14168 }
14169
14170 if (encoding != NULL) {
14171 xmlCharEncodingHandlerPtr hdlr;
14172
Daniel Veillard37334572008-07-31 08:20:02 +000014173 if (ctxt->encoding != NULL)
14174 xmlFree((xmlChar *) ctxt->encoding);
14175 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14176
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014177 hdlr = xmlFindCharEncodingHandler(encoding);
14178 if (hdlr != NULL) {
14179 xmlSwitchToEncoding(ctxt, hdlr);
14180 } else {
14181 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14182 "Unsupported encoding %s\n", BAD_CAST encoding);
14183 }
14184 } else if (enc != XML_CHAR_ENCODING_NONE) {
14185 xmlSwitchEncoding(ctxt, enc);
14186 }
14187
14188 return(0);
14189}
14190
Daniel Veillard37334572008-07-31 08:20:02 +000014191
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014192/**
Daniel Veillard37334572008-07-31 08:20:02 +000014193 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014194 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014195 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014196 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014197 *
14198 * Applies the options to the parser context
14199 *
14200 * Returns 0 in case of success, the set of unknown or unimplemented options
14201 * in case of error.
14202 */
Daniel Veillard37334572008-07-31 08:20:02 +000014203static int
14204xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014205{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014206 if (ctxt == NULL)
14207 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014208 if (encoding != NULL) {
14209 if (ctxt->encoding != NULL)
14210 xmlFree((xmlChar *) ctxt->encoding);
14211 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14212 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014213 if (options & XML_PARSE_RECOVER) {
14214 ctxt->recovery = 1;
14215 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014216 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014217 } else
14218 ctxt->recovery = 0;
14219 if (options & XML_PARSE_DTDLOAD) {
14220 ctxt->loadsubset = XML_DETECT_IDS;
14221 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014222 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014223 } else
14224 ctxt->loadsubset = 0;
14225 if (options & XML_PARSE_DTDATTR) {
14226 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14227 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014228 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014229 }
14230 if (options & XML_PARSE_NOENT) {
14231 ctxt->replaceEntities = 1;
14232 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14233 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014234 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014235 } else
14236 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014237 if (options & XML_PARSE_PEDANTIC) {
14238 ctxt->pedantic = 1;
14239 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014240 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014241 } else
14242 ctxt->pedantic = 0;
14243 if (options & XML_PARSE_NOBLANKS) {
14244 ctxt->keepBlanks = 0;
14245 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14246 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014247 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014248 } else
14249 ctxt->keepBlanks = 1;
14250 if (options & XML_PARSE_DTDVALID) {
14251 ctxt->validate = 1;
14252 if (options & XML_PARSE_NOWARNING)
14253 ctxt->vctxt.warning = NULL;
14254 if (options & XML_PARSE_NOERROR)
14255 ctxt->vctxt.error = NULL;
14256 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014257 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014258 } else
14259 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014260 if (options & XML_PARSE_NOWARNING) {
14261 ctxt->sax->warning = NULL;
14262 options -= XML_PARSE_NOWARNING;
14263 }
14264 if (options & XML_PARSE_NOERROR) {
14265 ctxt->sax->error = NULL;
14266 ctxt->sax->fatalError = NULL;
14267 options -= XML_PARSE_NOERROR;
14268 }
Daniel Veillard81273902003-09-30 00:43:48 +000014269#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014270 if (options & XML_PARSE_SAX1) {
14271 ctxt->sax->startElement = xmlSAX2StartElement;
14272 ctxt->sax->endElement = xmlSAX2EndElement;
14273 ctxt->sax->startElementNs = NULL;
14274 ctxt->sax->endElementNs = NULL;
14275 ctxt->sax->initialized = 1;
14276 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014277 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014278 }
Daniel Veillard81273902003-09-30 00:43:48 +000014279#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014280 if (options & XML_PARSE_NODICT) {
14281 ctxt->dictNames = 0;
14282 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014283 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014284 } else {
14285 ctxt->dictNames = 1;
14286 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014287 if (options & XML_PARSE_NOCDATA) {
14288 ctxt->sax->cdataBlock = NULL;
14289 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014290 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014291 }
14292 if (options & XML_PARSE_NSCLEAN) {
14293 ctxt->options |= XML_PARSE_NSCLEAN;
14294 options -= XML_PARSE_NSCLEAN;
14295 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014296 if (options & XML_PARSE_NONET) {
14297 ctxt->options |= XML_PARSE_NONET;
14298 options -= XML_PARSE_NONET;
14299 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014300 if (options & XML_PARSE_COMPACT) {
14301 ctxt->options |= XML_PARSE_COMPACT;
14302 options -= XML_PARSE_COMPACT;
14303 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014304 if (options & XML_PARSE_OLD10) {
14305 ctxt->options |= XML_PARSE_OLD10;
14306 options -= XML_PARSE_OLD10;
14307 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014308 if (options & XML_PARSE_NOBASEFIX) {
14309 ctxt->options |= XML_PARSE_NOBASEFIX;
14310 options -= XML_PARSE_NOBASEFIX;
14311 }
14312 if (options & XML_PARSE_HUGE) {
14313 ctxt->options |= XML_PARSE_HUGE;
14314 options -= XML_PARSE_HUGE;
14315 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014316 if (options & XML_PARSE_OLDSAX) {
14317 ctxt->options |= XML_PARSE_OLDSAX;
14318 options -= XML_PARSE_OLDSAX;
14319 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014320 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014321 return (options);
14322}
14323
14324/**
Daniel Veillard37334572008-07-31 08:20:02 +000014325 * xmlCtxtUseOptions:
14326 * @ctxt: an XML parser context
14327 * @options: a combination of xmlParserOption
14328 *
14329 * Applies the options to the parser context
14330 *
14331 * Returns 0 in case of success, the set of unknown or unimplemented options
14332 * in case of error.
14333 */
14334int
14335xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14336{
14337 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14338}
14339
14340/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014341 * xmlDoRead:
14342 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014343 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014344 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014345 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014346 * @reuse: keep the context for reuse
14347 *
14348 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014349 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014350 * Returns the resulting document tree or NULL
14351 */
14352static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014353xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14354 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014355{
14356 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014357
14358 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014359 if (encoding != NULL) {
14360 xmlCharEncodingHandlerPtr hdlr;
14361
14362 hdlr = xmlFindCharEncodingHandler(encoding);
14363 if (hdlr != NULL)
14364 xmlSwitchToEncoding(ctxt, hdlr);
14365 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014366 if ((URL != NULL) && (ctxt->input != NULL) &&
14367 (ctxt->input->filename == NULL))
14368 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014369 xmlParseDocument(ctxt);
14370 if ((ctxt->wellFormed) || ctxt->recovery)
14371 ret = ctxt->myDoc;
14372 else {
14373 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014374 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014375 xmlFreeDoc(ctxt->myDoc);
14376 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014377 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014378 ctxt->myDoc = NULL;
14379 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014380 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014381 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014382
14383 return (ret);
14384}
14385
14386/**
14387 * xmlReadDoc:
14388 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014389 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014390 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014391 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014392 *
14393 * parse an XML in-memory document and build a tree.
14394 *
14395 * Returns the resulting document tree
14396 */
14397xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014398xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014399{
14400 xmlParserCtxtPtr ctxt;
14401
14402 if (cur == NULL)
14403 return (NULL);
14404
14405 ctxt = xmlCreateDocParserCtxt(cur);
14406 if (ctxt == NULL)
14407 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014408 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014409}
14410
14411/**
14412 * xmlReadFile:
14413 * @filename: a file or URL
14414 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014415 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014416 *
14417 * parse an XML file from the filesystem or the network.
14418 *
14419 * Returns the resulting document tree
14420 */
14421xmlDocPtr
14422xmlReadFile(const char *filename, const char *encoding, int options)
14423{
14424 xmlParserCtxtPtr ctxt;
14425
Daniel Veillard61b93382003-11-03 14:28:31 +000014426 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014427 if (ctxt == NULL)
14428 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014429 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014430}
14431
14432/**
14433 * xmlReadMemory:
14434 * @buffer: a pointer to a char array
14435 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014436 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014437 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014438 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014439 *
14440 * parse an XML in-memory document and build a tree.
14441 *
14442 * Returns the resulting document tree
14443 */
14444xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014445xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014446{
14447 xmlParserCtxtPtr ctxt;
14448
14449 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14450 if (ctxt == NULL)
14451 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014452 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014453}
14454
14455/**
14456 * xmlReadFd:
14457 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014458 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014459 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014460 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014461 *
14462 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014463 * NOTE that the file descriptor will not be closed when the
14464 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014465 *
14466 * Returns the resulting document tree
14467 */
14468xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014469xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014470{
14471 xmlParserCtxtPtr ctxt;
14472 xmlParserInputBufferPtr input;
14473 xmlParserInputPtr stream;
14474
14475 if (fd < 0)
14476 return (NULL);
14477
14478 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14479 if (input == NULL)
14480 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014481 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014482 ctxt = xmlNewParserCtxt();
14483 if (ctxt == NULL) {
14484 xmlFreeParserInputBuffer(input);
14485 return (NULL);
14486 }
14487 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14488 if (stream == NULL) {
14489 xmlFreeParserInputBuffer(input);
14490 xmlFreeParserCtxt(ctxt);
14491 return (NULL);
14492 }
14493 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014494 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014495}
14496
14497/**
14498 * xmlReadIO:
14499 * @ioread: an I/O read function
14500 * @ioclose: an I/O close function
14501 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014502 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014503 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014504 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014505 *
14506 * parse an XML document from I/O functions and source and build a tree.
14507 *
14508 * Returns the resulting document tree
14509 */
14510xmlDocPtr
14511xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014512 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014513{
14514 xmlParserCtxtPtr ctxt;
14515 xmlParserInputBufferPtr input;
14516 xmlParserInputPtr stream;
14517
14518 if (ioread == NULL)
14519 return (NULL);
14520
14521 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14522 XML_CHAR_ENCODING_NONE);
14523 if (input == NULL)
14524 return (NULL);
14525 ctxt = xmlNewParserCtxt();
14526 if (ctxt == NULL) {
14527 xmlFreeParserInputBuffer(input);
14528 return (NULL);
14529 }
14530 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14531 if (stream == NULL) {
14532 xmlFreeParserInputBuffer(input);
14533 xmlFreeParserCtxt(ctxt);
14534 return (NULL);
14535 }
14536 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014537 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014538}
14539
14540/**
14541 * xmlCtxtReadDoc:
14542 * @ctxt: an XML parser context
14543 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014544 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014545 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014546 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014547 *
14548 * parse an XML in-memory document and build a tree.
14549 * This reuses the existing @ctxt parser context
14550 *
14551 * Returns the resulting document tree
14552 */
14553xmlDocPtr
14554xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014555 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014556{
14557 xmlParserInputPtr stream;
14558
14559 if (cur == NULL)
14560 return (NULL);
14561 if (ctxt == NULL)
14562 return (NULL);
14563
14564 xmlCtxtReset(ctxt);
14565
14566 stream = xmlNewStringInputStream(ctxt, cur);
14567 if (stream == NULL) {
14568 return (NULL);
14569 }
14570 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014571 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014572}
14573
14574/**
14575 * xmlCtxtReadFile:
14576 * @ctxt: an XML parser context
14577 * @filename: a file or URL
14578 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014579 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014580 *
14581 * parse an XML file from the filesystem or the network.
14582 * This reuses the existing @ctxt parser context
14583 *
14584 * Returns the resulting document tree
14585 */
14586xmlDocPtr
14587xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14588 const char *encoding, int options)
14589{
14590 xmlParserInputPtr stream;
14591
14592 if (filename == NULL)
14593 return (NULL);
14594 if (ctxt == NULL)
14595 return (NULL);
14596
14597 xmlCtxtReset(ctxt);
14598
Daniel Veillard29614c72004-11-26 10:47:26 +000014599 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014600 if (stream == NULL) {
14601 return (NULL);
14602 }
14603 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014604 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014605}
14606
14607/**
14608 * xmlCtxtReadMemory:
14609 * @ctxt: an XML parser context
14610 * @buffer: a pointer to a char array
14611 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014612 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014613 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014614 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014615 *
14616 * parse an XML in-memory document and build a tree.
14617 * This reuses the existing @ctxt parser context
14618 *
14619 * Returns the resulting document tree
14620 */
14621xmlDocPtr
14622xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014623 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014624{
14625 xmlParserInputBufferPtr input;
14626 xmlParserInputPtr stream;
14627
14628 if (ctxt == NULL)
14629 return (NULL);
14630 if (buffer == NULL)
14631 return (NULL);
14632
14633 xmlCtxtReset(ctxt);
14634
14635 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14636 if (input == NULL) {
14637 return(NULL);
14638 }
14639
14640 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14641 if (stream == NULL) {
14642 xmlFreeParserInputBuffer(input);
14643 return(NULL);
14644 }
14645
14646 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014647 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014648}
14649
14650/**
14651 * xmlCtxtReadFd:
14652 * @ctxt: an XML parser context
14653 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014654 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014655 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014656 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014657 *
14658 * parse an XML from a file descriptor and build a tree.
14659 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014660 * NOTE that the file descriptor will not be closed when the
14661 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014662 *
14663 * Returns the resulting document tree
14664 */
14665xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014666xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14667 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014668{
14669 xmlParserInputBufferPtr input;
14670 xmlParserInputPtr stream;
14671
14672 if (fd < 0)
14673 return (NULL);
14674 if (ctxt == NULL)
14675 return (NULL);
14676
14677 xmlCtxtReset(ctxt);
14678
14679
14680 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14681 if (input == NULL)
14682 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014683 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014684 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14685 if (stream == NULL) {
14686 xmlFreeParserInputBuffer(input);
14687 return (NULL);
14688 }
14689 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014690 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014691}
14692
14693/**
14694 * xmlCtxtReadIO:
14695 * @ctxt: an XML parser context
14696 * @ioread: an I/O read function
14697 * @ioclose: an I/O close function
14698 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014699 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014700 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014701 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014702 *
14703 * parse an XML document from I/O functions and source and build a tree.
14704 * This reuses the existing @ctxt parser context
14705 *
14706 * Returns the resulting document tree
14707 */
14708xmlDocPtr
14709xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14710 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014711 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014712 const char *encoding, int options)
14713{
14714 xmlParserInputBufferPtr input;
14715 xmlParserInputPtr stream;
14716
14717 if (ioread == NULL)
14718 return (NULL);
14719 if (ctxt == NULL)
14720 return (NULL);
14721
14722 xmlCtxtReset(ctxt);
14723
14724 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14725 XML_CHAR_ENCODING_NONE);
14726 if (input == NULL)
14727 return (NULL);
14728 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14729 if (stream == NULL) {
14730 xmlFreeParserInputBuffer(input);
14731 return (NULL);
14732 }
14733 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014734 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014735}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014736
14737#define bottom_parser
14738#include "elfgcchack.h"