blob: dc9c14de55de61850699a57f8436a31f8ba36ce1 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
Daniel Veillard0161e632008-08-28 15:36:32 +000083static void
84xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
85
Rob Richards9c0aa472009-03-26 18:10:19 +000086static xmlParserCtxtPtr
87xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
88 const xmlChar *base, xmlParserCtxtPtr pctx);
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090/************************************************************************
91 * *
92 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
93 * *
94 ************************************************************************/
95
96#define XML_PARSER_BIG_ENTITY 1000
97#define XML_PARSER_LOT_ENTITY 5000
98
99/*
100 * XML_PARSER_NON_LINEAR is the threshold where the ratio of parsed entity
101 * replacement over the size in byte of the input indicates that you have
102 * an exponential behaviour. A value of 10 corresponds to at least 3 entity
103 * replacements per byte of input.
104 */
105#define XML_PARSER_NON_LINEAR 10
106
107/*
108 * xmlParserEntityCheck
109 *
110 * Function to check non-linear entity expansion behaviour
111 * This is here to detect and stop exponential linear entity expansion
112 * This is not a limitation of the parser but a safety
113 * boundary feature. It can be disabled with the XML_PARSE_HUGE
114 * parser option.
115 */
116static int
117xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long size,
118 xmlEntityPtr ent)
119{
Daniel Veillardcba68392008-08-29 12:43:40 +0000120 unsigned long consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000121
122 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
123 return (0);
124 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
125 return (1);
126 if (size != 0) {
127 /*
128 * Do the check based on the replacement size of the entity
129 */
130 if (size < XML_PARSER_BIG_ENTITY)
131 return(0);
132
133 /*
134 * A limit on the amount of text data reasonably used
135 */
136 if (ctxt->input != NULL) {
137 consumed = ctxt->input->consumed +
138 (ctxt->input->cur - ctxt->input->base);
139 }
140 consumed += ctxt->sizeentities;
141
142 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
143 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
144 return (0);
145 } else if (ent != NULL) {
146 /*
147 * use the number of parsed entities in the replacement
148 */
149 size = ent->checked;
150
151 /*
152 * The amount of data parsed counting entities size only once
153 */
154 if (ctxt->input != NULL) {
155 consumed = ctxt->input->consumed +
156 (ctxt->input->cur - ctxt->input->base);
157 }
158 consumed += ctxt->sizeentities;
159
160 /*
161 * Check the density of entities for the amount of data
162 * knowing an entity reference will take at least 3 bytes
163 */
164 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
165 return (0);
166 } else {
167 /*
168 * strange we got no data for checking just return
169 */
170 return (0);
171 }
172
173 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
174 return (1);
175}
176
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000177/**
Daniel Veillard4aede2e2003-10-17 12:43:59 +0000178 * xmlParserMaxDepth:
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000179 *
Daniel Veillard8915c152008-08-26 13:05:34 +0000180 * arbitrary depth limit for the XML documents that we allow to
181 * process. This is not a limitation of the parser but a safety
182 * boundary feature. It can be disabled with the XML_PARSE_HUGE
183 * parser option.
Daniel Veillard3b2e4e12003-02-03 08:52:58 +0000184 */
Daniel Veillard8915c152008-08-26 13:05:34 +0000185unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000186
Daniel Veillard0fb18932003-09-07 09:14:37 +0000187
Daniel Veillard0161e632008-08-28 15:36:32 +0000188
189#define SAX2 1
Daniel Veillard21a0f912001-02-25 19:54:14 +0000190#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +0000191#define XML_PARSER_BUFFER_SIZE 100
Daniel Veillard5997aca2002-03-18 18:36:20 +0000192#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
193
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 * The table is never modified, so both the strings and the array
 * itself are declared const.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
202
Daniel Veillarda07050d2003-10-19 14:46:32 +0000203
Owen Taylor3473f882001-02-23 17:55:21 +0000204/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200205static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
206 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000207
Daniel Veillard7d515752003-09-26 19:12:37 +0000208static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000209xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
210 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000211 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000212 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000213
Daniel Veillard37334572008-07-31 08:20:02 +0000214static int
215xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
216 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000217#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000218static void
219xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
220 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000221#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000222
Daniel Veillard7d515752003-09-26 19:12:37 +0000223static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000224xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
225 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000226
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000227static int
228xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
229
Daniel Veillarde57ec792003-09-10 10:50:59 +0000230/************************************************************************
231 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000232 * Some factorized error routines *
233 * *
234 ************************************************************************/
235
236/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000237 * xmlErrAttributeDup:
238 * @ctxt: an XML parser context
239 * @prefix: the attribute prefix
240 * @localname: the attribute localname
241 *
242 * Handle a redefinition of attribute error
243 */
244static void
245xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
246 const xmlChar * localname)
247{
Daniel Veillard157fee02003-10-31 10:36:03 +0000248 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
249 (ctxt->instate == XML_PARSER_EOF))
250 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000251 if (ctxt != NULL)
252 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000253 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000254 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000255 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
256 (const char *) localname, NULL, NULL, 0, 0,
257 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000258 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000259 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000260 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
261 (const char *) prefix, (const char *) localname,
262 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
263 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000264 if (ctxt != NULL) {
265 ctxt->wellFormed = 0;
266 if (ctxt->recovery == 0)
267 ctxt->disableSAX = 1;
268 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000269}
270
271/**
272 * xmlFatalErr:
273 * @ctxt: an XML parser context
274 * @error: the error number
275 * @extra: extra information string
276 *
277 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
278 */
279static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000280xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000281{
282 const char *errmsg;
283
Daniel Veillard157fee02003-10-31 10:36:03 +0000284 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
285 (ctxt->instate == XML_PARSER_EOF))
286 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 switch (error) {
288 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000289 errmsg = "CharRef: invalid hexadecimal value\n";
290 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000291 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000292 errmsg = "CharRef: invalid decimal value\n";
293 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000294 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000295 errmsg = "CharRef: invalid value\n";
296 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000297 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000298 errmsg = "internal error";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "PEReference at end of document\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "PEReference in prolog\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "PEReference in epilog\n";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "PEReference: no name\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "PEReference: expecting ';'\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "Detected an entity reference loop\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "EntityValue: \" or ' expected\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "PEReferences forbidden in internal subset\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "EntityValue: \" or ' expected\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "AttValue: \" or ' expected\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "Unescaped '<' not allowed in attributes values\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "SystemLiteral \" or ' expected\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "Unfinished System or Public ID \" or ' expected\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Sequence ']]>' not allowed in content\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "PUBLIC, the Public Identifier is missing\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Comment must not contain '--' (double-hyphen)\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "xmlParsePI : no target name\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "Invalid PI name\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "NOTATION: Name expected here\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 errmsg = "'>' required to close NOTATION declaration\n";
362 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 errmsg = "Entity value required\n";
365 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 errmsg = "Fragment not allowed";
368 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000369 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370 errmsg = "'(' required to start ATTLIST enumeration\n";
371 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000372 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 errmsg = "NmToken expected in ATTLIST enumeration\n";
374 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 errmsg = "')' required to finish ATTLIST enumeration\n";
377 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
380 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000381 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
383 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000384 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 errmsg = "ContentDecl : Name or '(' expected\n";
386 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000387 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 errmsg =
392 "PEReference: forbidden within markup decl in internal subset\n";
393 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000394 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000395 errmsg = "expected '>'\n";
396 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000397 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000398 errmsg = "XML conditional section '[' expected\n";
399 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000400 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000401 errmsg = "Content error in the external subset\n";
402 break;
403 case XML_ERR_CONDSEC_INVALID_KEYWORD:
404 errmsg =
405 "conditional section INCLUDE or IGNORE keyword expected\n";
406 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000407 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000408 errmsg = "XML conditional section not closed\n";
409 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000410 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000411 errmsg = "Text declaration '<?xml' required\n";
412 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000413 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000414 errmsg = "parsing XML declaration: '?>' expected\n";
415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 errmsg = "external parsed entities cannot be standalone\n";
418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 errmsg = "EntityRef: expecting ';'\n";
421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 errmsg = "DOCTYPE improperly terminated\n";
424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 errmsg = "EndTag: '</' not found\n";
427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 errmsg = "expected '='\n";
430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 errmsg = "String not closed expecting \" or '\n";
433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 errmsg = "String not started expecting ' or \"\n";
436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 errmsg = "Invalid XML encoding name\n";
439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 errmsg = "standalone accepts only 'yes' or 'no'\n";
442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 errmsg = "Document is empty\n";
445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 errmsg = "Extra content at the end of the document\n";
448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 errmsg = "chunk is not well balanced\n";
451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 errmsg = "extra content at the end of well balanced chunk\n";
454 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000455 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 errmsg = "Malformed declaration expecting version\n";
457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 case:
460 errmsg = "\n";
461 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000462#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000463 default:
464 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000465 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000466 if (ctxt != NULL)
467 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000468 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000469 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
470 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000471 if (ctxt != NULL) {
472 ctxt->wellFormed = 0;
473 if (ctxt->recovery == 0)
474 ctxt->disableSAX = 1;
475 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000476}
477
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000478/**
479 * xmlFatalErrMsg:
480 * @ctxt: an XML parser context
481 * @error: the error number
482 * @msg: the error message
483 *
484 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
485 */
486static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000487xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
488 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000489{
Daniel Veillard157fee02003-10-31 10:36:03 +0000490 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
491 (ctxt->instate == XML_PARSER_EOF))
492 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000493 if (ctxt != NULL)
494 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000495 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200496 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000497 if (ctxt != NULL) {
498 ctxt->wellFormed = 0;
499 if (ctxt->recovery == 0)
500 ctxt->disableSAX = 1;
501 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000502}
503
504/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000505 * xmlWarningMsg:
506 * @ctxt: an XML parser context
507 * @error: the error number
508 * @msg: the error message
509 * @str1: extra data
510 * @str2: extra data
511 *
512 * Handle a warning.
513 */
514static void
515xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
516 const char *msg, const xmlChar *str1, const xmlChar *str2)
517{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000518 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000519
Daniel Veillard157fee02003-10-31 10:36:03 +0000520 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
521 (ctxt->instate == XML_PARSER_EOF))
522 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000523 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
524 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000525 schannel = ctxt->sax->serror;
526 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000527 (ctxt->sax) ? ctxt->sax->warning : NULL,
528 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000529 ctxt, NULL, XML_FROM_PARSER, error,
530 XML_ERR_WARNING, NULL, 0,
531 (const char *) str1, (const char *) str2, NULL, 0, 0,
532 msg, (const char *) str1, (const char *) str2);
533}
534
535/**
536 * xmlValidityError:
537 * @ctxt: an XML parser context
538 * @error: the error number
539 * @msg: the error message
540 * @str1: extra data
541 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000542 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000543 */
544static void
545xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000546 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000547{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000548 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000549
550 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
551 (ctxt->instate == XML_PARSER_EOF))
552 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000553 if (ctxt != NULL) {
554 ctxt->errNo = error;
555 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
556 schannel = ctxt->sax->serror;
557 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000558 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000559 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000560 ctxt, NULL, XML_FROM_DTD, error,
561 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000562 (const char *) str2, NULL, 0, 0,
563 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000564 if (ctxt != NULL) {
565 ctxt->valid = 0;
566 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000567}
568
569/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000570 * xmlFatalErrMsgInt:
571 * @ctxt: an XML parser context
572 * @error: the error number
573 * @msg: the error message
574 * @val: an integer value
575 *
576 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
577 */
578static void
579xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000580 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000581{
Daniel Veillard157fee02003-10-31 10:36:03 +0000582 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
583 (ctxt->instate == XML_PARSER_EOF))
584 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000585 if (ctxt != NULL)
586 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000587 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000588 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
589 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000590 if (ctxt != NULL) {
591 ctxt->wellFormed = 0;
592 if (ctxt->recovery == 0)
593 ctxt->disableSAX = 1;
594 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000595}
596
597/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000598 * xmlFatalErrMsgStrIntStr:
599 * @ctxt: an XML parser context
600 * @error: the error number
601 * @msg: the error message
602 * @str1: an string info
603 * @val: an integer value
604 * @str2: an string info
605 *
606 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
607 */
608static void
609xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
610 const char *msg, const xmlChar *str1, int val,
611 const xmlChar *str2)
612{
Daniel Veillard157fee02003-10-31 10:36:03 +0000613 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
614 (ctxt->instate == XML_PARSER_EOF))
615 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000616 if (ctxt != NULL)
617 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000618 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000619 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
620 NULL, 0, (const char *) str1, (const char *) str2,
621 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000622 if (ctxt != NULL) {
623 ctxt->wellFormed = 0;
624 if (ctxt->recovery == 0)
625 ctxt->disableSAX = 1;
626 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000627}
628
629/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000630 * xmlFatalErrMsgStr:
631 * @ctxt: an XML parser context
632 * @error: the error number
633 * @msg: the error message
634 * @val: a string value
635 *
636 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
637 */
638static void
639xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000640 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000641{
Daniel Veillard157fee02003-10-31 10:36:03 +0000642 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
643 (ctxt->instate == XML_PARSER_EOF))
644 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000645 if (ctxt != NULL)
646 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000647 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000648 XML_FROM_PARSER, error, XML_ERR_FATAL,
649 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
650 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000651 if (ctxt != NULL) {
652 ctxt->wellFormed = 0;
653 if (ctxt->recovery == 0)
654 ctxt->disableSAX = 1;
655 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000656}
657
658/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000659 * xmlErrMsgStr:
660 * @ctxt: an XML parser context
661 * @error: the error number
662 * @msg: the error message
663 * @val: a string value
664 *
665 * Handle a non fatal parser error
666 */
667static void
668xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
669 const char *msg, const xmlChar * val)
670{
Daniel Veillard157fee02003-10-31 10:36:03 +0000671 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
672 (ctxt->instate == XML_PARSER_EOF))
673 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL)
675 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000676 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000677 XML_FROM_PARSER, error, XML_ERR_ERROR,
678 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
679 val);
680}
681
682/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000683 * xmlNsErr:
684 * @ctxt: an XML parser context
685 * @error: the error number
686 * @msg: the message
687 * @info1: extra information string
688 * @info2: extra information string
689 *
690 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
691 */
692static void
693xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
694 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000695 const xmlChar * info1, const xmlChar * info2,
696 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000697{
Daniel Veillard157fee02003-10-31 10:36:03 +0000698 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
699 (ctxt->instate == XML_PARSER_EOF))
700 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000701 if (ctxt != NULL)
702 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000703 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000704 XML_ERR_ERROR, NULL, 0, (const char *) info1,
705 (const char *) info2, (const char *) info3, 0, 0, msg,
706 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000707 if (ctxt != NULL)
708 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000709}
710
Daniel Veillard37334572008-07-31 08:20:02 +0000711/**
712 * xmlNsWarn
713 * @ctxt: an XML parser context
714 * @error: the error number
715 * @msg: the message
716 * @info1: extra information string
717 * @info2: extra information string
718 *
719 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
720 */
721static void
722xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
723 const char *msg,
724 const xmlChar * info1, const xmlChar * info2,
725 const xmlChar * info3)
726{
727 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
728 (ctxt->instate == XML_PARSER_EOF))
729 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000730 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
731 XML_ERR_WARNING, NULL, 0, (const char *) info1,
732 (const char *) info2, (const char *) info3, 0, 0, msg,
733 info1, info2, info3);
734}
735
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000736/************************************************************************
737 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000738 * Library wide options *
739 * *
740 ************************************************************************/
741
742/**
743 * xmlHasFeature:
744 * @feature: the feature to be examined
745 *
746 * Examines if the library has been compiled with a given feature.
747 *
748 * Returns a non-zero value if the feature exist, otherwise zero.
749 * Returns zero (0) if the feature does not exist or an unknown
750 * unknown feature is requested, non-zero otherwise.
751 */
752int
753xmlHasFeature(xmlFeature feature)
754{
755 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000756 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000757#ifdef LIBXML_THREAD_ENABLED
758 return(1);
759#else
760 return(0);
761#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000762 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000763#ifdef LIBXML_TREE_ENABLED
764 return(1);
765#else
766 return(0);
767#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000768 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000769#ifdef LIBXML_OUTPUT_ENABLED
770 return(1);
771#else
772 return(0);
773#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000774 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000775#ifdef LIBXML_PUSH_ENABLED
776 return(1);
777#else
778 return(0);
779#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000780 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000781#ifdef LIBXML_READER_ENABLED
782 return(1);
783#else
784 return(0);
785#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000786 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000787#ifdef LIBXML_PATTERN_ENABLED
788 return(1);
789#else
790 return(0);
791#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000792 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000793#ifdef LIBXML_WRITER_ENABLED
794 return(1);
795#else
796 return(0);
797#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000798 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000799#ifdef LIBXML_SAX1_ENABLED
800 return(1);
801#else
802 return(0);
803#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000804 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000805#ifdef LIBXML_FTP_ENABLED
806 return(1);
807#else
808 return(0);
809#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000810 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000811#ifdef LIBXML_HTTP_ENABLED
812 return(1);
813#else
814 return(0);
815#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000816 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000817#ifdef LIBXML_VALID_ENABLED
818 return(1);
819#else
820 return(0);
821#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000822 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000823#ifdef LIBXML_HTML_ENABLED
824 return(1);
825#else
826 return(0);
827#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000828 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000829#ifdef LIBXML_LEGACY_ENABLED
830 return(1);
831#else
832 return(0);
833#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000834 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000835#ifdef LIBXML_C14N_ENABLED
836 return(1);
837#else
838 return(0);
839#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000840 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000841#ifdef LIBXML_CATALOG_ENABLED
842 return(1);
843#else
844 return(0);
845#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000846 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000847#ifdef LIBXML_XPATH_ENABLED
848 return(1);
849#else
850 return(0);
851#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000852 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000853#ifdef LIBXML_XPTR_ENABLED
854 return(1);
855#else
856 return(0);
857#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000858 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000859#ifdef LIBXML_XINCLUDE_ENABLED
860 return(1);
861#else
862 return(0);
863#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000864 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000865#ifdef LIBXML_ICONV_ENABLED
866 return(1);
867#else
868 return(0);
869#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000870 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000871#ifdef LIBXML_ISO8859X_ENABLED
872 return(1);
873#else
874 return(0);
875#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000876 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000877#ifdef LIBXML_UNICODE_ENABLED
878 return(1);
879#else
880 return(0);
881#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000882 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000883#ifdef LIBXML_REGEXP_ENABLED
884 return(1);
885#else
886 return(0);
887#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000888 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000889#ifdef LIBXML_AUTOMATA_ENABLED
890 return(1);
891#else
892 return(0);
893#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000894 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000895#ifdef LIBXML_EXPR_ENABLED
896 return(1);
897#else
898 return(0);
899#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000900 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000901#ifdef LIBXML_SCHEMAS_ENABLED
902 return(1);
903#else
904 return(0);
905#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000906 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000907#ifdef LIBXML_SCHEMATRON_ENABLED
908 return(1);
909#else
910 return(0);
911#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000912 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000913#ifdef LIBXML_MODULES_ENABLED
914 return(1);
915#else
916 return(0);
917#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000918 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000919#ifdef LIBXML_DEBUG_ENABLED
920 return(1);
921#else
922 return(0);
923#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000924 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000925#ifdef DEBUG_MEMORY_LOCATION
926 return(1);
927#else
928 return(0);
929#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000930 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000931#ifdef LIBXML_DEBUG_RUNTIME
932 return(1);
933#else
934 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000935#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000936 case XML_WITH_ZLIB:
937#ifdef LIBXML_ZLIB_ENABLED
938 return(1);
939#else
940 return(0);
941#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000942 default:
943 break;
944 }
945 return(0);
946}
947
948/************************************************************************
949 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000950 * SAX2 defaulted attributes handling *
951 * *
952 ************************************************************************/
953
954/**
955 * xmlDetectSAX2:
956 * @ctxt: an XML parser context
957 *
958 * Do the SAX2 detection and specific intialization
959 */
960static void
961xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
962 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000963#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000964 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
965 ((ctxt->sax->startElementNs != NULL) ||
966 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000967#else
968 ctxt->sax2 = 1;
969#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000970
971 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
972 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
973 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000974 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
975 (ctxt->str_xml_ns == NULL)) {
976 xmlErrMemory(ctxt, NULL);
977 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000978}
979
Daniel Veillarde57ec792003-09-10 10:50:59 +0000980typedef struct _xmlDefAttrs xmlDefAttrs;
981typedef xmlDefAttrs *xmlDefAttrsPtr;
982struct _xmlDefAttrs {
983 int nbAttrs; /* number of defaulted attributes on that element */
984 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +0000985 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000986};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000987
988/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000989 * xmlAttrNormalizeSpace:
990 * @src: the source string
991 * @dst: the target string
992 *
993 * Normalize the space in non CDATA attribute values:
994 * If the attribute type is not CDATA, then the XML processor MUST further
995 * process the normalized attribute value by discarding any leading and
996 * trailing space (#x20) characters, and by replacing sequences of space
997 * (#x20) characters by a single space (#x20) character.
998 * Note that the size of dst need to be at least src, and if one doesn't need
999 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1000 * passing src as dst is just fine.
1001 *
1002 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1003 * is needed.
1004 */
1005static xmlChar *
1006xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1007{
1008 if ((src == NULL) || (dst == NULL))
1009 return(NULL);
1010
1011 while (*src == 0x20) src++;
1012 while (*src != 0) {
1013 if (*src == 0x20) {
1014 while (*src == 0x20) src++;
1015 if (*src != 0)
1016 *dst++ = 0x20;
1017 } else {
1018 *dst++ = *src++;
1019 }
1020 }
1021 *dst = 0;
1022 if (dst == src)
1023 return(NULL);
1024 return(dst);
1025}
1026
1027/**
1028 * xmlAttrNormalizeSpace2:
1029 * @src: the source string
1030 *
1031 * Normalize the space in non CDATA attribute values, a slightly more complex
1032 * front end to avoid allocation problems when running on attribute values
1033 * coming from the input.
1034 *
1035 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1036 * is needed.
1037 */
1038static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001039xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001040{
1041 int i;
1042 int remove_head = 0;
1043 int need_realloc = 0;
1044 const xmlChar *cur;
1045
1046 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1047 return(NULL);
1048 i = *len;
1049 if (i <= 0)
1050 return(NULL);
1051
1052 cur = src;
1053 while (*cur == 0x20) {
1054 cur++;
1055 remove_head++;
1056 }
1057 while (*cur != 0) {
1058 if (*cur == 0x20) {
1059 cur++;
1060 if ((*cur == 0x20) || (*cur == 0)) {
1061 need_realloc = 1;
1062 break;
1063 }
1064 } else
1065 cur++;
1066 }
1067 if (need_realloc) {
1068 xmlChar *ret;
1069
1070 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1071 if (ret == NULL) {
1072 xmlErrMemory(ctxt, NULL);
1073 return(NULL);
1074 }
1075 xmlAttrNormalizeSpace(ret, ret);
1076 *len = (int) strlen((const char *)ret);
1077 return(ret);
1078 } else if (remove_head) {
1079 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001080 memmove(src, src + remove_head, 1 + *len);
1081 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001082 }
1083 return(NULL);
1084}
1085
1086/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001087 * xmlAddDefAttrs:
1088 * @ctxt: an XML parser context
1089 * @fullname: the element fullname
1090 * @fullattr: the attribute fullname
1091 * @value: the attribute value
1092 *
1093 * Add a defaulted attribute for an element
1094 */
1095static void
1096xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1097 const xmlChar *fullname,
1098 const xmlChar *fullattr,
1099 const xmlChar *value) {
1100 xmlDefAttrsPtr defaults;
1101 int len;
1102 const xmlChar *name;
1103 const xmlChar *prefix;
1104
Daniel Veillard6a31b832008-03-26 14:06:44 +00001105 /*
1106 * Allows to detect attribute redefinitions
1107 */
1108 if (ctxt->attsSpecial != NULL) {
1109 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1110 return;
1111 }
1112
Daniel Veillarde57ec792003-09-10 10:50:59 +00001113 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001114 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001115 if (ctxt->attsDefault == NULL)
1116 goto mem_error;
1117 }
1118
1119 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001120 * split the element name into prefix:localname , the string found
1121 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001122 */
1123 name = xmlSplitQName3(fullname, &len);
1124 if (name == NULL) {
1125 name = xmlDictLookup(ctxt->dict, fullname, -1);
1126 prefix = NULL;
1127 } else {
1128 name = xmlDictLookup(ctxt->dict, name, -1);
1129 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1130 }
1131
1132 /*
1133 * make sure there is some storage
1134 */
1135 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1136 if (defaults == NULL) {
1137 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001138 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001139 if (defaults == NULL)
1140 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001141 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001142 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001143 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1144 defaults, NULL) < 0) {
1145 xmlFree(defaults);
1146 goto mem_error;
1147 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001148 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001149 xmlDefAttrsPtr temp;
1150
1151 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001152 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001153 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001154 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001155 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001156 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001157 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1158 defaults, NULL) < 0) {
1159 xmlFree(defaults);
1160 goto mem_error;
1161 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001162 }
1163
1164 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001165 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001166 * are within the DTD and hen not associated to namespace names.
1167 */
1168 name = xmlSplitQName3(fullattr, &len);
1169 if (name == NULL) {
1170 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1171 prefix = NULL;
1172 } else {
1173 name = xmlDictLookup(ctxt->dict, name, -1);
1174 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1175 }
1176
Daniel Veillardae0765b2008-07-31 19:54:59 +00001177 defaults->values[5 * defaults->nbAttrs] = name;
1178 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001179 /* intern the string and precompute the end */
1180 len = xmlStrlen(value);
1181 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001182 defaults->values[5 * defaults->nbAttrs + 2] = value;
1183 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1184 if (ctxt->external)
1185 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1186 else
1187 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001188 defaults->nbAttrs++;
1189
1190 return;
1191
1192mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001193 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001194 return;
1195}
1196
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001197/**
1198 * xmlAddSpecialAttr:
1199 * @ctxt: an XML parser context
1200 * @fullname: the element fullname
1201 * @fullattr: the attribute fullname
1202 * @type: the attribute type
1203 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001204 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001205 */
1206static void
1207xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1208 const xmlChar *fullname,
1209 const xmlChar *fullattr,
1210 int type)
1211{
1212 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001213 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001214 if (ctxt->attsSpecial == NULL)
1215 goto mem_error;
1216 }
1217
Daniel Veillardac4118d2008-01-11 05:27:32 +00001218 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1219 return;
1220
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001221 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1222 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001223 return;
1224
1225mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001226 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001227 return;
1228}
1229
Daniel Veillard4432df22003-09-28 18:58:27 +00001230/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001231 * xmlCleanSpecialAttrCallback:
1232 *
1233 * Removes CDATA attributes from the special attribute table
1234 */
1235static void
1236xmlCleanSpecialAttrCallback(void *payload, void *data,
1237 const xmlChar *fullname, const xmlChar *fullattr,
1238 const xmlChar *unused ATTRIBUTE_UNUSED) {
1239 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1240
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001241 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001242 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1243 }
1244}
1245
1246/**
1247 * xmlCleanSpecialAttr:
1248 * @ctxt: an XML parser context
1249 *
1250 * Trim the list of attributes defined to remove all those of type
1251 * CDATA as they are not special. This call should be done when finishing
1252 * to parse the DTD and before starting to parse the document root.
1253 */
1254static void
1255xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1256{
1257 if (ctxt->attsSpecial == NULL)
1258 return;
1259
1260 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1261
1262 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1263 xmlHashFree(ctxt->attsSpecial, NULL);
1264 ctxt->attsSpecial = NULL;
1265 }
1266 return;
1267}
1268
1269/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001270 * xmlCheckLanguageID:
1271 * @lang: pointer to the string value
1272 *
1273 * Checks that the value conforms to the LanguageID production:
1274 *
1275 * NOTE: this is somewhat deprecated, those productions were removed from
1276 * the XML Second edition.
1277 *
1278 * [33] LanguageID ::= Langcode ('-' Subcode)*
1279 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1280 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1281 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1282 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1283 * [38] Subcode ::= ([a-z] | [A-Z])+
1284 *
1285 * Returns 1 if correct 0 otherwise
1286 **/
1287int
1288xmlCheckLanguageID(const xmlChar * lang)
1289{
1290 const xmlChar *cur = lang;
1291
1292 if (cur == NULL)
1293 return (0);
1294 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1295 ((cur[0] == 'I') && (cur[1] == '-'))) {
1296 /*
1297 * IANA code
1298 */
1299 cur += 2;
1300 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1301 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1302 cur++;
1303 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1304 ((cur[0] == 'X') && (cur[1] == '-'))) {
1305 /*
1306 * User code
1307 */
1308 cur += 2;
1309 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1310 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1311 cur++;
1312 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1313 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1314 /*
1315 * ISO639
1316 */
1317 cur++;
1318 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1319 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1320 cur++;
1321 else
1322 return (0);
1323 } else
1324 return (0);
1325 while (cur[0] != 0) { /* non input consuming */
1326 if (cur[0] != '-')
1327 return (0);
1328 cur++;
1329 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1330 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1331 cur++;
1332 else
1333 return (0);
1334 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1335 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1336 cur++;
1337 }
1338 return (1);
1339}
1340
Owen Taylor3473f882001-02-23 17:55:21 +00001341/************************************************************************
1342 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001343 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001344 * *
1345 ************************************************************************/
1346
Daniel Veillard8ed10722009-08-20 19:17:36 +02001347static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1348 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001349
Daniel Veillard0fb18932003-09-07 09:14:37 +00001350#ifdef SAX2
1351/**
1352 * nsPush:
1353 * @ctxt: an XML parser context
1354 * @prefix: the namespace prefix or NULL
1355 * @URL: the namespace name
1356 *
1357 * Pushes a new parser namespace on top of the ns stack
1358 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001359 * Returns -1 in case of error, -2 if the namespace should be discarded
1360 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001361 */
1362static int
1363nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1364{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001365 if (ctxt->options & XML_PARSE_NSCLEAN) {
1366 int i;
1367 for (i = 0;i < ctxt->nsNr;i += 2) {
1368 if (ctxt->nsTab[i] == prefix) {
1369 /* in scope */
1370 if (ctxt->nsTab[i + 1] == URL)
1371 return(-2);
1372 /* out of scope keep it */
1373 break;
1374 }
1375 }
1376 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001377 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1378 ctxt->nsMax = 10;
1379 ctxt->nsNr = 0;
1380 ctxt->nsTab = (const xmlChar **)
1381 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1382 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001383 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001384 ctxt->nsMax = 0;
1385 return (-1);
1386 }
1387 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001388 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001389 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001390 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1391 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1392 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001393 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001394 ctxt->nsMax /= 2;
1395 return (-1);
1396 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001397 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001398 }
1399 ctxt->nsTab[ctxt->nsNr++] = prefix;
1400 ctxt->nsTab[ctxt->nsNr++] = URL;
1401 return (ctxt->nsNr);
1402}
1403/**
1404 * nsPop:
1405 * @ctxt: an XML parser context
1406 * @nr: the number to pop
1407 *
1408 * Pops the top @nr parser prefix/namespace from the ns stack
1409 *
1410 * Returns the number of namespaces removed
1411 */
1412static int
1413nsPop(xmlParserCtxtPtr ctxt, int nr)
1414{
1415 int i;
1416
1417 if (ctxt->nsTab == NULL) return(0);
1418 if (ctxt->nsNr < nr) {
1419 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1420 nr = ctxt->nsNr;
1421 }
1422 if (ctxt->nsNr <= 0)
1423 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001424
Daniel Veillard0fb18932003-09-07 09:14:37 +00001425 for (i = 0;i < nr;i++) {
1426 ctxt->nsNr--;
1427 ctxt->nsTab[ctxt->nsNr] = NULL;
1428 }
1429 return(nr);
1430}
1431#endif
1432
1433static int
1434xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1435 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001436 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001437 int maxatts;
1438
1439 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001440 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001441 atts = (const xmlChar **)
1442 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001443 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001444 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001445 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1446 if (attallocs == NULL) goto mem_error;
1447 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001448 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001449 } else if (nr + 5 > ctxt->maxatts) {
1450 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001451 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1452 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001453 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001454 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001455 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1456 (maxatts / 5) * sizeof(int));
1457 if (attallocs == NULL) goto mem_error;
1458 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001459 ctxt->maxatts = maxatts;
1460 }
1461 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001462mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001463 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001464 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001465}
1466
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001467/**
1468 * inputPush:
1469 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001470 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001471 *
1472 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001473 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001474 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001475 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001476int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001477inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1478{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001479 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001480 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001481 if (ctxt->inputNr >= ctxt->inputMax) {
1482 ctxt->inputMax *= 2;
1483 ctxt->inputTab =
1484 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1485 ctxt->inputMax *
1486 sizeof(ctxt->inputTab[0]));
1487 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001488 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001489 xmlFreeInputStream(value);
1490 ctxt->inputMax /= 2;
1491 value = NULL;
1492 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001493 }
1494 }
1495 ctxt->inputTab[ctxt->inputNr] = value;
1496 ctxt->input = value;
1497 return (ctxt->inputNr++);
1498}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001499/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001500 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001501 * @ctxt: an XML parser context
1502 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001503 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001504 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001505 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001506 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001507xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001508inputPop(xmlParserCtxtPtr ctxt)
1509{
1510 xmlParserInputPtr ret;
1511
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001512 if (ctxt == NULL)
1513 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001514 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001515 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001516 ctxt->inputNr--;
1517 if (ctxt->inputNr > 0)
1518 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1519 else
1520 ctxt->input = NULL;
1521 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001522 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001523 return (ret);
1524}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001525/**
1526 * nodePush:
1527 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001528 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001529 *
1530 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001531 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001532 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001533 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001534int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001535nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1536{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001537 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001538 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001539 xmlNodePtr *tmp;
1540
1541 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1542 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001543 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001544 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001545 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001546 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001547 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001548 ctxt->nodeTab = tmp;
1549 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001550 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001551 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1552 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001553 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001554 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001555 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001556 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001557 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001558 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001559 ctxt->nodeTab[ctxt->nodeNr] = value;
1560 ctxt->node = value;
1561 return (ctxt->nodeNr++);
1562}
Daniel Veillard8915c152008-08-26 13:05:34 +00001563
Daniel Veillard1c732d22002-11-30 11:22:59 +00001564/**
1565 * nodePop:
1566 * @ctxt: an XML parser context
1567 *
1568 * Pops the top element node from the node stack
1569 *
1570 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001571 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001572xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001573nodePop(xmlParserCtxtPtr ctxt)
1574{
1575 xmlNodePtr ret;
1576
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001577 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001578 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001579 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001580 ctxt->nodeNr--;
1581 if (ctxt->nodeNr > 0)
1582 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1583 else
1584 ctxt->node = NULL;
1585 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001586 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001587 return (ret);
1588}
Daniel Veillarda2351322004-06-27 12:08:10 +00001589
1590#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001591/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001592 * nameNsPush:
1593 * @ctxt: an XML parser context
1594 * @value: the element name
1595 * @prefix: the element prefix
1596 * @URI: the element namespace name
1597 *
1598 * Pushes a new element name/prefix/URL on top of the name stack
1599 *
1600 * Returns -1 in case of error, the index in the stack otherwise
1601 */
1602static int
1603nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1604 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1605{
1606 if (ctxt->nameNr >= ctxt->nameMax) {
1607 const xmlChar * *tmp;
1608 void **tmp2;
1609 ctxt->nameMax *= 2;
1610 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1611 ctxt->nameMax *
1612 sizeof(ctxt->nameTab[0]));
1613 if (tmp == NULL) {
1614 ctxt->nameMax /= 2;
1615 goto mem_error;
1616 }
1617 ctxt->nameTab = tmp;
1618 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1619 ctxt->nameMax * 3 *
1620 sizeof(ctxt->pushTab[0]));
1621 if (tmp2 == NULL) {
1622 ctxt->nameMax /= 2;
1623 goto mem_error;
1624 }
1625 ctxt->pushTab = tmp2;
1626 }
1627 ctxt->nameTab[ctxt->nameNr] = value;
1628 ctxt->name = value;
1629 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1630 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001631 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001632 return (ctxt->nameNr++);
1633mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001634 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001635 return (-1);
1636}
1637/**
1638 * nameNsPop:
1639 * @ctxt: an XML parser context
1640 *
1641 * Pops the top element/prefix/URI name from the name stack
1642 *
1643 * Returns the name just removed
1644 */
1645static const xmlChar *
1646nameNsPop(xmlParserCtxtPtr ctxt)
1647{
1648 const xmlChar *ret;
1649
1650 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001651 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001652 ctxt->nameNr--;
1653 if (ctxt->nameNr > 0)
1654 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1655 else
1656 ctxt->name = NULL;
1657 ret = ctxt->nameTab[ctxt->nameNr];
1658 ctxt->nameTab[ctxt->nameNr] = NULL;
1659 return (ret);
1660}
Daniel Veillarda2351322004-06-27 12:08:10 +00001661#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001662
1663/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001664 * namePush:
1665 * @ctxt: an XML parser context
1666 * @value: the element name
1667 *
1668 * Pushes a new element name on top of the name stack
1669 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001670 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001671 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001672int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001673namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001674{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001675 if (ctxt == NULL) return (-1);
1676
Daniel Veillard1c732d22002-11-30 11:22:59 +00001677 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001678 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001679 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001680 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001681 ctxt->nameMax *
1682 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001683 if (tmp == NULL) {
1684 ctxt->nameMax /= 2;
1685 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001686 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001687 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001688 }
1689 ctxt->nameTab[ctxt->nameNr] = value;
1690 ctxt->name = value;
1691 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001692mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001693 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001694 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695}
1696/**
1697 * namePop:
1698 * @ctxt: an XML parser context
1699 *
1700 * Pops the top element name from the name stack
1701 *
1702 * Returns the name just removed
1703 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001704const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001705namePop(xmlParserCtxtPtr ctxt)
1706{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001707 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001708
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001709 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1710 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001711 ctxt->nameNr--;
1712 if (ctxt->nameNr > 0)
1713 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1714 else
1715 ctxt->name = NULL;
1716 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001717 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001718 return (ret);
1719}
Owen Taylor3473f882001-02-23 17:55:21 +00001720
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001721static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001722 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001723 int *tmp;
1724
Owen Taylor3473f882001-02-23 17:55:21 +00001725 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001726 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1727 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1728 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001729 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001730 ctxt->spaceMax /=2;
1731 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001732 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001733 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001734 }
1735 ctxt->spaceTab[ctxt->spaceNr] = val;
1736 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1737 return(ctxt->spaceNr++);
1738}
1739
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001740static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001741 int ret;
1742 if (ctxt->spaceNr <= 0) return(0);
1743 ctxt->spaceNr--;
1744 if (ctxt->spaceNr > 0)
1745 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1746 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001747 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001748 ret = ctxt->spaceTab[ctxt->spaceNr];
1749 ctxt->spaceTab[ctxt->spaceNr] = -1;
1750 return(ret);
1751}
1752
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
1787
/*
 * Raw accessors on the current input. All of them expect a variable
 * named `ctxt` to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1..cn. Comparison stops at the first mismatch, bytes are read as
 * unsigned char. Every argument is parenthesized so that expressions
 * such as `ptr + 1` or `cond ? a : b` can be passed safely; s and the
 * matched prefix are still evaluated more than once.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )
1810
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to
 * both the column and ctxt->nbChars (no newline handling, so only for
 * ASCII runs without line breaks). Afterwards a '%' triggers parameter
 * entity handling, and an exhausted input that cannot be refilled is
 * popped. val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SKIPL(val): same as SKIP but walks the val xmlChars one by one so
 * that line and column stay correct across newlines. val is
 * parenthesized in the loop bound so that any expression can be passed;
 * it is re-evaluated on every iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1833
Daniel Veillarda880b122003-04-21 21:36:41 +00001834#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001835 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1836 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001837 xmlSHRINK (ctxt);
1838
1839static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1840 xmlParserInputShrink(ctxt->input);
1841 if ((*ctxt->input->cur == 0) &&
1842 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1843 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001844 }
Owen Taylor3473f882001-02-23 17:55:21 +00001845
Daniel Veillarda880b122003-04-21 21:36:41 +00001846#define GROW if ((ctxt->progressive == 0) && \
1847 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001848 xmlGROW (ctxt);
1849
1850static void xmlGROW (xmlParserCtxtPtr ctxt) {
1851 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1852 if ((*ctxt->input->cur == 0) &&
1853 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1854 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001855}
Owen Taylor3473f882001-02-23 17:55:21 +00001856
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar (column and nbChars included)
 * and try to refill the input when the cursor lands on a 0 byte.
 * The expansion is a plain brace block, not a do/while(0) wrapper.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): step over the current character, which is l xmlChars long.
 * Line/column are updated from the first byte only; a '%' found after
 * the move triggers parameter entity handling.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue of the type expected by the callee */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A character of length 1 is stored directly, anything else
 * goes through xmlCopyCharMultiByte(). The expansion is an if/else
 * statement without trailing ';' and i is evaluated more than once.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001883
1884/**
1885 * xmlSkipBlankChars:
1886 * @ctxt: the XML parser context
1887 *
1888 * skip all blanks character found at that point in the input streams.
1889 * It pops up finished entities in the process if allowable at that point.
1890 *
1891 * Returns the number of space chars skipped
1892 */
1893
1894int
1895xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001896 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001897
1898 /*
1899 * It's Okay to use CUR/NEXT here since all the blanks are on
1900 * the ASCII range.
1901 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001902 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1903 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001904 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001905 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001906 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001907 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001908 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001909 if (*cur == '\n') {
1910 ctxt->input->line++; ctxt->input->col = 1;
1911 }
1912 cur++;
1913 res++;
1914 if (*cur == 0) {
1915 ctxt->input->cur = cur;
1916 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1917 cur = ctxt->input->cur;
1918 }
1919 }
1920 ctxt->input->cur = cur;
1921 } else {
1922 int cur;
1923 do {
1924 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001925 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001926 NEXT;
1927 cur = CUR;
1928 res++;
1929 }
1930 while ((cur == 0) && (ctxt->inputNr > 1) &&
1931 (ctxt->instate != XML_PARSER_COMMENT)) {
1932 xmlPopInput(ctxt);
1933 cur = CUR;
1934 }
1935 /*
1936 * Need to handle support of entities branching here
1937 */
1938 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1939 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1940 }
Owen Taylor3473f882001-02-23 17:55:21 +00001941 return(res);
1942}
1943
1944/************************************************************************
1945 * *
1946 * Commodity functions to handle entities *
1947 * *
1948 ************************************************************************/
1949
1950/**
1951 * xmlPopInput:
1952 * @ctxt: an XML parser context
1953 *
1954 * xmlPopInput: the current input pointed by ctxt->input came to an end
1955 * pop it and return the next char.
1956 *
1957 * Returns the current xmlChar in the parser context
1958 */
1959xmlChar
1960xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001961 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001962 if (xmlParserDebugEntities)
1963 xmlGenericError(xmlGenericErrorContext,
1964 "Popping input %d\n", ctxt->inputNr);
1965 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001966 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001967 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1968 return(xmlPopInput(ctxt));
1969 return(CUR);
1970}
1971
1972/**
1973 * xmlPushInput:
1974 * @ctxt: an XML parser context
1975 * @input: an XML parser input fragment (entity, XML fragment ...).
1976 *
1977 * xmlPushInput: switch to a new input stream which is stacked on top
1978 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001979 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00001980 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001981int
Owen Taylor3473f882001-02-23 17:55:21 +00001982xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001983 int ret;
1984 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001985
1986 if (xmlParserDebugEntities) {
1987 if ((ctxt->input != NULL) && (ctxt->input->filename))
1988 xmlGenericError(xmlGenericErrorContext,
1989 "%s(%d): ", ctxt->input->filename,
1990 ctxt->input->line);
1991 xmlGenericError(xmlGenericErrorContext,
1992 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1993 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001994 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001995 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001996 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001997}
1998
1999/**
2000 * xmlParseCharRef:
2001 * @ctxt: an XML parser context
2002 *
2003 * parse Reference declarations
2004 *
2005 * [66] CharRef ::= '&#' [0-9]+ ';' |
2006 * '&#x' [0-9a-fA-F]+ ';'
2007 *
2008 * [ WFC: Legal Character ]
2009 * Characters referred to using character references must match the
2010 * production for Char.
2011 *
2012 * Returns the value parsed (as an int), 0 in case of error
2013 */
2014int
2015xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002016 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002017 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002018 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002019
Owen Taylor3473f882001-02-23 17:55:21 +00002020 /*
2021 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2022 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002023 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002024 (NXT(2) == 'x')) {
2025 SKIP(3);
2026 GROW;
2027 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002028 if (count++ > 20) {
2029 count = 0;
2030 GROW;
2031 }
2032 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002033 val = val * 16 + (CUR - '0');
2034 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2035 val = val * 16 + (CUR - 'a') + 10;
2036 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2037 val = val * 16 + (CUR - 'A') + 10;
2038 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002039 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002040 val = 0;
2041 break;
2042 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002043 if (val > 0x10FFFF)
2044 outofrange = val;
2045
Owen Taylor3473f882001-02-23 17:55:21 +00002046 NEXT;
2047 count++;
2048 }
2049 if (RAW == ';') {
2050 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002051 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002052 ctxt->nbChars ++;
2053 ctxt->input->cur++;
2054 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002055 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002056 SKIP(2);
2057 GROW;
2058 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002059 if (count++ > 20) {
2060 count = 0;
2061 GROW;
2062 }
2063 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002064 val = val * 10 + (CUR - '0');
2065 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002066 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002067 val = 0;
2068 break;
2069 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002070 if (val > 0x10FFFF)
2071 outofrange = val;
2072
Owen Taylor3473f882001-02-23 17:55:21 +00002073 NEXT;
2074 count++;
2075 }
2076 if (RAW == ';') {
2077 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002078 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002079 ctxt->nbChars ++;
2080 ctxt->input->cur++;
2081 }
2082 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002083 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002084 }
2085
2086 /*
2087 * [ WFC: Legal Character ]
2088 * Characters referred to using character references must match the
2089 * production for Char.
2090 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002091 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002092 return(val);
2093 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002094 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2095 "xmlParseCharRef: invalid xmlChar value %d\n",
2096 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002097 }
2098 return(0);
2099}
2100
2101/**
2102 * xmlParseStringCharRef:
2103 * @ctxt: an XML parser context
2104 * @str: a pointer to an index in the string
2105 *
2106 * parse Reference declarations, variant parsing from a string rather
2107 * than an an input flow.
2108 *
2109 * [66] CharRef ::= '&#' [0-9]+ ';' |
2110 * '&#x' [0-9a-fA-F]+ ';'
2111 *
2112 * [ WFC: Legal Character ]
2113 * Characters referred to using character references must match the
2114 * production for Char.
2115 *
2116 * Returns the value parsed (as an int), 0 in case of error, str will be
2117 * updated to the current value of the index
2118 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002119static int
Owen Taylor3473f882001-02-23 17:55:21 +00002120xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2121 const xmlChar *ptr;
2122 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002123 unsigned int val = 0;
2124 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002125
2126 if ((str == NULL) || (*str == NULL)) return(0);
2127 ptr = *str;
2128 cur = *ptr;
2129 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2130 ptr += 3;
2131 cur = *ptr;
2132 while (cur != ';') { /* Non input consuming loop */
2133 if ((cur >= '0') && (cur <= '9'))
2134 val = val * 16 + (cur - '0');
2135 else if ((cur >= 'a') && (cur <= 'f'))
2136 val = val * 16 + (cur - 'a') + 10;
2137 else if ((cur >= 'A') && (cur <= 'F'))
2138 val = val * 16 + (cur - 'A') + 10;
2139 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002140 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002141 val = 0;
2142 break;
2143 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002144 if (val > 0x10FFFF)
2145 outofrange = val;
2146
Owen Taylor3473f882001-02-23 17:55:21 +00002147 ptr++;
2148 cur = *ptr;
2149 }
2150 if (cur == ';')
2151 ptr++;
2152 } else if ((cur == '&') && (ptr[1] == '#')){
2153 ptr += 2;
2154 cur = *ptr;
2155 while (cur != ';') { /* Non input consuming loops */
2156 if ((cur >= '0') && (cur <= '9'))
2157 val = val * 10 + (cur - '0');
2158 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002159 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002160 val = 0;
2161 break;
2162 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002163 if (val > 0x10FFFF)
2164 outofrange = val;
2165
Owen Taylor3473f882001-02-23 17:55:21 +00002166 ptr++;
2167 cur = *ptr;
2168 }
2169 if (cur == ';')
2170 ptr++;
2171 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002172 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002173 return(0);
2174 }
2175 *str = ptr;
2176
2177 /*
2178 * [ WFC: Legal Character ]
2179 * Characters referred to using character references must match the
2180 * production for Char.
2181 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002182 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002183 return(val);
2184 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002185 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2186 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2187 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002188 }
2189 return(0);
2190}
2191
2192/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002193 * xmlNewBlanksWrapperInputStream:
2194 * @ctxt: an XML parser context
2195 * @entity: an Entity pointer
2196 *
2197 * Create a new input stream for wrapping
2198 * blanks around a PEReference
2199 *
2200 * Returns the new input stream or NULL
2201 */
2202
2203static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2204
Daniel Veillardf4862f02002-09-10 11:13:43 +00002205static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002206xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2207 xmlParserInputPtr input;
2208 xmlChar *buffer;
2209 size_t length;
2210 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002211 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2212 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002213 return(NULL);
2214 }
2215 if (xmlParserDebugEntities)
2216 xmlGenericError(xmlGenericErrorContext,
2217 "new blanks wrapper for entity: %s\n", entity->name);
2218 input = xmlNewInputStream(ctxt);
2219 if (input == NULL) {
2220 return(NULL);
2221 }
2222 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002223 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002224 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002225 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002226 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002227 return(NULL);
2228 }
2229 buffer [0] = ' ';
2230 buffer [1] = '%';
2231 buffer [length-3] = ';';
2232 buffer [length-2] = ' ';
2233 buffer [length-1] = 0;
2234 memcpy(buffer + 2, entity->name, length - 5);
2235 input->free = deallocblankswrapper;
2236 input->base = buffer;
2237 input->cur = buffer;
2238 input->length = length;
2239 input->end = &buffer[length];
2240 return(input);
2241}
2242
2243/**
Owen Taylor3473f882001-02-23 17:55:21 +00002244 * xmlParserHandlePEReference:
2245 * @ctxt: the parser context
2246 *
2247 * [69] PEReference ::= '%' Name ';'
2248 *
2249 * [ WFC: No Recursion ]
2250 * A parsed entity must not contain a recursive
2251 * reference to itself, either directly or indirectly.
2252 *
2253 * [ WFC: Entity Declared ]
2254 * In a document without any DTD, a document with only an internal DTD
2255 * subset which contains no parameter entity references, or a document
2256 * with "standalone='yes'", ... ... The declaration of a parameter
2257 * entity must precede any reference to it...
2258 *
2259 * [ VC: Entity Declared ]
2260 * In a document with an external subset or external parameter entities
2261 * with "standalone='no'", ... ... The declaration of a parameter entity
2262 * must precede any reference to it...
2263 *
2264 * [ WFC: In DTD ]
2265 * Parameter-entity references may only appear in the DTD.
2266 * NOTE: misleading but this is handled.
2267 *
2268 * A PEReference may have been detected in the current input stream
2269 * the handling is done accordingly to
2270 * http://www.w3.org/TR/REC-xml#entproc
2271 * i.e.
2272 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002273 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002274 */
2275void
2276xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002277 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002278 xmlEntityPtr entity = NULL;
2279 xmlParserInputPtr input;
2280
Owen Taylor3473f882001-02-23 17:55:21 +00002281 if (RAW != '%') return;
2282 switch(ctxt->instate) {
2283 case XML_PARSER_CDATA_SECTION:
2284 return;
2285 case XML_PARSER_COMMENT:
2286 return;
2287 case XML_PARSER_START_TAG:
2288 return;
2289 case XML_PARSER_END_TAG:
2290 return;
2291 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002292 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002293 return;
2294 case XML_PARSER_PROLOG:
2295 case XML_PARSER_START:
2296 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002297 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002298 return;
2299 case XML_PARSER_ENTITY_DECL:
2300 case XML_PARSER_CONTENT:
2301 case XML_PARSER_ATTRIBUTE_VALUE:
2302 case XML_PARSER_PI:
2303 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002304 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002305 /* we just ignore it there */
2306 return;
2307 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002308 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002309 return;
2310 case XML_PARSER_ENTITY_VALUE:
2311 /*
2312 * NOTE: in the case of entity values, we don't do the
2313 * substitution here since we need the literal
2314 * entity value to be able to save the internal
2315 * subset of the document.
2316 * This will be handled by xmlStringDecodeEntities
2317 */
2318 return;
2319 case XML_PARSER_DTD:
2320 /*
2321 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2322 * In the internal DTD subset, parameter-entity references
2323 * can occur only where markup declarations can occur, not
2324 * within markup declarations.
2325 * In that case this is handled in xmlParseMarkupDecl
2326 */
2327 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2328 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002329 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002330 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002331 break;
2332 case XML_PARSER_IGNORE:
2333 return;
2334 }
2335
2336 NEXT;
2337 name = xmlParseName(ctxt);
2338 if (xmlParserDebugEntities)
2339 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002340 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002341 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002342 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002343 } else {
2344 if (RAW == ';') {
2345 NEXT;
2346 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2347 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2348 if (entity == NULL) {
2349
2350 /*
2351 * [ WFC: Entity Declared ]
2352 * In a document without any DTD, a document with only an
2353 * internal DTD subset which contains no parameter entity
2354 * references, or a document with "standalone='yes'", ...
2355 * ... The declaration of a parameter entity must precede
2356 * any reference to it...
2357 */
2358 if ((ctxt->standalone == 1) ||
2359 ((ctxt->hasExternalSubset == 0) &&
2360 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002361 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002362 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002363 } else {
2364 /*
2365 * [ VC: Entity Declared ]
2366 * In a document with an external subset or external
2367 * parameter entities with "standalone='no'", ...
2368 * ... The declaration of a parameter entity must precede
2369 * any reference to it...
2370 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002371 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2372 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2373 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002374 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002375 } else
2376 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2377 "PEReference: %%%s; not found\n",
2378 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002379 ctxt->valid = 0;
2380 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002381 } else if (ctxt->input->free != deallocblankswrapper) {
2382 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002383 if (xmlPushInput(ctxt, input) < 0)
2384 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002385 } else {
2386 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2387 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002388 xmlChar start[4];
2389 xmlCharEncoding enc;
2390
Owen Taylor3473f882001-02-23 17:55:21 +00002391 /*
2392 * handle the extra spaces added before and after
2393 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002394 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002395 */
2396 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002397 if (xmlPushInput(ctxt, input) < 0)
2398 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002399
2400 /*
2401 * Get the 4 first bytes and decode the charset
2402 * if enc != XML_CHAR_ENCODING_NONE
2403 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002404 * Note that, since we may have some non-UTF8
2405 * encoding (like UTF16, bug 135229), the 'length'
2406 * is not known, but we can calculate based upon
2407 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002408 */
2409 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002410 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002411 start[0] = RAW;
2412 start[1] = NXT(1);
2413 start[2] = NXT(2);
2414 start[3] = NXT(3);
2415 enc = xmlDetectCharEncoding(start, 4);
2416 if (enc != XML_CHAR_ENCODING_NONE) {
2417 xmlSwitchEncoding(ctxt, enc);
2418 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002419 }
2420
Owen Taylor3473f882001-02-23 17:55:21 +00002421 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002422 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2423 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002424 xmlParseTextDecl(ctxt);
2425 }
Owen Taylor3473f882001-02-23 17:55:21 +00002426 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002427 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2428 "PEReference: %s is not a parameter entity\n",
2429 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002430 }
2431 }
2432 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002433 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002434 }
Owen Taylor3473f882001-02-23 17:55:21 +00002435 }
2436}
2437
/*
 * growBuffer:
 * @buffer:  name of the xmlChar * variable holding the buffer; a variable
 *           named <buffer>_size must hold its current size
 * @n:       number of extra xmlChar wanted on top of the doubling
 *
 * Macro used to grow the current buffer to (2 * size + n) xmlChar.
 * The new size is computed in size_t and checked before use, so the
 * doubling cannot overflow the size variable; it is rejected when it
 * wraps around or exceeds (~0U >> 1), i.e. the largest value an int is
 * expected to hold.  The size variable is only updated once the
 * reallocation succeeded.
 * On any failure the macro jumps to the mem_error label, which the
 * enclosing function must provide; @buffer is left untouched in that case.
 */
#define growBuffer(buffer, n) {						\
    xmlChar *tmp;							\
    size_t new_size = (size_t) buffer##_size * 2 + (size_t) (n);	\
    if ((new_size < (size_t) buffer##_size) ||				\
        (new_size > (size_t) (~0U >> 1)))				\
        goto mem_error;							\
    tmp = (xmlChar *) xmlRealloc(buffer, new_size * sizeof(xmlChar));	\
    if (tmp == NULL) goto mem_error;					\
    buffer = tmp;							\
    buffer##_size = (int) new_size;					\
}
2450
2451/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002452 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002453 * @ctxt: the parser context
2454 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002455 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002456 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2457 * @end: an end marker xmlChar, 0 if none
2458 * @end2: an end marker xmlChar, 0 if none
2459 * @end3: an end marker xmlChar, 0 if none
2460 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002461 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002462 *
2463 * [67] Reference ::= EntityRef | CharRef
2464 *
2465 * [69] PEReference ::= '%' Name ';'
2466 *
2467 * Returns A newly allocated string with the substitution done. The caller
2468 * must deallocate it !
2469 */
2470xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002471xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2472 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002473 xmlChar *buffer = NULL;
2474 int buffer_size = 0;
2475
2476 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002477 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002478 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002479 xmlEntityPtr ent;
2480 int c,l;
2481 int nbchars = 0;
2482
Daniel Veillarda82b1822004-11-08 16:24:57 +00002483 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002484 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002485 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002486
Daniel Veillard0161e632008-08-28 15:36:32 +00002487 if (((ctxt->depth > 40) &&
2488 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2489 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002490 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002491 return(NULL);
2492 }
2493
2494 /*
2495 * allocate a translation buffer.
2496 */
2497 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002498 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002499 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002500
2501 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002502 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002503 * we are operating on already parsed values.
2504 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002505 if (str < last)
2506 c = CUR_SCHAR(str, l);
2507 else
2508 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002509 while ((c != 0) && (c != end) && /* non input consuming loop */
2510 (c != end2) && (c != end3)) {
2511
2512 if (c == 0) break;
2513 if ((c == '&') && (str[1] == '#')) {
2514 int val = xmlParseStringCharRef(ctxt, &str);
2515 if (val != 0) {
2516 COPY_BUF(0,buffer,nbchars,val);
2517 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002518 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002519 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002520 }
Owen Taylor3473f882001-02-23 17:55:21 +00002521 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2522 if (xmlParserDebugEntities)
2523 xmlGenericError(xmlGenericErrorContext,
2524 "String decoding Entity Reference: %.30s\n",
2525 str);
2526 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002527 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2528 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002529 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002530 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002531 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002532 if ((ent != NULL) &&
2533 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2534 if (ent->content != NULL) {
2535 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002536 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002537 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002538 }
Owen Taylor3473f882001-02-23 17:55:21 +00002539 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002540 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2541 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002542 }
2543 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002544 ctxt->depth++;
2545 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2546 0, 0, 0);
2547 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002548
Owen Taylor3473f882001-02-23 17:55:21 +00002549 if (rep != NULL) {
2550 current = rep;
2551 while (*current != 0) { /* non input consuming loop */
2552 buffer[nbchars++] = *current++;
2553 if (nbchars >
2554 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002555 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2556 goto int_error;
2557 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002558 }
2559 }
2560 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002561 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002562 }
2563 } else if (ent != NULL) {
2564 int i = xmlStrlen(ent->name);
2565 const xmlChar *cur = ent->name;
2566
2567 buffer[nbchars++] = '&';
2568 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002569 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002570 }
2571 for (;i > 0;i--)
2572 buffer[nbchars++] = *cur++;
2573 buffer[nbchars++] = ';';
2574 }
2575 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2576 if (xmlParserDebugEntities)
2577 xmlGenericError(xmlGenericErrorContext,
2578 "String decoding PE Reference: %.30s\n", str);
2579 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002580 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2581 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002582 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002583 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002584 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002585 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002586 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002587 }
Owen Taylor3473f882001-02-23 17:55:21 +00002588 ctxt->depth++;
2589 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2590 0, 0, 0);
2591 ctxt->depth--;
2592 if (rep != NULL) {
2593 current = rep;
2594 while (*current != 0) { /* non input consuming loop */
2595 buffer[nbchars++] = *current++;
2596 if (nbchars >
2597 buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002598 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2599 goto int_error;
2600 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002601 }
2602 }
2603 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002604 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002605 }
2606 }
2607 } else {
2608 COPY_BUF(l,buffer,nbchars,c);
2609 str += l;
2610 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002611 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002612 }
2613 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002614 if (str < last)
2615 c = CUR_SCHAR(str, l);
2616 else
2617 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002618 }
2619 buffer[nbchars++] = 0;
2620 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002621
2622mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002623 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002624int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002625 if (rep != NULL)
2626 xmlFree(rep);
2627 if (buffer != NULL)
2628 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002629 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002630}
2631
Daniel Veillarde57ec792003-09-10 10:50:59 +00002632/**
2633 * xmlStringDecodeEntities:
2634 * @ctxt: the parser context
2635 * @str: the input string
2636 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2637 * @end: an end marker xmlChar, 0 if none
2638 * @end2: an end marker xmlChar, 0 if none
2639 * @end3: an end marker xmlChar, 0 if none
2640 *
2641 * Takes a entity string content and process to do the adequate substitutions.
2642 *
2643 * [67] Reference ::= EntityRef | CharRef
2644 *
2645 * [69] PEReference ::= '%' Name ';'
2646 *
2647 * Returns A newly allocated string with the substitution done. The caller
2648 * must deallocate it !
2649 */
2650xmlChar *
2651xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2652 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002653 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002654 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2655 end, end2, end3));
2656}
Owen Taylor3473f882001-02-23 17:55:21 +00002657
2658/************************************************************************
2659 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002660 * Commodity functions, cleanup needed ? *
2661 * *
2662 ************************************************************************/
2663
2664/**
2665 * areBlanks:
2666 * @ctxt: an XML parser context
2667 * @str: a xmlChar *
2668 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002669 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002670 *
2671 * Is this a sequence of blank chars that one can ignore ?
2672 *
2673 * Returns 1 if ignorable 0 otherwise.
2674 */
2675
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002676static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2677 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002678 int i, ret;
2679 xmlNodePtr lastChild;
2680
Daniel Veillard05c13a22001-09-09 08:38:09 +00002681 /*
2682 * Don't spend time trying to differentiate them, the same callback is
2683 * used !
2684 */
2685 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002686 return(0);
2687
Owen Taylor3473f882001-02-23 17:55:21 +00002688 /*
2689 * Check for xml:space value.
2690 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002691 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2692 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002693 return(0);
2694
2695 /*
2696 * Check that the string is made of blanks
2697 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002698 if (blank_chars == 0) {
2699 for (i = 0;i < len;i++)
2700 if (!(IS_BLANK_CH(str[i]))) return(0);
2701 }
Owen Taylor3473f882001-02-23 17:55:21 +00002702
2703 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002704 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002705 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002706 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002707 if (ctxt->myDoc != NULL) {
2708 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2709 if (ret == 0) return(1);
2710 if (ret == 1) return(0);
2711 }
2712
2713 /*
2714 * Otherwise, heuristic :-\
2715 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002716 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002717 if ((ctxt->node->children == NULL) &&
2718 (RAW == '<') && (NXT(1) == '/')) return(0);
2719
2720 lastChild = xmlGetLastChild(ctxt->node);
2721 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002722 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2723 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002724 } else if (xmlNodeIsText(lastChild))
2725 return(0);
2726 else if ((ctxt->node->children != NULL) &&
2727 (xmlNodeIsText(ctxt->node->children)))
2728 return(0);
2729 return(1);
2730}
2731
Owen Taylor3473f882001-02-23 17:55:21 +00002732/************************************************************************
2733 * *
2734 * Extra stuff for namespace support *
2735 * Relates to http://www.w3.org/TR/WD-xml-names *
2736 * *
2737 ************************************************************************/
2738
2739/**
2740 * xmlSplitQName:
2741 * @ctxt: an XML parser context
2742 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002743 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002744 *
2745 * parse an UTF8 encoded XML qualified name string
2746 *
2747 * [NS 5] QName ::= (Prefix ':')? LocalPart
2748 *
2749 * [NS 6] Prefix ::= NCName
2750 *
2751 * [NS 7] LocalPart ::= NCName
2752 *
2753 * Returns the local part, and prefix is updated
2754 * to get the Prefix if any.
2755 */
2756
2757xmlChar *
2758xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2759 xmlChar buf[XML_MAX_NAMELEN + 5];
2760 xmlChar *buffer = NULL;
2761 int len = 0;
2762 int max = XML_MAX_NAMELEN;
2763 xmlChar *ret = NULL;
2764 const xmlChar *cur = name;
2765 int c;
2766
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002767 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002768 *prefix = NULL;
2769
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002770 if (cur == NULL) return(NULL);
2771
Owen Taylor3473f882001-02-23 17:55:21 +00002772#ifndef XML_XML_NAMESPACE
2773 /* xml: prefix is not really a namespace */
2774 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2775 (cur[2] == 'l') && (cur[3] == ':'))
2776 return(xmlStrdup(name));
2777#endif
2778
Daniel Veillard597bc482003-07-24 16:08:28 +00002779 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002780 if (cur[0] == ':')
2781 return(xmlStrdup(name));
2782
2783 c = *cur++;
2784 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2785 buf[len++] = c;
2786 c = *cur++;
2787 }
2788 if (len >= max) {
2789 /*
2790 * Okay someone managed to make a huge name, so he's ready to pay
2791 * for the processing speed.
2792 */
2793 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002794
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002795 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002796 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002797 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002798 return(NULL);
2799 }
2800 memcpy(buffer, buf, len);
2801 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2802 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002803 xmlChar *tmp;
2804
Owen Taylor3473f882001-02-23 17:55:21 +00002805 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002806 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002807 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002808 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002809 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002810 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002811 return(NULL);
2812 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002813 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002814 }
2815 buffer[len++] = c;
2816 c = *cur++;
2817 }
2818 buffer[len] = 0;
2819 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002820
Daniel Veillard597bc482003-07-24 16:08:28 +00002821 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002822 if (buffer != NULL)
2823 xmlFree(buffer);
2824 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002825 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002826 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002827
Owen Taylor3473f882001-02-23 17:55:21 +00002828 if (buffer == NULL)
2829 ret = xmlStrndup(buf, len);
2830 else {
2831 ret = buffer;
2832 buffer = NULL;
2833 max = XML_MAX_NAMELEN;
2834 }
2835
2836
2837 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002838 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002839 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002840 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002841 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002842 }
Owen Taylor3473f882001-02-23 17:55:21 +00002843 len = 0;
2844
Daniel Veillardbb284f42002-10-16 18:02:47 +00002845 /*
2846 * Check that the first character is proper to start
2847 * a new name
2848 */
2849 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2850 ((c >= 0x41) && (c <= 0x5A)) ||
2851 (c == '_') || (c == ':'))) {
2852 int l;
2853 int first = CUR_SCHAR(cur, l);
2854
2855 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002856 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002857 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002858 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002859 }
2860 }
2861 cur++;
2862
Owen Taylor3473f882001-02-23 17:55:21 +00002863 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2864 buf[len++] = c;
2865 c = *cur++;
2866 }
2867 if (len >= max) {
2868 /*
2869 * Okay someone managed to make a huge name, so he's ready to pay
2870 * for the processing speed.
2871 */
2872 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002873
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002874 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002875 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002876 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002877 return(NULL);
2878 }
2879 memcpy(buffer, buf, len);
2880 while (c != 0) { /* tested bigname2.xml */
2881 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002882 xmlChar *tmp;
2883
Owen Taylor3473f882001-02-23 17:55:21 +00002884 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002885 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002886 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002887 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002888 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002889 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002890 return(NULL);
2891 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002892 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002893 }
2894 buffer[len++] = c;
2895 c = *cur++;
2896 }
2897 buffer[len] = 0;
2898 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002899
Owen Taylor3473f882001-02-23 17:55:21 +00002900 if (buffer == NULL)
2901 ret = xmlStrndup(buf, len);
2902 else {
2903 ret = buffer;
2904 }
2905 }
2906
2907 return(ret);
2908}
2909
2910/************************************************************************
2911 * *
2912 * The parser itself *
2913 * Relates to http://www.w3.org/TR/REC-xml *
2914 * *
2915 ************************************************************************/
2916
Daniel Veillard34e3f642008-07-29 09:02:27 +00002917/************************************************************************
2918 * *
2919 * Routines to parse Name, NCName and NmToken *
2920 * *
2921 ************************************************************************/
Daniel Veillardc6561462009-03-25 10:22:31 +00002922#ifdef DEBUG
2923static unsigned long nbParseName = 0;
2924static unsigned long nbParseNmToken = 0;
2925static unsigned long nbParseNCName = 0;
2926static unsigned long nbParseNCNameComplex = 0;
2927static unsigned long nbParseNameComplex = 0;
2928static unsigned long nbParseStringName = 0;
2929#endif
2930
Daniel Veillard34e3f642008-07-29 09:02:27 +00002931/*
2932 * The two following functions are related to the change of accepted
2933 * characters for Name and NmToken in the Revision 5 of XML-1.0
2934 * They correspond to the modified production [4] and the new production [4a]
2935 * changes in that revision. Also note that the macros used for the
2936 * productions Letter, Digit, CombiningChar and Extender are not needed
2937 * anymore.
2938 * We still keep compatibility to pre-revision5 parsing semantic if the
2939 * new XML_PARSE_OLD10 option is given to the parser.
2940 */
2941static int
2942xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2943 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2944 /*
2945 * Use the new checks of production [4] [4a] amd [5] of the
2946 * Update 5 of XML-1.0
2947 */
2948 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2949 (((c >= 'a') && (c <= 'z')) ||
2950 ((c >= 'A') && (c <= 'Z')) ||
2951 (c == '_') || (c == ':') ||
2952 ((c >= 0xC0) && (c <= 0xD6)) ||
2953 ((c >= 0xD8) && (c <= 0xF6)) ||
2954 ((c >= 0xF8) && (c <= 0x2FF)) ||
2955 ((c >= 0x370) && (c <= 0x37D)) ||
2956 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2957 ((c >= 0x200C) && (c <= 0x200D)) ||
2958 ((c >= 0x2070) && (c <= 0x218F)) ||
2959 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2960 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2961 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2962 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2963 ((c >= 0x10000) && (c <= 0xEFFFF))))
2964 return(1);
2965 } else {
2966 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2967 return(1);
2968 }
2969 return(0);
2970}
2971
2972static int
2973xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2974 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2975 /*
2976 * Use the new checks of production [4] [4a] amd [5] of the
2977 * Update 5 of XML-1.0
2978 */
2979 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2980 (((c >= 'a') && (c <= 'z')) ||
2981 ((c >= 'A') && (c <= 'Z')) ||
2982 ((c >= '0') && (c <= '9')) || /* !start */
2983 (c == '_') || (c == ':') ||
2984 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2985 ((c >= 0xC0) && (c <= 0xD6)) ||
2986 ((c >= 0xD8) && (c <= 0xF6)) ||
2987 ((c >= 0xF8) && (c <= 0x2FF)) ||
2988 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2989 ((c >= 0x370) && (c <= 0x37D)) ||
2990 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2991 ((c >= 0x200C) && (c <= 0x200D)) ||
2992 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2993 ((c >= 0x2070) && (c <= 0x218F)) ||
2994 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2995 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2996 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2997 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2998 ((c >= 0x10000) && (c <= 0xEFFFF))))
2999 return(1);
3000 } else {
3001 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3002 (c == '.') || (c == '-') ||
3003 (c == '_') || (c == ':') ||
3004 (IS_COMBINING(c)) ||
3005 (IS_EXTENDER(c)))
3006 return(1);
3007 }
3008 return(0);
3009}
3010
Daniel Veillarde57ec792003-09-10 10:50:59 +00003011static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003012 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003013
Daniel Veillard34e3f642008-07-29 09:02:27 +00003014static const xmlChar *
3015xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3016 int len = 0, l;
3017 int c;
3018 int count = 0;
3019
Daniel Veillardc6561462009-03-25 10:22:31 +00003020#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003021 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003022#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003023
3024 /*
3025 * Handler for more complex cases
3026 */
3027 GROW;
3028 c = CUR_CHAR(l);
3029 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3030 /*
3031 * Use the new checks of production [4] [4a] amd [5] of the
3032 * Update 5 of XML-1.0
3033 */
3034 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3035 (!(((c >= 'a') && (c <= 'z')) ||
3036 ((c >= 'A') && (c <= 'Z')) ||
3037 (c == '_') || (c == ':') ||
3038 ((c >= 0xC0) && (c <= 0xD6)) ||
3039 ((c >= 0xD8) && (c <= 0xF6)) ||
3040 ((c >= 0xF8) && (c <= 0x2FF)) ||
3041 ((c >= 0x370) && (c <= 0x37D)) ||
3042 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3043 ((c >= 0x200C) && (c <= 0x200D)) ||
3044 ((c >= 0x2070) && (c <= 0x218F)) ||
3045 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3046 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3047 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3048 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3049 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3050 return(NULL);
3051 }
3052 len += l;
3053 NEXTL(l);
3054 c = CUR_CHAR(l);
3055 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3056 (((c >= 'a') && (c <= 'z')) ||
3057 ((c >= 'A') && (c <= 'Z')) ||
3058 ((c >= '0') && (c <= '9')) || /* !start */
3059 (c == '_') || (c == ':') ||
3060 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3061 ((c >= 0xC0) && (c <= 0xD6)) ||
3062 ((c >= 0xD8) && (c <= 0xF6)) ||
3063 ((c >= 0xF8) && (c <= 0x2FF)) ||
3064 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3065 ((c >= 0x370) && (c <= 0x37D)) ||
3066 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3067 ((c >= 0x200C) && (c <= 0x200D)) ||
3068 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3069 ((c >= 0x2070) && (c <= 0x218F)) ||
3070 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3071 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3072 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3073 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3074 ((c >= 0x10000) && (c <= 0xEFFFF))
3075 )) {
3076 if (count++ > 100) {
3077 count = 0;
3078 GROW;
3079 }
3080 len += l;
3081 NEXTL(l);
3082 c = CUR_CHAR(l);
3083 }
3084 } else {
3085 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3086 (!IS_LETTER(c) && (c != '_') &&
3087 (c != ':'))) {
3088 return(NULL);
3089 }
3090 len += l;
3091 NEXTL(l);
3092 c = CUR_CHAR(l);
3093
3094 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3095 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3096 (c == '.') || (c == '-') ||
3097 (c == '_') || (c == ':') ||
3098 (IS_COMBINING(c)) ||
3099 (IS_EXTENDER(c)))) {
3100 if (count++ > 100) {
3101 count = 0;
3102 GROW;
3103 }
3104 len += l;
3105 NEXTL(l);
3106 c = CUR_CHAR(l);
3107 }
3108 }
3109 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3110 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3111 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3112}
3113
Owen Taylor3473f882001-02-23 17:55:21 +00003114/**
3115 * xmlParseName:
3116 * @ctxt: an XML parser context
3117 *
3118 * parse an XML name.
3119 *
3120 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3121 * CombiningChar | Extender
3122 *
3123 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3124 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003125 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003126 *
3127 * Returns the Name parsed or NULL
3128 */
3129
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003130const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003131xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003132 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003133 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003134 int count = 0;
3135
3136 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003137
Daniel Veillardc6561462009-03-25 10:22:31 +00003138#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003139 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003140#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003141
Daniel Veillard48b2f892001-02-25 16:11:03 +00003142 /*
3143 * Accelerator for simple ASCII names
3144 */
3145 in = ctxt->input->cur;
3146 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3147 ((*in >= 0x41) && (*in <= 0x5A)) ||
3148 (*in == '_') || (*in == ':')) {
3149 in++;
3150 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3151 ((*in >= 0x41) && (*in <= 0x5A)) ||
3152 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003153 (*in == '_') || (*in == '-') ||
3154 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003155 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003156 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003157 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003158 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003159 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003160 ctxt->nbChars += count;
3161 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003162 if (ret == NULL)
3163 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003164 return(ret);
3165 }
3166 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003167 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003168 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003169}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003170
Daniel Veillard34e3f642008-07-29 09:02:27 +00003171static const xmlChar *
3172xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3173 int len = 0, l;
3174 int c;
3175 int count = 0;
3176
Daniel Veillardc6561462009-03-25 10:22:31 +00003177#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003178 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003179#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003180
3181 /*
3182 * Handler for more complex cases
3183 */
3184 GROW;
3185 c = CUR_CHAR(l);
3186 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3187 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3188 return(NULL);
3189 }
3190
3191 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3192 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3193 if (count++ > 100) {
3194 count = 0;
3195 GROW;
3196 }
3197 len += l;
3198 NEXTL(l);
3199 c = CUR_CHAR(l);
3200 }
3201 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3202}
3203
3204/**
3205 * xmlParseNCName:
3206 * @ctxt: an XML parser context
3207 * @len: lenght of the string parsed
3208 *
3209 * parse an XML name.
3210 *
3211 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3212 * CombiningChar | Extender
3213 *
3214 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3215 *
3216 * Returns the Name parsed or NULL
3217 */
3218
3219static const xmlChar *
3220xmlParseNCName(xmlParserCtxtPtr ctxt) {
3221 const xmlChar *in;
3222 const xmlChar *ret;
3223 int count = 0;
3224
Daniel Veillardc6561462009-03-25 10:22:31 +00003225#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003226 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003227#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003228
3229 /*
3230 * Accelerator for simple ASCII names
3231 */
3232 in = ctxt->input->cur;
3233 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3234 ((*in >= 0x41) && (*in <= 0x5A)) ||
3235 (*in == '_')) {
3236 in++;
3237 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3238 ((*in >= 0x41) && (*in <= 0x5A)) ||
3239 ((*in >= 0x30) && (*in <= 0x39)) ||
3240 (*in == '_') || (*in == '-') ||
3241 (*in == '.'))
3242 in++;
3243 if ((*in > 0) && (*in < 0x80)) {
3244 count = in - ctxt->input->cur;
3245 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3246 ctxt->input->cur = in;
3247 ctxt->nbChars += count;
3248 ctxt->input->col += count;
3249 if (ret == NULL) {
3250 xmlErrMemory(ctxt, NULL);
3251 }
3252 return(ret);
3253 }
3254 }
3255 return(xmlParseNCNameComplex(ctxt));
3256}
3257
Daniel Veillard46de64e2002-05-29 08:21:33 +00003258/**
3259 * xmlParseNameAndCompare:
3260 * @ctxt: an XML parser context
3261 *
3262 * parse an XML name and compares for match
3263 * (specialized for endtag parsing)
3264 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003265 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3266 * and the name for mismatch
3267 */
3268
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003269static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003270xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003271 register const xmlChar *cmp = other;
3272 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003273 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003274
3275 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003276
Daniel Veillard46de64e2002-05-29 08:21:33 +00003277 in = ctxt->input->cur;
3278 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003279 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003280 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003281 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003282 }
William M. Brack76e95df2003-10-18 16:20:14 +00003283 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003284 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003285 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003286 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003287 }
3288 /* failure (or end of input buffer), check with full function */
3289 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003290 /* strings coming from the dictionnary direct compare possible */
3291 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003292 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003293 }
3294 return ret;
3295}
3296
Owen Taylor3473f882001-02-23 17:55:21 +00003297/**
3298 * xmlParseStringName:
3299 * @ctxt: an XML parser context
3300 * @str: a pointer to the string pointer (IN/OUT)
3301 *
3302 * parse an XML name.
3303 *
3304 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3305 * CombiningChar | Extender
3306 *
3307 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3308 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003309 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003310 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003311 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003312 * is updated to the current location in the string.
3313 */
3314
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003315static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003316xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3317 xmlChar buf[XML_MAX_NAMELEN + 5];
3318 const xmlChar *cur = *str;
3319 int len = 0, l;
3320 int c;
3321
Daniel Veillardc6561462009-03-25 10:22:31 +00003322#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003323 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003324#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003325
Owen Taylor3473f882001-02-23 17:55:21 +00003326 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003327 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003328 return(NULL);
3329 }
3330
Daniel Veillard34e3f642008-07-29 09:02:27 +00003331 COPY_BUF(l,buf,len,c);
3332 cur += l;
3333 c = CUR_SCHAR(cur, l);
3334 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003335 COPY_BUF(l,buf,len,c);
3336 cur += l;
3337 c = CUR_SCHAR(cur, l);
3338 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3339 /*
3340 * Okay someone managed to make a huge name, so he's ready to pay
3341 * for the processing speed.
3342 */
3343 xmlChar *buffer;
3344 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003345
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003346 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003347 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003348 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003349 return(NULL);
3350 }
3351 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003352 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003353 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003354 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003355 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003356 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003357 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003358 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003359 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003360 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003361 return(NULL);
3362 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003363 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003364 }
3365 COPY_BUF(l,buffer,len,c);
3366 cur += l;
3367 c = CUR_SCHAR(cur, l);
3368 }
3369 buffer[len] = 0;
3370 *str = cur;
3371 return(buffer);
3372 }
3373 }
3374 *str = cur;
3375 return(xmlStrndup(buf, len));
3376}
3377
3378/**
3379 * xmlParseNmtoken:
3380 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003381 *
Owen Taylor3473f882001-02-23 17:55:21 +00003382 * parse an XML Nmtoken.
3383 *
3384 * [7] Nmtoken ::= (NameChar)+
3385 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003386 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003387 *
3388 * Returns the Nmtoken parsed or NULL
3389 */
3390
3391xmlChar *
3392xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3393 xmlChar buf[XML_MAX_NAMELEN + 5];
3394 int len = 0, l;
3395 int c;
3396 int count = 0;
3397
Daniel Veillardc6561462009-03-25 10:22:31 +00003398#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003399 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003400#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003401
Owen Taylor3473f882001-02-23 17:55:21 +00003402 GROW;
3403 c = CUR_CHAR(l);
3404
Daniel Veillard34e3f642008-07-29 09:02:27 +00003405 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003406 if (count++ > 100) {
3407 count = 0;
3408 GROW;
3409 }
3410 COPY_BUF(l,buf,len,c);
3411 NEXTL(l);
3412 c = CUR_CHAR(l);
3413 if (len >= XML_MAX_NAMELEN) {
3414 /*
3415 * Okay someone managed to make a huge token, so he's ready to pay
3416 * for the processing speed.
3417 */
3418 xmlChar *buffer;
3419 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003420
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003421 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003422 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003423 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003424 return(NULL);
3425 }
3426 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003427 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003428 if (count++ > 100) {
3429 count = 0;
3430 GROW;
3431 }
3432 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003433 xmlChar *tmp;
3434
Owen Taylor3473f882001-02-23 17:55:21 +00003435 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003436 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003437 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003438 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003439 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003440 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003441 return(NULL);
3442 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003443 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003444 }
3445 COPY_BUF(l,buffer,len,c);
3446 NEXTL(l);
3447 c = CUR_CHAR(l);
3448 }
3449 buffer[len] = 0;
3450 return(buffer);
3451 }
3452 }
3453 if (len == 0)
3454 return(NULL);
3455 return(xmlStrndup(buf, len));
3456}
3457
3458/**
3459 * xmlParseEntityValue:
3460 * @ctxt: an XML parser context
3461 * @orig: if non-NULL store a copy of the original entity value
3462 *
3463 * parse a value for ENTITY declarations
3464 *
3465 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3466 * "'" ([^%&'] | PEReference | Reference)* "'"
3467 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003468 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003469 */
3470
3471xmlChar *
3472xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3473 xmlChar *buf = NULL;
3474 int len = 0;
3475 int size = XML_PARSER_BUFFER_SIZE;
3476 int c, l;
3477 xmlChar stop;
3478 xmlChar *ret = NULL;
3479 const xmlChar *cur = NULL;
3480 xmlParserInputPtr input;
3481
3482 if (RAW == '"') stop = '"';
3483 else if (RAW == '\'') stop = '\'';
3484 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003485 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003486 return(NULL);
3487 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003488 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003489 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003490 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003491 return(NULL);
3492 }
3493
3494 /*
3495 * The content of the entity definition is copied in a buffer.
3496 */
3497
3498 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3499 input = ctxt->input;
3500 GROW;
3501 NEXT;
3502 c = CUR_CHAR(l);
3503 /*
3504 * NOTE: 4.4.5 Included in Literal
3505 * When a parameter entity reference appears in a literal entity
3506 * value, ... a single or double quote character in the replacement
3507 * text is always treated as a normal data character and will not
3508 * terminate the literal.
3509 * In practice it means we stop the loop only when back at parsing
3510 * the initial entity and the quote is found
3511 */
William M. Brack871611b2003-10-18 04:53:14 +00003512 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003513 (ctxt->input != input))) {
3514 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003515 xmlChar *tmp;
3516
Owen Taylor3473f882001-02-23 17:55:21 +00003517 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003518 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3519 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003520 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003521 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003522 return(NULL);
3523 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003524 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003525 }
3526 COPY_BUF(l,buf,len,c);
3527 NEXTL(l);
3528 /*
3529 * Pop-up of finished entities.
3530 */
3531 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3532 xmlPopInput(ctxt);
3533
3534 GROW;
3535 c = CUR_CHAR(l);
3536 if (c == 0) {
3537 GROW;
3538 c = CUR_CHAR(l);
3539 }
3540 }
3541 buf[len] = 0;
3542
3543 /*
3544 * Raise problem w.r.t. '&' and '%' being used in non-entities
3545 * reference constructs. Note Charref will be handled in
3546 * xmlStringDecodeEntities()
3547 */
3548 cur = buf;
3549 while (*cur != 0) { /* non input consuming */
3550 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3551 xmlChar *name;
3552 xmlChar tmp = *cur;
3553
3554 cur++;
3555 name = xmlParseStringName(ctxt, &cur);
3556 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003557 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003558 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003559 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003560 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003561 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3562 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003563 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003564 }
3565 if (name != NULL)
3566 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003567 if (*cur == 0)
3568 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003569 }
3570 cur++;
3571 }
3572
3573 /*
3574 * Then PEReference entities are substituted.
3575 */
3576 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003577 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003578 xmlFree(buf);
3579 } else {
3580 NEXT;
3581 /*
3582 * NOTE: 4.4.7 Bypassed
3583 * When a general entity reference appears in the EntityValue in
3584 * an entity declaration, it is bypassed and left as is.
3585 * so XML_SUBSTITUTE_REF is not set here.
3586 */
3587 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3588 0, 0, 0);
3589 if (orig != NULL)
3590 *orig = buf;
3591 else
3592 xmlFree(buf);
3593 }
3594
3595 return(ret);
3596}
3597
3598/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003599 * xmlParseAttValueComplex:
3600 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003601 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003602 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003603 *
3604 * parse a value for an attribute, this is the fallback function
3605 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003606 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003607 *
3608 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3609 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003610static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003611xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003612 xmlChar limit = 0;
3613 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003614 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003615 int len = 0;
3616 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003617 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003618 xmlChar *current = NULL;
3619 xmlEntityPtr ent;
3620
Owen Taylor3473f882001-02-23 17:55:21 +00003621 if (NXT(0) == '"') {
3622 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3623 limit = '"';
3624 NEXT;
3625 } else if (NXT(0) == '\'') {
3626 limit = '\'';
3627 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3628 NEXT;
3629 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003630 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003631 return(NULL);
3632 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003633
Owen Taylor3473f882001-02-23 17:55:21 +00003634 /*
3635 * allocate a translation buffer.
3636 */
3637 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003638 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003639 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003640
3641 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003642 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003643 */
3644 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003645 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003646 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003647 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003648 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003649 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003650 if (NXT(1) == '#') {
3651 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003652
Owen Taylor3473f882001-02-23 17:55:21 +00003653 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003654 if (ctxt->replaceEntities) {
3655 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003656 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003657 }
3658 buf[len++] = '&';
3659 } else {
3660 /*
3661 * The reparsing will be done in xmlStringGetNodeList()
3662 * called by the attribute() function in SAX.c
3663 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003664 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003665 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003666 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003667 buf[len++] = '&';
3668 buf[len++] = '#';
3669 buf[len++] = '3';
3670 buf[len++] = '8';
3671 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003672 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003673 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003674 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003675 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003676 }
Owen Taylor3473f882001-02-23 17:55:21 +00003677 len += xmlCopyChar(0, &buf[len], val);
3678 }
3679 } else {
3680 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003681 ctxt->nbentities++;
3682 if (ent != NULL)
3683 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003684 if ((ent != NULL) &&
3685 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3686 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003687 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003688 }
3689 if ((ctxt->replaceEntities == 0) &&
3690 (ent->content[0] == '&')) {
3691 buf[len++] = '&';
3692 buf[len++] = '#';
3693 buf[len++] = '3';
3694 buf[len++] = '8';
3695 buf[len++] = ';';
3696 } else {
3697 buf[len++] = ent->content[0];
3698 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003699 } else if ((ent != NULL) &&
3700 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003701 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3702 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003703 XML_SUBSTITUTE_REF,
3704 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003705 if (rep != NULL) {
3706 current = rep;
3707 while (*current != 0) { /* non input consuming */
3708 buf[len++] = *current++;
3709 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003710 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003711 }
3712 }
3713 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003714 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003715 }
3716 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003717 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003718 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003719 }
Owen Taylor3473f882001-02-23 17:55:21 +00003720 if (ent->content != NULL)
3721 buf[len++] = ent->content[0];
3722 }
3723 } else if (ent != NULL) {
3724 int i = xmlStrlen(ent->name);
3725 const xmlChar *cur = ent->name;
3726
3727 /*
3728 * This may look absurd but is needed to detect
3729 * entities problems
3730 */
3731 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3732 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003733 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003734 XML_SUBSTITUTE_REF, 0, 0, 0);
3735 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003736 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003737 rep = NULL;
3738 }
Owen Taylor3473f882001-02-23 17:55:21 +00003739 }
3740
3741 /*
3742 * Just output the reference
3743 */
3744 buf[len++] = '&';
Daniel Veillard0161e632008-08-28 15:36:32 +00003745 while (len > buf_size - i - 10) {
3746 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003747 }
3748 for (;i > 0;i--)
3749 buf[len++] = *cur++;
3750 buf[len++] = ';';
3751 }
3752 }
3753 } else {
3754 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003755 if ((len != 0) || (!normalize)) {
3756 if ((!normalize) || (!in_space)) {
3757 COPY_BUF(l,buf,len,0x20);
Daniel Veillard0161e632008-08-28 15:36:32 +00003758 while (len > buf_size - 10) {
3759 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003760 }
3761 }
3762 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003763 }
3764 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003765 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003766 COPY_BUF(l,buf,len,c);
3767 if (len > buf_size - 10) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003768 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003769 }
3770 }
3771 NEXTL(l);
3772 }
3773 GROW;
3774 c = CUR_CHAR(l);
3775 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003776 if ((in_space) && (normalize)) {
3777 while (buf[len - 1] == 0x20) len--;
3778 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003779 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003780 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003781 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003782 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003783 if ((c != 0) && (!IS_CHAR(c))) {
3784 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3785 "invalid character in attribute value\n");
3786 } else {
3787 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3788 "AttValue: ' expected\n");
3789 }
Owen Taylor3473f882001-02-23 17:55:21 +00003790 } else
3791 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003792 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003793 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003794
3795mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003796 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003797 if (buf != NULL)
3798 xmlFree(buf);
3799 if (rep != NULL)
3800 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003801 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003802}
3803
3804/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003805 * xmlParseAttValue:
3806 * @ctxt: an XML parser context
3807 *
3808 * parse a value for an attribute
3809 * Note: the parser won't do substitution of entities here, this
3810 * will be handled later in xmlStringGetNodeList
3811 *
3812 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3813 * "'" ([^<&'] | Reference)* "'"
3814 *
3815 * 3.3.3 Attribute-Value Normalization:
3816 * Before the value of an attribute is passed to the application or
3817 * checked for validity, the XML processor must normalize it as follows:
3818 * - a character reference is processed by appending the referenced
3819 * character to the attribute value
3820 * - an entity reference is processed by recursively processing the
3821 * replacement text of the entity
3822 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3823 * appending #x20 to the normalized value, except that only a single
3824 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3825 * parsed entity or the literal entity value of an internal parsed entity
3826 * - other characters are processed by appending them to the normalized value
3827 * If the declared value is not CDATA, then the XML processor must further
3828 * process the normalized attribute value by discarding any leading and
3829 * trailing space (#x20) characters, and by replacing sequences of space
3830 * (#x20) characters by a single space (#x20) character.
3831 * All attributes for which no declaration has been read should be treated
3832 * by a non-validating parser as if declared CDATA.
3833 *
3834 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3835 */
3836
3837
3838xmlChar *
3839xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003840 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003841 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003842}
3843
3844/**
Owen Taylor3473f882001-02-23 17:55:21 +00003845 * xmlParseSystemLiteral:
3846 * @ctxt: an XML parser context
3847 *
3848 * parse an XML Literal
3849 *
3850 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3851 *
3852 * Returns the SystemLiteral parsed or NULL
3853 */
3854
3855xmlChar *
3856xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3857 xmlChar *buf = NULL;
3858 int len = 0;
3859 int size = XML_PARSER_BUFFER_SIZE;
3860 int cur, l;
3861 xmlChar stop;
3862 int state = ctxt->instate;
3863 int count = 0;
3864
3865 SHRINK;
3866 if (RAW == '"') {
3867 NEXT;
3868 stop = '"';
3869 } else if (RAW == '\'') {
3870 NEXT;
3871 stop = '\'';
3872 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003873 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003874 return(NULL);
3875 }
3876
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003877 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003878 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003879 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003880 return(NULL);
3881 }
3882 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3883 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003884 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003885 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003886 xmlChar *tmp;
3887
Owen Taylor3473f882001-02-23 17:55:21 +00003888 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003889 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3890 if (tmp == NULL) {
3891 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003892 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003893 ctxt->instate = (xmlParserInputState) state;
3894 return(NULL);
3895 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003896 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003897 }
3898 count++;
3899 if (count > 50) {
3900 GROW;
3901 count = 0;
3902 }
3903 COPY_BUF(l,buf,len,cur);
3904 NEXTL(l);
3905 cur = CUR_CHAR(l);
3906 if (cur == 0) {
3907 GROW;
3908 SHRINK;
3909 cur = CUR_CHAR(l);
3910 }
3911 }
3912 buf[len] = 0;
3913 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003914 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003915 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003916 } else {
3917 NEXT;
3918 }
3919 return(buf);
3920}
3921
3922/**
3923 * xmlParsePubidLiteral:
3924 * @ctxt: an XML parser context
3925 *
3926 * parse an XML public literal
3927 *
3928 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3929 *
3930 * Returns the PubidLiteral parsed or NULL.
3931 */
3932
3933xmlChar *
3934xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3935 xmlChar *buf = NULL;
3936 int len = 0;
3937 int size = XML_PARSER_BUFFER_SIZE;
3938 xmlChar cur;
3939 xmlChar stop;
3940 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003941 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003942
3943 SHRINK;
3944 if (RAW == '"') {
3945 NEXT;
3946 stop = '"';
3947 } else if (RAW == '\'') {
3948 NEXT;
3949 stop = '\'';
3950 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003951 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003952 return(NULL);
3953 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003954 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003955 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003956 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003957 return(NULL);
3958 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003959 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003960 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003961 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003962 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003963 xmlChar *tmp;
3964
Owen Taylor3473f882001-02-23 17:55:21 +00003965 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003966 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3967 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003968 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003969 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003970 return(NULL);
3971 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003972 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003973 }
3974 buf[len++] = cur;
3975 count++;
3976 if (count > 50) {
3977 GROW;
3978 count = 0;
3979 }
3980 NEXT;
3981 cur = CUR;
3982 if (cur == 0) {
3983 GROW;
3984 SHRINK;
3985 cur = CUR;
3986 }
3987 }
3988 buf[len] = 0;
3989 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003990 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003991 } else {
3992 NEXT;
3993 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003994 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003995 return(buf);
3996}
3997
Daniel Veillard8ed10722009-08-20 19:17:36 +02003998static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003999
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by input byte. A non-zero entry (the byte value itself) means
 * the fast scanning loop of xmlParseCharData() may step over that byte;
 * a zero entry stops the loop. Zero entries are: every control character
 * except TAB (so LF and CR stop the loop too), '&', '<', ']', and every
 * byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4037
Owen Taylor3473f882001-02-23 17:55:21 +00004038/**
4039 * xmlParseCharData:
4040 * @ctxt: an XML parser context
4041 * @cdata: int indicating whether we are within a CDATA section
4042 *
4043 * parse a CharData section.
4044 * if we are within a CDATA section ']]>' marks an end of section.
4045 *
4046 * The right angle bracket (>) may be represented using the string "&gt;",
4047 * and must, for compatibility, be escaped using "&gt;" or a character
4048 * reference when it appears in the string "]]>" in content, when that
4049 * string is not marking the end of a CDATA section.
4050 *
4051 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4052 */
4053
4054void
4055xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00004056 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004057 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00004058 int line = ctxt->input->line;
4059 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004060 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004061
4062 SHRINK;
4063 GROW;
4064 /*
4065 * Accelerated common case where input don't need to be
4066 * modified before passing it to the handler.
4067 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00004068 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00004069 in = ctxt->input->cur;
4070 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004071get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00004072 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004073 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004074 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004075 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004076 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004077 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004078 goto get_more_space;
4079 }
4080 if (*in == '<') {
4081 nbchar = in - ctxt->input->cur;
4082 if (nbchar > 0) {
4083 const xmlChar *tmp = ctxt->input->cur;
4084 ctxt->input->cur = in;
4085
Daniel Veillard34099b42004-11-04 17:34:35 +00004086 if ((ctxt->sax != NULL) &&
4087 (ctxt->sax->ignorableWhitespace !=
4088 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004089 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004090 if (ctxt->sax->ignorableWhitespace != NULL)
4091 ctxt->sax->ignorableWhitespace(ctxt->userData,
4092 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004093 } else {
4094 if (ctxt->sax->characters != NULL)
4095 ctxt->sax->characters(ctxt->userData,
4096 tmp, nbchar);
4097 if (*ctxt->space == -1)
4098 *ctxt->space = -2;
4099 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004100 } else if ((ctxt->sax != NULL) &&
4101 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004102 ctxt->sax->characters(ctxt->userData,
4103 tmp, nbchar);
4104 }
4105 }
4106 return;
4107 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004108
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004109get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004110 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004111 while (test_char_data[*in]) {
4112 in++;
4113 ccol++;
4114 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004115 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004116 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004117 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004118 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004119 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004120 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004121 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004122 }
4123 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004124 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004125 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004126 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004127 return;
4128 }
4129 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004130 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004131 goto get_more;
4132 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004133 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004134 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004135 if ((ctxt->sax != NULL) &&
4136 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004137 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004138 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004139 const xmlChar *tmp = ctxt->input->cur;
4140 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004141
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004142 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004143 if (ctxt->sax->ignorableWhitespace != NULL)
4144 ctxt->sax->ignorableWhitespace(ctxt->userData,
4145 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004146 } else {
4147 if (ctxt->sax->characters != NULL)
4148 ctxt->sax->characters(ctxt->userData,
4149 tmp, nbchar);
4150 if (*ctxt->space == -1)
4151 *ctxt->space = -2;
4152 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004153 line = ctxt->input->line;
4154 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004155 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004156 if (ctxt->sax->characters != NULL)
4157 ctxt->sax->characters(ctxt->userData,
4158 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004159 line = ctxt->input->line;
4160 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004161 }
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004162 /* something really bad happened in the SAX callback */
4163 if (ctxt->instate != XML_PARSER_CONTENT)
4164 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004165 }
4166 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004167 if (*in == 0xD) {
4168 in++;
4169 if (*in == 0xA) {
4170 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004171 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004172 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004173 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004174 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004175 in--;
4176 }
4177 if (*in == '<') {
4178 return;
4179 }
4180 if (*in == '&') {
4181 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004182 }
4183 SHRINK;
4184 GROW;
4185 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004186 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004187 nbchar = 0;
4188 }
Daniel Veillard50582112001-03-26 22:52:16 +00004189 ctxt->input->line = line;
4190 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004191 xmlParseCharDataComplex(ctxt, cdata);
4192}
4193
Daniel Veillard01c13b52002-12-10 15:19:08 +00004194/**
4195 * xmlParseCharDataComplex:
4196 * @ctxt: an XML parser context
4197 * @cdata: int indicating whether we are within a CDATA section
4198 *
4199 * parse a CharData section.this is the fallback function
4200 * of xmlParseCharData() when the parsing requires handling
4201 * of non-ASCII characters.
4202 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004203static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004204xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004205 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4206 int nbchar = 0;
4207 int cur, l;
4208 int count = 0;
4209
4210 SHRINK;
4211 GROW;
4212 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004213 while ((cur != '<') && /* checked */
4214 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004215 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004216 if ((cur == ']') && (NXT(1) == ']') &&
4217 (NXT(2) == '>')) {
4218 if (cdata) break;
4219 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004220 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004221 }
4222 }
4223 COPY_BUF(l,buf,nbchar,cur);
4224 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004225 buf[nbchar] = 0;
4226
Owen Taylor3473f882001-02-23 17:55:21 +00004227 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004228 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004229 */
4230 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004231 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004232 if (ctxt->sax->ignorableWhitespace != NULL)
4233 ctxt->sax->ignorableWhitespace(ctxt->userData,
4234 buf, nbchar);
4235 } else {
4236 if (ctxt->sax->characters != NULL)
4237 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004238 if ((ctxt->sax->characters !=
4239 ctxt->sax->ignorableWhitespace) &&
4240 (*ctxt->space == -1))
4241 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004242 }
4243 }
4244 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004245 /* something really bad happened in the SAX callback */
4246 if (ctxt->instate != XML_PARSER_CONTENT)
4247 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004248 }
4249 count++;
4250 if (count > 50) {
4251 GROW;
4252 count = 0;
4253 }
4254 NEXTL(l);
4255 cur = CUR_CHAR(l);
4256 }
4257 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004258 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004259 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004260 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004261 */
4262 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004263 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004264 if (ctxt->sax->ignorableWhitespace != NULL)
4265 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4266 } else {
4267 if (ctxt->sax->characters != NULL)
4268 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004269 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4270 (*ctxt->space == -1))
4271 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004272 }
4273 }
4274 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004275 if ((cur != 0) && (!IS_CHAR(cur))) {
4276 /* Generate the error and skip the offending character */
4277 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4278 "PCDATA invalid Char value %d\n",
4279 cur);
4280 NEXTL(l);
4281 }
Owen Taylor3473f882001-02-23 17:55:21 +00004282}
4283
4284/**
4285 * xmlParseExternalID:
4286 * @ctxt: an XML parser context
4287 * @publicID: a xmlChar** receiving PubidLiteral
4288 * @strict: indicate whether we should restrict parsing to only
4289 * production [75], see NOTE below
4290 *
4291 * Parse an External ID or a Public ID
4292 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004293 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004294 * 'PUBLIC' S PubidLiteral S SystemLiteral
4295 *
4296 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4297 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4298 *
4299 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4300 *
4301 * Returns the function returns SystemLiteral and in the second
4302 * case publicID receives PubidLiteral, is strict is off
4303 * it is possible to return NULL and have publicID set.
4304 */
4305
4306xmlChar *
4307xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4308 xmlChar *URI = NULL;
4309
4310 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004311
4312 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004313 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004314 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004315 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004316 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4317 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004318 }
4319 SKIP_BLANKS;
4320 URI = xmlParseSystemLiteral(ctxt);
4321 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004322 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004323 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004324 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004325 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004326 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004327 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004328 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004329 }
4330 SKIP_BLANKS;
4331 *publicID = xmlParsePubidLiteral(ctxt);
4332 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004333 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004334 }
4335 if (strict) {
4336 /*
4337 * We don't handle [83] so "S SystemLiteral" is required.
4338 */
William M. Brack76e95df2003-10-18 16:20:14 +00004339 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004340 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004341 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004342 }
4343 } else {
4344 /*
4345 * We handle [83] so we return immediately, if
4346 * "S SystemLiteral" is not detected. From a purely parsing
4347 * point of view that's a nice mess.
4348 */
4349 const xmlChar *ptr;
4350 GROW;
4351
4352 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004353 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004354
William M. Brack76e95df2003-10-18 16:20:14 +00004355 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004356 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4357 }
4358 SKIP_BLANKS;
4359 URI = xmlParseSystemLiteral(ctxt);
4360 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004361 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004362 }
4363 }
4364 return(URI);
4365}
4366
4367/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004368 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004369 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004370 * @buf: the already parsed part of the buffer
4371 * @len: number of bytes filles in the buffer
4372 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004373 *
4374 * Skip an XML (SGML) comment <!-- .... -->
4375 * The spec says that "For compatibility, the string "--" (double-hyphen)
4376 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004377 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004378 *
4379 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4380 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004381static void
4382xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004383 int q, ql;
4384 int r, rl;
4385 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004386 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004387 int inputid;
4388
4389 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004390
Owen Taylor3473f882001-02-23 17:55:21 +00004391 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004392 len = 0;
4393 size = XML_PARSER_BUFFER_SIZE;
4394 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4395 if (buf == NULL) {
4396 xmlErrMemory(ctxt, NULL);
4397 return;
4398 }
Owen Taylor3473f882001-02-23 17:55:21 +00004399 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004400 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004401 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004402 if (q == 0)
4403 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004404 if (!IS_CHAR(q)) {
4405 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4406 "xmlParseComment: invalid xmlChar value %d\n",
4407 q);
4408 xmlFree (buf);
4409 return;
4410 }
Owen Taylor3473f882001-02-23 17:55:21 +00004411 NEXTL(ql);
4412 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004413 if (r == 0)
4414 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004415 if (!IS_CHAR(r)) {
4416 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4417 "xmlParseComment: invalid xmlChar value %d\n",
4418 q);
4419 xmlFree (buf);
4420 return;
4421 }
Owen Taylor3473f882001-02-23 17:55:21 +00004422 NEXTL(rl);
4423 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004424 if (cur == 0)
4425 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004426 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004427 ((cur != '>') ||
4428 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004429 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004430 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004431 }
4432 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004433 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004434 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004435 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4436 if (new_buf == NULL) {
4437 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004438 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004439 return;
4440 }
William M. Bracka3215c72004-07-31 16:24:01 +00004441 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004442 }
4443 COPY_BUF(ql,buf,len,q);
4444 q = r;
4445 ql = rl;
4446 r = cur;
4447 rl = l;
4448
4449 count++;
4450 if (count > 50) {
4451 GROW;
4452 count = 0;
4453 }
4454 NEXTL(l);
4455 cur = CUR_CHAR(l);
4456 if (cur == 0) {
4457 SHRINK;
4458 GROW;
4459 cur = CUR_CHAR(l);
4460 }
4461 }
4462 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004463 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004464 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004465 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004466 } else if (!IS_CHAR(cur)) {
4467 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4468 "xmlParseComment: invalid xmlChar value %d\n",
4469 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004470 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004471 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004472 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4473 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004474 }
4475 NEXT;
4476 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4477 (!ctxt->disableSAX))
4478 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004479 }
Daniel Veillardda629342007-08-01 07:49:06 +00004480 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004481 return;
4482not_terminated:
4483 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4484 "Comment not terminated\n", NULL);
4485 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004486 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004487}
Daniel Veillardda629342007-08-01 07:49:06 +00004488
Daniel Veillard4c778d82005-01-23 17:37:44 +00004489/**
4490 * xmlParseComment:
4491 * @ctxt: an XML parser context
4492 *
4493 * Skip an XML (SGML) comment <!-- .... -->
4494 * The spec says that "For compatibility, the string "--" (double-hyphen)
4495 * must not occur within comments. "
4496 *
4497 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4498 */
4499void
4500xmlParseComment(xmlParserCtxtPtr ctxt) {
4501 xmlChar *buf = NULL;
4502 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004503 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004504 xmlParserInputState state;
4505 const xmlChar *in;
4506 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004507 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004508
4509 /*
4510 * Check that there is a comment right here.
4511 */
4512 if ((RAW != '<') || (NXT(1) != '!') ||
4513 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004514 state = ctxt->instate;
4515 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004516 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004517 SKIP(4);
4518 SHRINK;
4519 GROW;
4520
4521 /*
4522 * Accelerated common case where input don't need to be
4523 * modified before passing it to the handler.
4524 */
4525 in = ctxt->input->cur;
4526 do {
4527 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004528 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004529 ctxt->input->line++; ctxt->input->col = 1;
4530 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004531 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004532 }
4533get_more:
4534 ccol = ctxt->input->col;
4535 while (((*in > '-') && (*in <= 0x7F)) ||
4536 ((*in >= 0x20) && (*in < '-')) ||
4537 (*in == 0x09)) {
4538 in++;
4539 ccol++;
4540 }
4541 ctxt->input->col = ccol;
4542 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004543 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004544 ctxt->input->line++; ctxt->input->col = 1;
4545 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004546 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004547 goto get_more;
4548 }
4549 nbchar = in - ctxt->input->cur;
4550 /*
4551 * save current set of data
4552 */
4553 if (nbchar > 0) {
4554 if ((ctxt->sax != NULL) &&
4555 (ctxt->sax->comment != NULL)) {
4556 if (buf == NULL) {
4557 if ((*in == '-') && (in[1] == '-'))
4558 size = nbchar + 1;
4559 else
4560 size = XML_PARSER_BUFFER_SIZE + nbchar;
4561 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4562 if (buf == NULL) {
4563 xmlErrMemory(ctxt, NULL);
4564 ctxt->instate = state;
4565 return;
4566 }
4567 len = 0;
4568 } else if (len + nbchar + 1 >= size) {
4569 xmlChar *new_buf;
4570 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4571 new_buf = (xmlChar *) xmlRealloc(buf,
4572 size * sizeof(xmlChar));
4573 if (new_buf == NULL) {
4574 xmlFree (buf);
4575 xmlErrMemory(ctxt, NULL);
4576 ctxt->instate = state;
4577 return;
4578 }
4579 buf = new_buf;
4580 }
4581 memcpy(&buf[len], ctxt->input->cur, nbchar);
4582 len += nbchar;
4583 buf[len] = 0;
4584 }
4585 }
4586 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004587 if (*in == 0xA) {
4588 in++;
4589 ctxt->input->line++; ctxt->input->col = 1;
4590 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004591 if (*in == 0xD) {
4592 in++;
4593 if (*in == 0xA) {
4594 ctxt->input->cur = in;
4595 in++;
4596 ctxt->input->line++; ctxt->input->col = 1;
4597 continue; /* while */
4598 }
4599 in--;
4600 }
4601 SHRINK;
4602 GROW;
4603 in = ctxt->input->cur;
4604 if (*in == '-') {
4605 if (in[1] == '-') {
4606 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004607 if (ctxt->input->id != inputid) {
4608 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4609 "comment doesn't start and stop in the same entity\n");
4610 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004611 SKIP(3);
4612 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4613 (!ctxt->disableSAX)) {
4614 if (buf != NULL)
4615 ctxt->sax->comment(ctxt->userData, buf);
4616 else
4617 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4618 }
4619 if (buf != NULL)
4620 xmlFree(buf);
4621 ctxt->instate = state;
4622 return;
4623 }
4624 if (buf != NULL)
4625 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4626 "Comment not terminated \n<!--%.50s\n",
4627 buf);
4628 else
4629 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4630 "Comment not terminated \n", NULL);
4631 in++;
4632 ctxt->input->col++;
4633 }
4634 in++;
4635 ctxt->input->col++;
4636 goto get_more;
4637 }
4638 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4639 xmlParseCommentComplex(ctxt, buf, len, size);
4640 ctxt->instate = state;
4641 return;
4642}
4643
Owen Taylor3473f882001-02-23 17:55:21 +00004644
4645/**
4646 * xmlParsePITarget:
4647 * @ctxt: an XML parser context
4648 *
4649 * parse the name of a PI
4650 *
4651 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4652 *
4653 * Returns the PITarget name or NULL
4654 */
4655
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004656const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004657xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004658 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004659
4660 name = xmlParseName(ctxt);
4661 if ((name != NULL) &&
4662 ((name[0] == 'x') || (name[0] == 'X')) &&
4663 ((name[1] == 'm') || (name[1] == 'M')) &&
4664 ((name[2] == 'l') || (name[2] == 'L'))) {
4665 int i;
4666 if ((name[0] == 'x') && (name[1] == 'm') &&
4667 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004668 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004669 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004670 return(name);
4671 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004672 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004673 return(name);
4674 }
4675 for (i = 0;;i++) {
4676 if (xmlW3CPIs[i] == NULL) break;
4677 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4678 return(name);
4679 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004680 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4681 "xmlParsePITarget: invalid name prefix 'xml'\n",
4682 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004683 }
Daniel Veillard37334572008-07-31 08:20:02 +00004684 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4685 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4686 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4687 }
Owen Taylor3473f882001-02-23 17:55:21 +00004688 return(name);
4689}
4690
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected content is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a quoted value (single or double quotes)
 * and optional trailing blanks. On success the quoted value is added to
 * the catalogs of the parsing context through xmlCatalogAddLocal().
 * A missing '=' makes the function return silently; every other
 * syntax problem raises an XML_WAR_CATALOG_PI warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *cursor = catalog;
    const xmlChar *start;
    xmlChar *uri = NULL;
    xmlChar quote;

    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (xmlStrncmp(cursor, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    cursor += 7;

    while (IS_BLANK_CH(*cursor))
        cursor++;
    /* no '=' after the keyword: given up without any warning */
    if (*cursor != '=')
        return;
    cursor++;

    while (IS_BLANK_CH(*cursor))
        cursor++;
    quote = *cursor;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    cursor++;

    /* scan up to the matching closing quote */
    start = cursor;
    while ((*cursor != 0) && (*cursor != quote))
        cursor++;
    if (*cursor == 0)
        goto syntax_error;
    uri = xmlStrndup(start, cursor - start);
    cursor++;

    /* only blanks may follow the quoted value */
    while (IS_BLANK_CH(*cursor))
        cursor++;
    if (*cursor != 0)
        goto syntax_error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

syntax_error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
4752
Owen Taylor3473f882001-02-23 17:55:21 +00004753/**
4754 * xmlParsePI:
4755 * @ctxt: an XML parser context
4756 *
4757 * parse an XML Processing Instruction.
4758 *
4759 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4760 *
4761 * The processing is transfered to SAX once parsed.
4762 */
4763
4764void
4765xmlParsePI(xmlParserCtxtPtr ctxt) {
4766 xmlChar *buf = NULL;
4767 int len = 0;
4768 int size = XML_PARSER_BUFFER_SIZE;
4769 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004770 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004771 xmlParserInputState state;
4772 int count = 0;
4773
4774 if ((RAW == '<') && (NXT(1) == '?')) {
4775 xmlParserInputPtr input = ctxt->input;
4776 state = ctxt->instate;
4777 ctxt->instate = XML_PARSER_PI;
4778 /*
4779 * this is a Processing Instruction.
4780 */
4781 SKIP(2);
4782 SHRINK;
4783
4784 /*
4785 * Parse the target name and check for special support like
4786 * namespace.
4787 */
4788 target = xmlParsePITarget(ctxt);
4789 if (target != NULL) {
4790 if ((RAW == '?') && (NXT(1) == '>')) {
4791 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004792 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4793 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004794 }
4795 SKIP(2);
4796
4797 /*
4798 * SAX: PI detected.
4799 */
4800 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4801 (ctxt->sax->processingInstruction != NULL))
4802 ctxt->sax->processingInstruction(ctxt->userData,
4803 target, NULL);
4804 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004805 return;
4806 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004807 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004808 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004809 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004810 ctxt->instate = state;
4811 return;
4812 }
4813 cur = CUR;
4814 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004815 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4816 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004817 }
4818 SKIP_BLANKS;
4819 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004820 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004821 ((cur != '?') || (NXT(1) != '>'))) {
4822 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004823 xmlChar *tmp;
4824
Owen Taylor3473f882001-02-23 17:55:21 +00004825 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004826 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4827 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004828 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004829 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004830 ctxt->instate = state;
4831 return;
4832 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004833 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004834 }
4835 count++;
4836 if (count > 50) {
4837 GROW;
4838 count = 0;
4839 }
4840 COPY_BUF(l,buf,len,cur);
4841 NEXTL(l);
4842 cur = CUR_CHAR(l);
4843 if (cur == 0) {
4844 SHRINK;
4845 GROW;
4846 cur = CUR_CHAR(l);
4847 }
4848 }
4849 buf[len] = 0;
4850 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004851 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4852 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004853 } else {
4854 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004855 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4856 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004857 }
4858 SKIP(2);
4859
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004860#ifdef LIBXML_CATALOG_ENABLED
4861 if (((state == XML_PARSER_MISC) ||
4862 (state == XML_PARSER_START)) &&
4863 (xmlStrEqual(target, XML_CATALOG_PI))) {
4864 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4865 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4866 (allow == XML_CATA_ALLOW_ALL))
4867 xmlParseCatalogPI(ctxt, buf);
4868 }
4869#endif
4870
4871
Owen Taylor3473f882001-02-23 17:55:21 +00004872 /*
4873 * SAX: PI detected.
4874 */
4875 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4876 (ctxt->sax->processingInstruction != NULL))
4877 ctxt->sax->processingInstruction(ctxt->userData,
4878 target, buf);
4879 }
4880 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004881 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004882 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004883 }
4884 ctxt->instate = state;
4885 }
4886}
4887
4888/**
4889 * xmlParseNotationDecl:
4890 * @ctxt: an XML parser context
4891 *
4892 * parse a notation declaration
4893 *
4894 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4895 *
4896 * Hence there is actually 3 choices:
4897 * 'PUBLIC' S PubidLiteral
4898 * 'PUBLIC' S PubidLiteral S SystemLiteral
4899 * and 'SYSTEM' S SystemLiteral
4900 *
4901 * See the NOTE on xmlParseExternalID().
4902 */
4903
4904void
4905xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004906 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004907 xmlChar *Pubid;
4908 xmlChar *Systemid;
4909
Daniel Veillarda07050d2003-10-19 14:46:32 +00004910 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004911 xmlParserInputPtr input = ctxt->input;
4912 SHRINK;
4913 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004914 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004915 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4916 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004917 return;
4918 }
4919 SKIP_BLANKS;
4920
Daniel Veillard76d66f42001-05-16 21:05:17 +00004921 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004922 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004923 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004924 return;
4925 }
William M. Brack76e95df2003-10-18 16:20:14 +00004926 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004927 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004928 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004929 return;
4930 }
Daniel Veillard37334572008-07-31 08:20:02 +00004931 if (xmlStrchr(name, ':') != NULL) {
4932 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4933 "colon are forbidden from notation names '%s'\n",
4934 name, NULL, NULL);
4935 }
Owen Taylor3473f882001-02-23 17:55:21 +00004936 SKIP_BLANKS;
4937
4938 /*
4939 * Parse the IDs.
4940 */
4941 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4942 SKIP_BLANKS;
4943
4944 if (RAW == '>') {
4945 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004946 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4947 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004948 }
4949 NEXT;
4950 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4951 (ctxt->sax->notationDecl != NULL))
4952 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4953 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004954 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004955 }
Owen Taylor3473f882001-02-23 17:55:21 +00004956 if (Systemid != NULL) xmlFree(Systemid);
4957 if (Pubid != NULL) xmlFree(Pubid);
4958 }
4959}
4960
4961/**
4962 * xmlParseEntityDecl:
4963 * @ctxt: an XML parser context
4964 *
4965 * parse <!ENTITY declarations
4966 *
4967 * [70] EntityDecl ::= GEDecl | PEDecl
4968 *
4969 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4970 *
4971 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4972 *
4973 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4974 *
4975 * [74] PEDef ::= EntityValue | ExternalID
4976 *
4977 * [76] NDataDecl ::= S 'NDATA' S Name
4978 *
4979 * [ VC: Notation Declared ]
4980 * The Name must match the declared name of a notation.
4981 */
4982
4983void
4984xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004985 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004986 xmlChar *value = NULL;
4987 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004988 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004989 int isParameter = 0;
4990 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004991 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00004992
Daniel Veillard4c778d82005-01-23 17:37:44 +00004993 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004994 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004995 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004996 SHRINK;
4997 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004998 skipped = SKIP_BLANKS;
4999 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005000 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5001 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005002 }
Owen Taylor3473f882001-02-23 17:55:21 +00005003
5004 if (RAW == '%') {
5005 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005006 skipped = SKIP_BLANKS;
5007 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005008 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5009 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005010 }
Owen Taylor3473f882001-02-23 17:55:21 +00005011 isParameter = 1;
5012 }
5013
Daniel Veillard76d66f42001-05-16 21:05:17 +00005014 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005015 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005016 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5017 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005018 return;
5019 }
Daniel Veillard37334572008-07-31 08:20:02 +00005020 if (xmlStrchr(name, ':') != NULL) {
5021 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5022 "colon are forbidden from entities names '%s'\n",
5023 name, NULL, NULL);
5024 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005025 skipped = SKIP_BLANKS;
5026 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005027 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5028 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005029 }
Owen Taylor3473f882001-02-23 17:55:21 +00005030
Daniel Veillardf5582f12002-06-11 10:08:16 +00005031 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005032 /*
5033 * handle the various case of definitions...
5034 */
5035 if (isParameter) {
5036 if ((RAW == '"') || (RAW == '\'')) {
5037 value = xmlParseEntityValue(ctxt, &orig);
5038 if (value) {
5039 if ((ctxt->sax != NULL) &&
5040 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5041 ctxt->sax->entityDecl(ctxt->userData, name,
5042 XML_INTERNAL_PARAMETER_ENTITY,
5043 NULL, NULL, value);
5044 }
5045 } else {
5046 URI = xmlParseExternalID(ctxt, &literal, 1);
5047 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005048 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005049 }
5050 if (URI) {
5051 xmlURIPtr uri;
5052
5053 uri = xmlParseURI((const char *) URI);
5054 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005055 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5056 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005057 /*
5058 * This really ought to be a well formedness error
5059 * but the XML Core WG decided otherwise c.f. issue
5060 * E26 of the XML erratas.
5061 */
Owen Taylor3473f882001-02-23 17:55:21 +00005062 } else {
5063 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005064 /*
5065 * Okay this is foolish to block those but not
5066 * invalid URIs.
5067 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005068 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005069 } else {
5070 if ((ctxt->sax != NULL) &&
5071 (!ctxt->disableSAX) &&
5072 (ctxt->sax->entityDecl != NULL))
5073 ctxt->sax->entityDecl(ctxt->userData, name,
5074 XML_EXTERNAL_PARAMETER_ENTITY,
5075 literal, URI, NULL);
5076 }
5077 xmlFreeURI(uri);
5078 }
5079 }
5080 }
5081 } else {
5082 if ((RAW == '"') || (RAW == '\'')) {
5083 value = xmlParseEntityValue(ctxt, &orig);
5084 if ((ctxt->sax != NULL) &&
5085 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5086 ctxt->sax->entityDecl(ctxt->userData, name,
5087 XML_INTERNAL_GENERAL_ENTITY,
5088 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005089 /*
5090 * For expat compatibility in SAX mode.
5091 */
5092 if ((ctxt->myDoc == NULL) ||
5093 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5094 if (ctxt->myDoc == NULL) {
5095 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005096 if (ctxt->myDoc == NULL) {
5097 xmlErrMemory(ctxt, "New Doc failed");
5098 return;
5099 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005100 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005101 }
5102 if (ctxt->myDoc->intSubset == NULL)
5103 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5104 BAD_CAST "fake", NULL, NULL);
5105
Daniel Veillard1af9a412003-08-20 22:54:39 +00005106 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5107 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005108 }
Owen Taylor3473f882001-02-23 17:55:21 +00005109 } else {
5110 URI = xmlParseExternalID(ctxt, &literal, 1);
5111 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005112 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005113 }
5114 if (URI) {
5115 xmlURIPtr uri;
5116
5117 uri = xmlParseURI((const char *)URI);
5118 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005119 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5120 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005121 /*
5122 * This really ought to be a well formedness error
5123 * but the XML Core WG decided otherwise c.f. issue
5124 * E26 of the XML erratas.
5125 */
Owen Taylor3473f882001-02-23 17:55:21 +00005126 } else {
5127 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005128 /*
5129 * Okay this is foolish to block those but not
5130 * invalid URIs.
5131 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005132 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005133 }
5134 xmlFreeURI(uri);
5135 }
5136 }
William M. Brack76e95df2003-10-18 16:20:14 +00005137 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005138 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5139 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005140 }
5141 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005142 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005143 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005144 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005145 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5146 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005147 }
5148 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005149 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005150 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5151 (ctxt->sax->unparsedEntityDecl != NULL))
5152 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5153 literal, URI, ndata);
5154 } else {
5155 if ((ctxt->sax != NULL) &&
5156 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5157 ctxt->sax->entityDecl(ctxt->userData, name,
5158 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5159 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005160 /*
5161 * For expat compatibility in SAX mode.
5162 * assuming the entity repalcement was asked for
5163 */
5164 if ((ctxt->replaceEntities != 0) &&
5165 ((ctxt->myDoc == NULL) ||
5166 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5167 if (ctxt->myDoc == NULL) {
5168 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005169 if (ctxt->myDoc == NULL) {
5170 xmlErrMemory(ctxt, "New Doc failed");
5171 return;
5172 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005173 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005174 }
5175
5176 if (ctxt->myDoc->intSubset == NULL)
5177 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5178 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005179 xmlSAX2EntityDecl(ctxt, name,
5180 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5181 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005182 }
Owen Taylor3473f882001-02-23 17:55:21 +00005183 }
5184 }
5185 }
5186 SKIP_BLANKS;
5187 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005188 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005189 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005190 } else {
5191 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005192 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5193 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005194 }
5195 NEXT;
5196 }
5197 if (orig != NULL) {
5198 /*
5199 * Ugly mechanism to save the raw entity value.
5200 */
5201 xmlEntityPtr cur = NULL;
5202
5203 if (isParameter) {
5204 if ((ctxt->sax != NULL) &&
5205 (ctxt->sax->getParameterEntity != NULL))
5206 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5207 } else {
5208 if ((ctxt->sax != NULL) &&
5209 (ctxt->sax->getEntity != NULL))
5210 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005211 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005212 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005213 }
Owen Taylor3473f882001-02-23 17:55:21 +00005214 }
5215 if (cur != NULL) {
5216 if (cur->orig != NULL)
5217 xmlFree(orig);
5218 else
5219 cur->orig = orig;
5220 } else
5221 xmlFree(orig);
5222 }
Owen Taylor3473f882001-02-23 17:55:21 +00005223 if (value != NULL) xmlFree(value);
5224 if (URI != NULL) xmlFree(URI);
5225 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005226 }
5227}
5228
5229/**
5230 * xmlParseDefaultDecl:
5231 * @ctxt: an XML parser context
5232 * @value: Receive a possible fixed default value for the attribute
5233 *
5234 * Parse an attribute default declaration
5235 *
5236 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5237 *
5238 * [ VC: Required Attribute ]
5239 * if the default declaration is the keyword #REQUIRED, then the
5240 * attribute must be specified for all elements of the type in the
5241 * attribute-list declaration.
5242 *
5243 * [ VC: Attribute Default Legal ]
5244 * The declared default value must meet the lexical constraints of
5245 * the declared attribute type c.f. xmlValidateAttributeDecl()
5246 *
5247 * [ VC: Fixed Attribute Default ]
5248 * if an attribute has a default value declared with the #FIXED
5249 * keyword, instances of that attribute must match the default value.
5250 *
5251 * [ WFC: No < in Attribute Values ]
5252 * handled in xmlParseAttValue()
5253 *
5254 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5255 * or XML_ATTRIBUTE_FIXED.
5256 */
5257
5258int
5259xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5260 int val;
5261 xmlChar *ret;
5262
5263 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005264 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005265 SKIP(9);
5266 return(XML_ATTRIBUTE_REQUIRED);
5267 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005268 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005269 SKIP(8);
5270 return(XML_ATTRIBUTE_IMPLIED);
5271 }
5272 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005273 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005274 SKIP(6);
5275 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005276 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005277 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5278 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005279 }
5280 SKIP_BLANKS;
5281 }
5282 ret = xmlParseAttValue(ctxt);
5283 ctxt->instate = XML_PARSER_DTD;
5284 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005285 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005286 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005287 } else
5288 *value = ret;
5289 return(val);
5290}
5291
5292/**
5293 * xmlParseNotationType:
5294 * @ctxt: an XML parser context
5295 *
5296 * parse an Notation attribute type.
5297 *
5298 * Note: the leading 'NOTATION' S part has already being parsed...
5299 *
5300 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5301 *
5302 * [ VC: Notation Attributes ]
5303 * Values of this type must match one of the notation names included
5304 * in the declaration; all notation names in the declaration must be declared.
5305 *
5306 * Returns: the notation attribute tree built while parsing
5307 */
5308
5309xmlEnumerationPtr
5310xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005311 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005312 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005313
5314 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005315 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005316 return(NULL);
5317 }
5318 SHRINK;
5319 do {
5320 NEXT;
5321 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005322 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005323 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005324 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5325 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005326 xmlFreeEnumeration(ret);
5327 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005328 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005329 tmp = ret;
5330 while (tmp != NULL) {
5331 if (xmlStrEqual(name, tmp->name)) {
5332 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5333 "standalone: attribute notation value token %s duplicated\n",
5334 name, NULL);
5335 if (!xmlDictOwns(ctxt->dict, name))
5336 xmlFree((xmlChar *) name);
5337 break;
5338 }
5339 tmp = tmp->next;
5340 }
5341 if (tmp == NULL) {
5342 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005343 if (cur == NULL) {
5344 xmlFreeEnumeration(ret);
5345 return(NULL);
5346 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005347 if (last == NULL) ret = last = cur;
5348 else {
5349 last->next = cur;
5350 last = cur;
5351 }
Owen Taylor3473f882001-02-23 17:55:21 +00005352 }
5353 SKIP_BLANKS;
5354 } while (RAW == '|');
5355 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005356 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005357 xmlFreeEnumeration(ret);
5358 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005359 }
5360 NEXT;
5361 return(ret);
5362}
5363
5364/**
5365 * xmlParseEnumerationType:
5366 * @ctxt: an XML parser context
5367 *
5368 * parse an Enumeration attribute type.
5369 *
5370 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5371 *
5372 * [ VC: Enumeration ]
5373 * Values of this type must match one of the Nmtoken tokens in
5374 * the declaration
5375 *
5376 * Returns: the enumeration attribute tree built while parsing
5377 */
5378
5379xmlEnumerationPtr
5380xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5381 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005382 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005383
5384 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005385 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005386 return(NULL);
5387 }
5388 SHRINK;
5389 do {
5390 NEXT;
5391 SKIP_BLANKS;
5392 name = xmlParseNmtoken(ctxt);
5393 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005394 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005395 return(ret);
5396 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005397 tmp = ret;
5398 while (tmp != NULL) {
5399 if (xmlStrEqual(name, tmp->name)) {
5400 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5401 "standalone: attribute enumeration value token %s duplicated\n",
5402 name, NULL);
5403 if (!xmlDictOwns(ctxt->dict, name))
5404 xmlFree(name);
5405 break;
5406 }
5407 tmp = tmp->next;
5408 }
5409 if (tmp == NULL) {
5410 cur = xmlCreateEnumeration(name);
5411 if (!xmlDictOwns(ctxt->dict, name))
5412 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005413 if (cur == NULL) {
5414 xmlFreeEnumeration(ret);
5415 return(NULL);
5416 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005417 if (last == NULL) ret = last = cur;
5418 else {
5419 last->next = cur;
5420 last = cur;
5421 }
Owen Taylor3473f882001-02-23 17:55:21 +00005422 }
5423 SKIP_BLANKS;
5424 } while (RAW == '|');
5425 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005426 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005427 return(ret);
5428 }
5429 NEXT;
5430 return(ret);
5431}
5432
5433/**
5434 * xmlParseEnumeratedType:
5435 * @ctxt: an XML parser context
5436 * @tree: the enumeration tree built while parsing
5437 *
5438 * parse an Enumerated attribute type.
5439 *
5440 * [57] EnumeratedType ::= NotationType | Enumeration
5441 *
5442 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5443 *
5444 *
5445 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5446 */
5447
5448int
5449xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005450 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005451 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005452 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005453 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5454 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005455 return(0);
5456 }
5457 SKIP_BLANKS;
5458 *tree = xmlParseNotationType(ctxt);
5459 if (*tree == NULL) return(0);
5460 return(XML_ATTRIBUTE_NOTATION);
5461 }
5462 *tree = xmlParseEnumerationType(ctxt);
5463 if (*tree == NULL) return(0);
5464 return(XML_ATTRIBUTE_ENUMERATION);
5465}
5466
5467/**
5468 * xmlParseAttributeType:
5469 * @ctxt: an XML parser context
5470 * @tree: the enumeration tree built while parsing
5471 *
5472 * parse the Attribute list def for an element
5473 *
5474 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5475 *
5476 * [55] StringType ::= 'CDATA'
5477 *
5478 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5479 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5480 *
5481 * Validity constraints for attribute values syntax are checked in
5482 * xmlValidateAttributeValue()
5483 *
5484 * [ VC: ID ]
5485 * Values of type ID must match the Name production. A name must not
5486 * appear more than once in an XML document as a value of this type;
5487 * i.e., ID values must uniquely identify the elements which bear them.
5488 *
5489 * [ VC: One ID per Element Type ]
5490 * No element type may have more than one ID attribute specified.
5491 *
5492 * [ VC: ID Attribute Default ]
5493 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5494 *
5495 * [ VC: IDREF ]
5496 * Values of type IDREF must match the Name production, and values
5497 * of type IDREFS must match Names; each IDREF Name must match the value
5498 * of an ID attribute on some element in the XML document; i.e. IDREF
5499 * values must match the value of some ID attribute.
5500 *
5501 * [ VC: Entity Name ]
5502 * Values of type ENTITY must match the Name production, values
5503 * of type ENTITIES must match Names; each Entity Name must match the
5504 * name of an unparsed entity declared in the DTD.
5505 *
5506 * [ VC: Name Token ]
5507 * Values of type NMTOKEN must match the Nmtoken production; values
5508 * of type NMTOKENS must match Nmtokens.
5509 *
5510 * Returns the attribute type
5511 */
5512int
5513xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5514 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005515 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005516 SKIP(5);
5517 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005518 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005519 SKIP(6);
5520 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005521 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005522 SKIP(5);
5523 return(XML_ATTRIBUTE_IDREF);
5524 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5525 SKIP(2);
5526 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005527 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005528 SKIP(6);
5529 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005530 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005531 SKIP(8);
5532 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005533 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005534 SKIP(8);
5535 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005536 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005537 SKIP(7);
5538 return(XML_ATTRIBUTE_NMTOKEN);
5539 }
5540 return(xmlParseEnumeratedType(ctxt, tree));
5541}
5542
5543/**
5544 * xmlParseAttributeListDecl:
5545 * @ctxt: an XML parser context
5546 *
5547 * : parse the Attribute list def for an element
5548 *
5549 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5550 *
5551 * [53] AttDef ::= S Name S AttType S DefaultDecl
5552 *
5553 */
5554void
5555xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005556 const xmlChar *elemName;
5557 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005558 xmlEnumerationPtr tree;
5559
Daniel Veillarda07050d2003-10-19 14:46:32 +00005560 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005561 xmlParserInputPtr input = ctxt->input;
5562
5563 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005564 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005565 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005566 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005567 }
5568 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005569 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005570 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005571 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5572 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005573 return;
5574 }
5575 SKIP_BLANKS;
5576 GROW;
5577 while (RAW != '>') {
5578 const xmlChar *check = CUR_PTR;
5579 int type;
5580 int def;
5581 xmlChar *defaultValue = NULL;
5582
5583 GROW;
5584 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005585 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005586 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005587 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5588 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005589 break;
5590 }
5591 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005592 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005593 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005594 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005595 break;
5596 }
5597 SKIP_BLANKS;
5598
5599 type = xmlParseAttributeType(ctxt, &tree);
5600 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005601 break;
5602 }
5603
5604 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005605 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005606 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5607 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005608 if (tree != NULL)
5609 xmlFreeEnumeration(tree);
5610 break;
5611 }
5612 SKIP_BLANKS;
5613
5614 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5615 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005616 if (defaultValue != NULL)
5617 xmlFree(defaultValue);
5618 if (tree != NULL)
5619 xmlFreeEnumeration(tree);
5620 break;
5621 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005622 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5623 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005624
5625 GROW;
5626 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005627 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005629 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005630 if (defaultValue != NULL)
5631 xmlFree(defaultValue);
5632 if (tree != NULL)
5633 xmlFreeEnumeration(tree);
5634 break;
5635 }
5636 SKIP_BLANKS;
5637 }
5638 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005639 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5640 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005641 if (defaultValue != NULL)
5642 xmlFree(defaultValue);
5643 if (tree != NULL)
5644 xmlFreeEnumeration(tree);
5645 break;
5646 }
5647 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5648 (ctxt->sax->attributeDecl != NULL))
5649 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5650 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005651 else if (tree != NULL)
5652 xmlFreeEnumeration(tree);
5653
5654 if ((ctxt->sax2) && (defaultValue != NULL) &&
5655 (def != XML_ATTRIBUTE_IMPLIED) &&
5656 (def != XML_ATTRIBUTE_REQUIRED)) {
5657 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5658 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005659 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005660 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5661 }
Owen Taylor3473f882001-02-23 17:55:21 +00005662 if (defaultValue != NULL)
5663 xmlFree(defaultValue);
5664 GROW;
5665 }
5666 if (RAW == '>') {
5667 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005668 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5669 "Attribute list declaration doesn't start and stop in the same entity\n",
5670 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005671 }
5672 NEXT;
5673 }
Owen Taylor3473f882001-02-23 17:55:21 +00005674 }
5675}
5676
5677/**
5678 * xmlParseElementMixedContentDecl:
5679 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005680 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005681 *
5682 * parse the declaration for a Mixed Element content
5683 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5684 *
5685 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5686 * '(' S? '#PCDATA' S? ')'
5687 *
5688 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5689 *
5690 * [ VC: No Duplicate Types ]
5691 * The same name must not appear more than once in a single
5692 * mixed-content declaration.
5693 *
5694 * returns: the list of the xmlElementContentPtr describing the element choices
5695 */
5696xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005697xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005698 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005699 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005700
5701 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005702 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005703 SKIP(7);
5704 SKIP_BLANKS;
5705 SHRINK;
5706 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005707 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005708 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5709"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005710 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005711 }
Owen Taylor3473f882001-02-23 17:55:21 +00005712 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005713 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005714 if (ret == NULL)
5715 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005716 if (RAW == '*') {
5717 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5718 NEXT;
5719 }
5720 return(ret);
5721 }
5722 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005723 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005724 if (ret == NULL) return(NULL);
5725 }
5726 while (RAW == '|') {
5727 NEXT;
5728 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005729 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005730 if (ret == NULL) return(NULL);
5731 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005732 if (cur != NULL)
5733 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005734 cur = ret;
5735 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005736 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005737 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005738 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005739 if (n->c1 != NULL)
5740 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005741 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005742 if (n != NULL)
5743 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005744 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005745 }
5746 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005747 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005748 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005749 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005750 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005751 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005752 return(NULL);
5753 }
5754 SKIP_BLANKS;
5755 GROW;
5756 }
5757 if ((RAW == ')') && (NXT(1) == '*')) {
5758 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005759 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005760 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005761 if (cur->c2 != NULL)
5762 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005763 }
5764 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005765 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005766 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5767"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005768 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005769 }
Owen Taylor3473f882001-02-23 17:55:21 +00005770 SKIP(2);
5771 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005772 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005773 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005774 return(NULL);
5775 }
5776
5777 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005778 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005779 }
5780 return(ret);
5781}
5782
5783/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005784 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005785 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005786 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005787 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005788 *
5789 * parse the declaration for a Mixed Element content
5790 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5791 *
5792 *
5793 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5794 *
5795 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5796 *
5797 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5798 *
5799 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5800 *
5801 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5802 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005803 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005804 * opening or closing parentheses in a choice, seq, or Mixed
5805 * construct is contained in the replacement text for a parameter
5806 * entity, both must be contained in the same replacement text. For
5807 * interoperability, if a parameter-entity reference appears in a
5808 * choice, seq, or Mixed construct, its replacement text should not
5809 * be empty, and neither the first nor last non-blank character of
5810 * the replacement text should be a connector (| or ,).
5811 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005812 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005813 * hierarchy.
5814 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005815static xmlElementContentPtr
5816xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5817 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005818 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005819 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005820 xmlChar type = 0;
5821
Daniel Veillard489f9672009-08-10 16:49:30 +02005822 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5823 (depth > 2048)) {
5824 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
5825"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
5826 depth);
5827 return(NULL);
5828 }
Owen Taylor3473f882001-02-23 17:55:21 +00005829 SKIP_BLANKS;
5830 GROW;
5831 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005832 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005833
Owen Taylor3473f882001-02-23 17:55:21 +00005834 /* Recurse on first child */
5835 NEXT;
5836 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005837 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5838 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005839 SKIP_BLANKS;
5840 GROW;
5841 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005842 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005843 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005844 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005845 return(NULL);
5846 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005847 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005848 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005849 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005850 return(NULL);
5851 }
Owen Taylor3473f882001-02-23 17:55:21 +00005852 GROW;
5853 if (RAW == '?') {
5854 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5855 NEXT;
5856 } else if (RAW == '*') {
5857 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5858 NEXT;
5859 } else if (RAW == '+') {
5860 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5861 NEXT;
5862 } else {
5863 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5864 }
Owen Taylor3473f882001-02-23 17:55:21 +00005865 GROW;
5866 }
5867 SKIP_BLANKS;
5868 SHRINK;
5869 while (RAW != ')') {
5870 /*
5871 * Each loop we parse one separator and one element.
5872 */
5873 if (RAW == ',') {
5874 if (type == 0) type = CUR;
5875
5876 /*
5877 * Detect "Name | Name , Name" error
5878 */
5879 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005880 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005881 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005882 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005883 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005884 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005885 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005886 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005887 return(NULL);
5888 }
5889 NEXT;
5890
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005891 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005892 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005893 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005894 xmlFreeDocElementContent(ctxt->myDoc, last);
5895 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005896 return(NULL);
5897 }
5898 if (last == NULL) {
5899 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005900 if (ret != NULL)
5901 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005902 ret = cur = op;
5903 } else {
5904 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005905 if (op != NULL)
5906 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005907 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005908 if (last != NULL)
5909 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005910 cur =op;
5911 last = NULL;
5912 }
5913 } else if (RAW == '|') {
5914 if (type == 0) type = CUR;
5915
5916 /*
5917 * Detect "Name , Name | Name" error
5918 */
5919 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005920 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005921 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005922 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005923 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005924 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005925 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005926 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005927 return(NULL);
5928 }
5929 NEXT;
5930
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005931 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005932 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005933 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005934 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005935 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005936 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005937 return(NULL);
5938 }
5939 if (last == NULL) {
5940 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005941 if (ret != NULL)
5942 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005943 ret = cur = op;
5944 } else {
5945 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005946 if (op != NULL)
5947 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005948 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005949 if (last != NULL)
5950 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005951 cur =op;
5952 last = NULL;
5953 }
5954 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005955 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005956 if ((last != NULL) && (last != ret))
5957 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005958 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005959 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005960 return(NULL);
5961 }
5962 GROW;
5963 SKIP_BLANKS;
5964 GROW;
5965 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005966 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005967 /* Recurse on second child */
5968 NEXT;
5969 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02005970 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
5971 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00005972 SKIP_BLANKS;
5973 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005974 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005975 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005976 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005977 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005978 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005979 return(NULL);
5980 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005981 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005982 if (last == NULL) {
5983 if (ret != NULL)
5984 xmlFreeDocElementContent(ctxt->myDoc, ret);
5985 return(NULL);
5986 }
Owen Taylor3473f882001-02-23 17:55:21 +00005987 if (RAW == '?') {
5988 last->ocur = XML_ELEMENT_CONTENT_OPT;
5989 NEXT;
5990 } else if (RAW == '*') {
5991 last->ocur = XML_ELEMENT_CONTENT_MULT;
5992 NEXT;
5993 } else if (RAW == '+') {
5994 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5995 NEXT;
5996 } else {
5997 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5998 }
5999 }
6000 SKIP_BLANKS;
6001 GROW;
6002 }
6003 if ((cur != NULL) && (last != NULL)) {
6004 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006005 if (last != NULL)
6006 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006007 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006008 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006009 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6010"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006011 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006012 }
Owen Taylor3473f882001-02-23 17:55:21 +00006013 NEXT;
6014 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006015 if (ret != NULL) {
6016 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6017 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6018 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6019 else
6020 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6021 }
Owen Taylor3473f882001-02-23 17:55:21 +00006022 NEXT;
6023 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006024 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006025 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006026 cur = ret;
6027 /*
6028 * Some normalization:
6029 * (a | b* | c?)* == (a | b | c)*
6030 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006031 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006032 if ((cur->c1 != NULL) &&
6033 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6034 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6035 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6036 if ((cur->c2 != NULL) &&
6037 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6038 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6039 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6040 cur = cur->c2;
6041 }
6042 }
Owen Taylor3473f882001-02-23 17:55:21 +00006043 NEXT;
6044 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006045 if (ret != NULL) {
6046 int found = 0;
6047
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006048 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6049 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6050 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006051 else
6052 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006053 /*
6054 * Some normalization:
6055 * (a | b*)+ == (a | b)*
6056 * (a | b?)+ == (a | b)*
6057 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006058 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006059 if ((cur->c1 != NULL) &&
6060 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6061 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6062 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6063 found = 1;
6064 }
6065 if ((cur->c2 != NULL) &&
6066 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6067 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6068 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6069 found = 1;
6070 }
6071 cur = cur->c2;
6072 }
6073 if (found)
6074 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6075 }
Owen Taylor3473f882001-02-23 17:55:21 +00006076 NEXT;
6077 }
6078 return(ret);
6079}
6080
6081/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006082 * xmlParseElementChildrenContentDecl:
6083 * @ctxt: an XML parser context
6084 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006085 *
6086 * parse the declaration for a Mixed Element content
6087 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6088 *
6089 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6090 *
6091 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6092 *
6093 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6094 *
6095 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6096 *
6097 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6098 * TODO Parameter-entity replacement text must be properly nested
6099 * with parenthesized groups. That is to say, if either of the
6100 * opening or closing parentheses in a choice, seq, or Mixed
6101 * construct is contained in the replacement text for a parameter
6102 * entity, both must be contained in the same replacement text. For
6103 * interoperability, if a parameter-entity reference appears in a
6104 * choice, seq, or Mixed construct, its replacement text should not
6105 * be empty, and neither the first nor last non-blank character of
6106 * the replacement text should be a connector (| or ,).
6107 *
6108 * Returns the tree of xmlElementContentPtr describing the element
6109 * hierarchy.
6110 */
6111xmlElementContentPtr
6112xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6113 /* stub left for API/ABI compat */
6114 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6115}
6116
6117/**
Owen Taylor3473f882001-02-23 17:55:21 +00006118 * xmlParseElementContentDecl:
6119 * @ctxt: an XML parser context
6120 * @name: the name of the element being defined.
6121 * @result: the Element Content pointer will be stored here if any
6122 *
6123 * parse the declaration for an Element content either Mixed or Children,
6124 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6125 *
6126 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6127 *
6128 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6129 */
6130
6131int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006132xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006133 xmlElementContentPtr *result) {
6134
6135 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006136 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006137 int res;
6138
6139 *result = NULL;
6140
6141 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006142 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006143 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006144 return(-1);
6145 }
6146 NEXT;
6147 GROW;
6148 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006149 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006150 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006151 res = XML_ELEMENT_TYPE_MIXED;
6152 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006153 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006154 res = XML_ELEMENT_TYPE_ELEMENT;
6155 }
Owen Taylor3473f882001-02-23 17:55:21 +00006156 SKIP_BLANKS;
6157 *result = tree;
6158 return(res);
6159}
6160
6161/**
6162 * xmlParseElementDecl:
6163 * @ctxt: an XML parser context
6164 *
6165 * parse an Element declaration.
6166 *
6167 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6168 *
6169 * [ VC: Unique Element Type Declaration ]
6170 * No element type may be declared more than once
6171 *
6172 * Returns the type of the element, or -1 in case of error
6173 */
6174int
6175xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006176 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006177 int ret = -1;
6178 xmlElementContentPtr content = NULL;
6179
Daniel Veillard4c778d82005-01-23 17:37:44 +00006180 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006181 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006182 xmlParserInputPtr input = ctxt->input;
6183
6184 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006185 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006186 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6187 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006188 }
6189 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006190 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006191 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006192 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6193 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006194 return(-1);
6195 }
6196 while ((RAW == 0) && (ctxt->inputNr > 1))
6197 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006198 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006199 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6200 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006201 }
6202 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006203 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006204 SKIP(5);
6205 /*
6206 * Element must always be empty.
6207 */
6208 ret = XML_ELEMENT_TYPE_EMPTY;
6209 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6210 (NXT(2) == 'Y')) {
6211 SKIP(3);
6212 /*
6213 * Element is a generic container.
6214 */
6215 ret = XML_ELEMENT_TYPE_ANY;
6216 } else if (RAW == '(') {
6217 ret = xmlParseElementContentDecl(ctxt, name, &content);
6218 } else {
6219 /*
6220 * [ WFC: PEs in Internal Subset ] error handling.
6221 */
6222 if ((RAW == '%') && (ctxt->external == 0) &&
6223 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006224 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006225 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006226 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006227 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006228 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6229 }
Owen Taylor3473f882001-02-23 17:55:21 +00006230 return(-1);
6231 }
6232
6233 SKIP_BLANKS;
6234 /*
6235 * Pop-up of finished entities.
6236 */
6237 while ((RAW == 0) && (ctxt->inputNr > 1))
6238 xmlPopInput(ctxt);
6239 SKIP_BLANKS;
6240
6241 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006242 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006243 if (content != NULL) {
6244 xmlFreeDocElementContent(ctxt->myDoc, content);
6245 }
Owen Taylor3473f882001-02-23 17:55:21 +00006246 } else {
6247 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006248 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6249 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006250 }
6251
6252 NEXT;
6253 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006254 (ctxt->sax->elementDecl != NULL)) {
6255 if (content != NULL)
6256 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006257 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6258 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006259 if ((content != NULL) && (content->parent == NULL)) {
6260 /*
6261 * this is a trick: if xmlAddElementDecl is called,
6262 * instead of copying the full tree it is plugged directly
6263 * if called from the parser. Avoid duplicating the
6264 * interfaces or change the API/ABI
6265 */
6266 xmlFreeDocElementContent(ctxt->myDoc, content);
6267 }
6268 } else if (content != NULL) {
6269 xmlFreeDocElementContent(ctxt->myDoc, content);
6270 }
Owen Taylor3473f882001-02-23 17:55:21 +00006271 }
Owen Taylor3473f882001-02-23 17:55:21 +00006272 }
6273 return(ret);
6274}
6275
6276/**
Owen Taylor3473f882001-02-23 17:55:21 +00006277 * xmlParseConditionalSections
6278 * @ctxt: an XML parser context
6279 *
6280 * [61] conditionalSect ::= includeSect | ignoreSect
6281 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6282 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6283 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6284 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6285 */
6286
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006287static void
Owen Taylor3473f882001-02-23 17:55:21 +00006288xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006289 int id = ctxt->input->id;
6290
Owen Taylor3473f882001-02-23 17:55:21 +00006291 SKIP(3);
6292 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006293 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006294 SKIP(7);
6295 SKIP_BLANKS;
6296 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006297 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006298 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006299 if (ctxt->input->id != id) {
6300 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6301 "All markup of the conditional section is not in the same entity\n",
6302 NULL, NULL);
6303 }
Owen Taylor3473f882001-02-23 17:55:21 +00006304 NEXT;
6305 }
6306 if (xmlParserDebugEntities) {
6307 if ((ctxt->input != NULL) && (ctxt->input->filename))
6308 xmlGenericError(xmlGenericErrorContext,
6309 "%s(%d): ", ctxt->input->filename,
6310 ctxt->input->line);
6311 xmlGenericError(xmlGenericErrorContext,
6312 "Entering INCLUDE Conditional Section\n");
6313 }
6314
6315 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6316 (NXT(2) != '>'))) {
6317 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006318 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006319
6320 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6321 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006322 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006323 NEXT;
6324 } else if (RAW == '%') {
6325 xmlParsePEReference(ctxt);
6326 } else
6327 xmlParseMarkupDecl(ctxt);
6328
6329 /*
6330 * Pop-up of finished entities.
6331 */
6332 while ((RAW == 0) && (ctxt->inputNr > 1))
6333 xmlPopInput(ctxt);
6334
Daniel Veillardfdc91562002-07-01 21:52:03 +00006335 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006336 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006337 break;
6338 }
6339 }
6340 if (xmlParserDebugEntities) {
6341 if ((ctxt->input != NULL) && (ctxt->input->filename))
6342 xmlGenericError(xmlGenericErrorContext,
6343 "%s(%d): ", ctxt->input->filename,
6344 ctxt->input->line);
6345 xmlGenericError(xmlGenericErrorContext,
6346 "Leaving INCLUDE Conditional Section\n");
6347 }
6348
Daniel Veillarda07050d2003-10-19 14:46:32 +00006349 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006350 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006351 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006352 int depth = 0;
6353
6354 SKIP(6);
6355 SKIP_BLANKS;
6356 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006357 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006358 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006359 if (ctxt->input->id != id) {
6360 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6361 "All markup of the conditional section is not in the same entity\n",
6362 NULL, NULL);
6363 }
Owen Taylor3473f882001-02-23 17:55:21 +00006364 NEXT;
6365 }
6366 if (xmlParserDebugEntities) {
6367 if ((ctxt->input != NULL) && (ctxt->input->filename))
6368 xmlGenericError(xmlGenericErrorContext,
6369 "%s(%d): ", ctxt->input->filename,
6370 ctxt->input->line);
6371 xmlGenericError(xmlGenericErrorContext,
6372 "Entering IGNORE Conditional Section\n");
6373 }
6374
6375 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006376 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006377 * But disable SAX event generating DTD building in the meantime
6378 */
6379 state = ctxt->disableSAX;
6380 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006381 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006382 ctxt->instate = XML_PARSER_IGNORE;
6383
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006384 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006385 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6386 depth++;
6387 SKIP(3);
6388 continue;
6389 }
6390 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6391 if (--depth >= 0) SKIP(3);
6392 continue;
6393 }
6394 NEXT;
6395 continue;
6396 }
6397
6398 ctxt->disableSAX = state;
6399 ctxt->instate = instate;
6400
6401 if (xmlParserDebugEntities) {
6402 if ((ctxt->input != NULL) && (ctxt->input->filename))
6403 xmlGenericError(xmlGenericErrorContext,
6404 "%s(%d): ", ctxt->input->filename,
6405 ctxt->input->line);
6406 xmlGenericError(xmlGenericErrorContext,
6407 "Leaving IGNORE Conditional Section\n");
6408 }
6409
6410 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006411 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006412 }
6413
6414 if (RAW == 0)
6415 SHRINK;
6416
6417 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006418 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006419 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006420 if (ctxt->input->id != id) {
6421 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6422 "All markup of the conditional section is not in the same entity\n",
6423 NULL, NULL);
6424 }
Owen Taylor3473f882001-02-23 17:55:21 +00006425 SKIP(3);
6426 }
6427}
6428
6429/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006430 * xmlParseMarkupDecl:
6431 * @ctxt: an XML parser context
6432 *
6433 * parse Markup declarations
6434 *
6435 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6436 * NotationDecl | PI | Comment
6437 *
6438 * [ VC: Proper Declaration/PE Nesting ]
6439 * Parameter-entity replacement text must be properly nested with
6440 * markup declarations. That is to say, if either the first character
6441 * or the last character of a markup declaration (markupdecl above) is
6442 * contained in the replacement text for a parameter-entity reference,
6443 * both must be contained in the same replacement text.
6444 *
6445 * [ WFC: PEs in Internal Subset ]
6446 * In the internal DTD subset, parameter-entity references can occur
6447 * only where markup declarations can occur, not within markup declarations.
6448 * (This does not apply to references that occur in external parameter
6449 * entities or to the external subset.)
6450 */
6451void
6452xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6453 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006454 if (CUR == '<') {
6455 if (NXT(1) == '!') {
6456 switch (NXT(2)) {
6457 case 'E':
6458 if (NXT(3) == 'L')
6459 xmlParseElementDecl(ctxt);
6460 else if (NXT(3) == 'N')
6461 xmlParseEntityDecl(ctxt);
6462 break;
6463 case 'A':
6464 xmlParseAttributeListDecl(ctxt);
6465 break;
6466 case 'N':
6467 xmlParseNotationDecl(ctxt);
6468 break;
6469 case '-':
6470 xmlParseComment(ctxt);
6471 break;
6472 default:
6473 /* there is an error but it will be detected later */
6474 break;
6475 }
6476 } else if (NXT(1) == '?') {
6477 xmlParsePI(ctxt);
6478 }
6479 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006480 /*
6481 * This is only for internal subset. On external entities,
6482 * the replacement is done before parsing stage
6483 */
6484 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6485 xmlParsePEReference(ctxt);
6486
6487 /*
6488 * Conditional sections are allowed from entities included
6489 * by PE References in the internal subset.
6490 */
6491 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6492 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6493 xmlParseConditionalSections(ctxt);
6494 }
6495 }
6496
6497 ctxt->instate = XML_PARSER_DTD;
6498}
6499
6500/**
6501 * xmlParseTextDecl:
6502 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006503 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006504 * parse an XML declaration header for external entities
6505 *
6506 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006507 */
6508
6509void
6510xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6511 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006512 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006513
6514 /*
6515 * We know that '<?xml' is here.
6516 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006517 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006518 SKIP(5);
6519 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006520 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006521 return;
6522 }
6523
William M. Brack76e95df2003-10-18 16:20:14 +00006524 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006525 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6526 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006527 }
6528 SKIP_BLANKS;
6529
6530 /*
6531 * We may have the VersionInfo here.
6532 */
6533 version = xmlParseVersionInfo(ctxt);
6534 if (version == NULL)
6535 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006536 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006537 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006538 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6539 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006540 }
6541 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006542 ctxt->input->version = version;
6543
6544 /*
6545 * We must have the encoding declaration
6546 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006547 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006548 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6549 /*
6550 * The XML REC instructs us to stop parsing right here
6551 */
6552 return;
6553 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006554 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6555 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6556 "Missing encoding in text declaration\n");
6557 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006558
6559 SKIP_BLANKS;
6560 if ((RAW == '?') && (NXT(1) == '>')) {
6561 SKIP(2);
6562 } else if (RAW == '>') {
6563 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006564 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006565 NEXT;
6566 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006567 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006568 MOVETO_ENDTAG(CUR_PTR);
6569 NEXT;
6570 }
6571}
6572
6573/**
Owen Taylor3473f882001-02-23 17:55:21 +00006574 * xmlParseExternalSubset:
6575 * @ctxt: an XML parser context
6576 * @ExternalID: the external identifier
6577 * @SystemID: the system identifier (or URL)
6578 *
6579 * parse Markup declarations from an external subset
6580 *
6581 * [30] extSubset ::= textDecl? extSubsetDecl
6582 *
6583 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6584 */
6585void
6586xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6587 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006588 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006589 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006590
6591 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6592 (ctxt->input->end - ctxt->input->cur >= 4)) {
6593 xmlChar start[4];
6594 xmlCharEncoding enc;
6595
6596 start[0] = RAW;
6597 start[1] = NXT(1);
6598 start[2] = NXT(2);
6599 start[3] = NXT(3);
6600 enc = xmlDetectCharEncoding(start, 4);
6601 if (enc != XML_CHAR_ENCODING_NONE)
6602 xmlSwitchEncoding(ctxt, enc);
6603 }
6604
Daniel Veillarda07050d2003-10-19 14:46:32 +00006605 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006606 xmlParseTextDecl(ctxt);
6607 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6608 /*
6609 * The XML REC instructs us to stop parsing right here
6610 */
6611 ctxt->instate = XML_PARSER_EOF;
6612 return;
6613 }
6614 }
6615 if (ctxt->myDoc == NULL) {
6616 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006617 if (ctxt->myDoc == NULL) {
6618 xmlErrMemory(ctxt, "New Doc failed");
6619 return;
6620 }
6621 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006622 }
6623 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6624 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6625
6626 ctxt->instate = XML_PARSER_DTD;
6627 ctxt->external = 1;
6628 while (((RAW == '<') && (NXT(1) == '?')) ||
6629 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006630 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006631 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006632 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006633
6634 GROW;
6635 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6636 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006637 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006638 NEXT;
6639 } else if (RAW == '%') {
6640 xmlParsePEReference(ctxt);
6641 } else
6642 xmlParseMarkupDecl(ctxt);
6643
6644 /*
6645 * Pop-up of finished entities.
6646 */
6647 while ((RAW == 0) && (ctxt->inputNr > 1))
6648 xmlPopInput(ctxt);
6649
Daniel Veillardfdc91562002-07-01 21:52:03 +00006650 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006651 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006652 break;
6653 }
6654 }
6655
6656 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006657 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006658 }
6659
6660}
6661
6662/**
6663 * xmlParseReference:
6664 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006665 *
Owen Taylor3473f882001-02-23 17:55:21 +00006666 * parse and handle entity references in content, depending on the SAX
6667 * interface, this may end-up in a call to character() if this is a
6668 * CharRef, a predefined entity, if there is no reference() callback.
6669 * or if the parser was asked to switch to that mode.
6670 *
6671 * [67] Reference ::= EntityRef | CharRef
6672 */
6673void
6674xmlParseReference(xmlParserCtxtPtr ctxt) {
6675 xmlEntityPtr ent;
6676 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006677 int was_checked;
6678 xmlNodePtr list = NULL;
6679 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006680
Daniel Veillard0161e632008-08-28 15:36:32 +00006681
6682 if (RAW != '&')
6683 return;
6684
6685 /*
6686 * Simple case of a CharRef
6687 */
Owen Taylor3473f882001-02-23 17:55:21 +00006688 if (NXT(1) == '#') {
6689 int i = 0;
6690 xmlChar out[10];
6691 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006692 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006693
Daniel Veillarddc171602008-03-26 17:41:38 +00006694 if (value == 0)
6695 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006696 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6697 /*
6698 * So we are using non-UTF-8 buffers
6699 * Check that the char fit on 8bits, if not
6700 * generate a CharRef.
6701 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006702 if (value <= 0xFF) {
6703 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006704 out[1] = 0;
6705 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6706 (!ctxt->disableSAX))
6707 ctxt->sax->characters(ctxt->userData, out, 1);
6708 } else {
6709 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006710 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006711 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006712 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006713 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6714 (!ctxt->disableSAX))
6715 ctxt->sax->reference(ctxt->userData, out);
6716 }
6717 } else {
6718 /*
6719 * Just encode the value in UTF-8
6720 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006721 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006722 out[i] = 0;
6723 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6724 (!ctxt->disableSAX))
6725 ctxt->sax->characters(ctxt->userData, out, i);
6726 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006727 return;
6728 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006729
Daniel Veillard0161e632008-08-28 15:36:32 +00006730 /*
6731 * We are seeing an entity reference
6732 */
6733 ent = xmlParseEntityRef(ctxt);
6734 if (ent == NULL) return;
6735 if (!ctxt->wellFormed)
6736 return;
6737 was_checked = ent->checked;
6738
6739 /* special case of predefined entities */
6740 if ((ent->name == NULL) ||
6741 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6742 val = ent->content;
6743 if (val == NULL) return;
6744 /*
6745 * inline the entity.
6746 */
6747 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6748 (!ctxt->disableSAX))
6749 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6750 return;
6751 }
6752
6753 /*
6754 * The first reference to the entity trigger a parsing phase
6755 * where the ent->children is filled with the result from
6756 * the parsing.
6757 */
6758 if (ent->checked == 0) {
6759 unsigned long oldnbent = ctxt->nbentities;
6760
6761 /*
6762 * This is a bit hackish but this seems the best
6763 * way to make sure both SAX and DOM entity support
6764 * behaves okay.
6765 */
6766 void *user_data;
6767 if (ctxt->userData == ctxt)
6768 user_data = NULL;
6769 else
6770 user_data = ctxt->userData;
6771
6772 /*
6773 * Check that this entity is well formed
6774 * 4.3.2: An internal general parsed entity is well-formed
6775 * if its replacement text matches the production labeled
6776 * content.
6777 */
6778 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6779 ctxt->depth++;
6780 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6781 user_data, &list);
6782 ctxt->depth--;
6783
6784 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6785 ctxt->depth++;
6786 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6787 user_data, ctxt->depth, ent->URI,
6788 ent->ExternalID, &list);
6789 ctxt->depth--;
6790 } else {
6791 ret = XML_ERR_ENTITY_PE_INTERNAL;
6792 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6793 "invalid entity type found\n", NULL);
6794 }
6795
6796 /*
6797 * Store the number of entities needing parsing for this entity
6798 * content and do checkings
6799 */
6800 ent->checked = ctxt->nbentities - oldnbent;
6801 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006802 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006803 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006804 return;
6805 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006806 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6807 xmlFreeNodeList(list);
6808 return;
6809 }
Owen Taylor3473f882001-02-23 17:55:21 +00006810
Daniel Veillard0161e632008-08-28 15:36:32 +00006811 if ((ret == XML_ERR_OK) && (list != NULL)) {
6812 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6813 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6814 (ent->children == NULL)) {
6815 ent->children = list;
6816 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00006817 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006818 * Prune it directly in the generated document
6819 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00006820 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006821 if (((list->type == XML_TEXT_NODE) &&
6822 (list->next == NULL)) ||
6823 (ctxt->parseMode == XML_PARSE_READER)) {
6824 list->parent = (xmlNodePtr) ent;
6825 list = NULL;
6826 ent->owner = 1;
6827 } else {
6828 ent->owner = 0;
6829 while (list != NULL) {
6830 list->parent = (xmlNodePtr) ctxt->node;
6831 list->doc = ctxt->myDoc;
6832 if (list->next == NULL)
6833 ent->last = list;
6834 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006835 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006836 list = ent->children;
6837#ifdef LIBXML_LEGACY_ENABLED
6838 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6839 xmlAddEntityReference(ent, list, NULL);
6840#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00006841 }
6842 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00006843 ent->owner = 1;
6844 while (list != NULL) {
6845 list->parent = (xmlNodePtr) ent;
6846 if (list->next == NULL)
6847 ent->last = list;
6848 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00006849 }
6850 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006851 } else {
6852 xmlFreeNodeList(list);
6853 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006854 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006855 } else if ((ret != XML_ERR_OK) &&
6856 (ret != XML_WAR_UNDECLARED_ENTITY)) {
6857 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6858 "Entity '%s' failed to parse\n", ent->name);
6859 } else if (list != NULL) {
6860 xmlFreeNodeList(list);
6861 list = NULL;
6862 }
6863 if (ent->checked == 0)
6864 ent->checked = 1;
6865 } else if (ent->checked != 1) {
6866 ctxt->nbentities += ent->checked;
6867 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006868
Daniel Veillard0161e632008-08-28 15:36:32 +00006869 /*
6870 * Now that the entity content has been gathered
6871 * provide it to the application, this can take different forms based
6872 * on the parsing modes.
6873 */
6874 if (ent->children == NULL) {
6875 /*
6876 * Probably running in SAX mode and the callbacks don't
6877 * build the entity content. So unless we already went
6878 * though parsing for first checking go though the entity
6879 * content to generate callbacks associated to the entity
6880 */
6881 if (was_checked != 0) {
6882 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00006883 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00006884 * This is a bit hackish but this seems the best
6885 * way to make sure both SAX and DOM entity support
6886 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00006887 */
Daniel Veillard0161e632008-08-28 15:36:32 +00006888 if (ctxt->userData == ctxt)
6889 user_data = NULL;
6890 else
6891 user_data = ctxt->userData;
6892
6893 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6894 ctxt->depth++;
6895 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6896 ent->content, user_data, NULL);
6897 ctxt->depth--;
6898 } else if (ent->etype ==
6899 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6900 ctxt->depth++;
6901 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6902 ctxt->sax, user_data, ctxt->depth,
6903 ent->URI, ent->ExternalID, NULL);
6904 ctxt->depth--;
6905 } else {
6906 ret = XML_ERR_ENTITY_PE_INTERNAL;
6907 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6908 "invalid entity type found\n", NULL);
6909 }
6910 if (ret == XML_ERR_ENTITY_LOOP) {
6911 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6912 return;
6913 }
6914 }
6915 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6916 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6917 /*
6918 * Entity reference callback comes second, it's somewhat
6919 * superfluous but a compatibility to historical behaviour
6920 */
6921 ctxt->sax->reference(ctxt->userData, ent->name);
6922 }
6923 return;
6924 }
6925
6926 /*
6927 * If we didn't get any children for the entity being built
6928 */
6929 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6930 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6931 /*
6932 * Create a node.
6933 */
6934 ctxt->sax->reference(ctxt->userData, ent->name);
6935 return;
6936 }
6937
6938 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
6939 /*
6940 * There is a problem on the handling of _private for entities
6941 * (bug 155816): Should we copy the content of the field from
6942 * the entity (possibly overwriting some value set by the user
6943 * when a copy is created), should we leave it alone, or should
6944 * we try to take care of different situations? The problem
6945 * is exacerbated by the usage of this field by the xmlReader.
6946 * To fix this bug, we look at _private on the created node
6947 * and, if it's NULL, we copy in whatever was in the entity.
6948 * If it's not NULL we leave it alone. This is somewhat of a
6949 * hack - maybe we should have further tests to determine
6950 * what to do.
6951 */
6952 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6953 /*
6954 * Seems we are generating the DOM content, do
6955 * a simple tree copy for all references except the first
6956 * In the first occurrence list contains the replacement.
6957 * progressive == 2 means we are operating on the Reader
6958 * and since nodes are discarded we must copy all the time.
6959 */
6960 if (((list == NULL) && (ent->owner == 0)) ||
6961 (ctxt->parseMode == XML_PARSE_READER)) {
6962 xmlNodePtr nw = NULL, cur, firstChild = NULL;
6963
6964 /*
6965 * when operating on a reader, the entities definitions
6966 * are always owning the entities subtree.
6967 if (ctxt->parseMode == XML_PARSE_READER)
6968 ent->owner = 1;
6969 */
6970
6971 cur = ent->children;
6972 while (cur != NULL) {
6973 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
6974 if (nw != NULL) {
6975 if (nw->_private == NULL)
6976 nw->_private = cur->_private;
6977 if (firstChild == NULL){
6978 firstChild = nw;
6979 }
6980 nw = xmlAddChild(ctxt->node, nw);
6981 }
6982 if (cur == ent->last) {
6983 /*
6984 * needed to detect some strange empty
6985 * node cases in the reader tests
6986 */
6987 if ((ctxt->parseMode == XML_PARSE_READER) &&
6988 (nw != NULL) &&
6989 (nw->type == XML_ELEMENT_NODE) &&
6990 (nw->children == NULL))
6991 nw->extra = 1;
6992
6993 break;
6994 }
6995 cur = cur->next;
6996 }
6997#ifdef LIBXML_LEGACY_ENABLED
6998 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6999 xmlAddEntityReference(ent, firstChild, nw);
7000#endif /* LIBXML_LEGACY_ENABLED */
7001 } else if (list == NULL) {
7002 xmlNodePtr nw = NULL, cur, next, last,
7003 firstChild = NULL;
7004 /*
7005 * Copy the entity child list and make it the new
7006 * entity child list. The goal is to make sure any
7007 * ID or REF referenced will be the one from the
7008 * document content and not the entity copy.
7009 */
7010 cur = ent->children;
7011 ent->children = NULL;
7012 last = ent->last;
7013 ent->last = NULL;
7014 while (cur != NULL) {
7015 next = cur->next;
7016 cur->next = NULL;
7017 cur->parent = NULL;
7018 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7019 if (nw != NULL) {
7020 if (nw->_private == NULL)
7021 nw->_private = cur->_private;
7022 if (firstChild == NULL){
7023 firstChild = cur;
7024 }
7025 xmlAddChild((xmlNodePtr) ent, nw);
7026 xmlAddChild(ctxt->node, cur);
7027 }
7028 if (cur == last)
7029 break;
7030 cur = next;
7031 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007032 if (ent->owner == 0)
7033 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007034#ifdef LIBXML_LEGACY_ENABLED
7035 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7036 xmlAddEntityReference(ent, firstChild, nw);
7037#endif /* LIBXML_LEGACY_ENABLED */
7038 } else {
7039 const xmlChar *nbktext;
7040
7041 /*
7042 * the name change is to avoid coalescing of the
7043 * node with a possible previous text one which
7044 * would make ent->children a dangling pointer
7045 */
7046 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7047 -1);
7048 if (ent->children->type == XML_TEXT_NODE)
7049 ent->children->name = nbktext;
7050 if ((ent->last != ent->children) &&
7051 (ent->last->type == XML_TEXT_NODE))
7052 ent->last->name = nbktext;
7053 xmlAddChildList(ctxt->node, ent->children);
7054 }
7055
7056 /*
7057 * This is to avoid a nasty side effect, see
7058 * characters() in SAX.c
7059 */
7060 ctxt->nodemem = 0;
7061 ctxt->nodelen = 0;
7062 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007063 }
7064 }
7065}
7066
7067/**
7068 * xmlParseEntityRef:
7069 * @ctxt: an XML parser context
7070 *
7071 * parse ENTITY references declarations
7072 *
7073 * [68] EntityRef ::= '&' Name ';'
7074 *
7075 * [ WFC: Entity Declared ]
7076 * In a document without any DTD, a document with only an internal DTD
7077 * subset which contains no parameter entity references, or a document
7078 * with "standalone='yes'", the Name given in the entity reference
7079 * must match that in an entity declaration, except that well-formed
7080 * documents need not declare any of the following entities: amp, lt,
7081 * gt, apos, quot. The declaration of a parameter entity must precede
7082 * any reference to it. Similarly, the declaration of a general entity
7083 * must precede any reference to it which appears in a default value in an
7084 * attribute-list declaration. Note that if entities are declared in the
7085 * external subset or in external parameter entities, a non-validating
7086 * processor is not obligated to read and process their declarations;
7087 * for such documents, the rule that an entity must be declared is a
7088 * well-formedness constraint only if standalone='yes'.
7089 *
7090 * [ WFC: Parsed Entity ]
7091 * An entity reference must not contain the name of an unparsed entity
7092 *
7093 * Returns the xmlEntityPtr if found, or NULL otherwise.
7094 */
7095xmlEntityPtr
7096xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007097 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007098 xmlEntityPtr ent = NULL;
7099
7100 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007101
Daniel Veillard0161e632008-08-28 15:36:32 +00007102 if (RAW != '&')
7103 return(NULL);
7104 NEXT;
7105 name = xmlParseName(ctxt);
7106 if (name == NULL) {
7107 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7108 "xmlParseEntityRef: no name\n");
7109 return(NULL);
7110 }
7111 if (RAW != ';') {
7112 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7113 return(NULL);
7114 }
7115 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007116
Daniel Veillard0161e632008-08-28 15:36:32 +00007117 /*
7118 * Predefined entites override any extra definition
7119 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007120 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7121 ent = xmlGetPredefinedEntity(name);
7122 if (ent != NULL)
7123 return(ent);
7124 }
Owen Taylor3473f882001-02-23 17:55:21 +00007125
Daniel Veillard0161e632008-08-28 15:36:32 +00007126 /*
7127 * Increate the number of entity references parsed
7128 */
7129 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007130
Daniel Veillard0161e632008-08-28 15:36:32 +00007131 /*
7132 * Ask first SAX for entity resolution, otherwise try the
7133 * entities which may have stored in the parser context.
7134 */
7135 if (ctxt->sax != NULL) {
7136 if (ctxt->sax->getEntity != NULL)
7137 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007138 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7139 (ctxt->options & XML_PARSE_OLDSAX))
7140 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007141 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7142 (ctxt->userData==ctxt)) {
7143 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007144 }
7145 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007146 /*
7147 * [ WFC: Entity Declared ]
7148 * In a document without any DTD, a document with only an
7149 * internal DTD subset which contains no parameter entity
7150 * references, or a document with "standalone='yes'", the
7151 * Name given in the entity reference must match that in an
7152 * entity declaration, except that well-formed documents
7153 * need not declare any of the following entities: amp, lt,
7154 * gt, apos, quot.
7155 * The declaration of a parameter entity must precede any
7156 * reference to it.
7157 * Similarly, the declaration of a general entity must
7158 * precede any reference to it which appears in a default
7159 * value in an attribute-list declaration. Note that if
7160 * entities are declared in the external subset or in
7161 * external parameter entities, a non-validating processor
7162 * is not obligated to read and process their declarations;
7163 * for such documents, the rule that an entity must be
7164 * declared is a well-formedness constraint only if
7165 * standalone='yes'.
7166 */
7167 if (ent == NULL) {
7168 if ((ctxt->standalone == 1) ||
7169 ((ctxt->hasExternalSubset == 0) &&
7170 (ctxt->hasPErefs == 0))) {
7171 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7172 "Entity '%s' not defined\n", name);
7173 } else {
7174 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7175 "Entity '%s' not defined\n", name);
7176 if ((ctxt->inSubset == 0) &&
7177 (ctxt->sax != NULL) &&
7178 (ctxt->sax->reference != NULL)) {
7179 ctxt->sax->reference(ctxt->userData, name);
7180 }
7181 }
7182 ctxt->valid = 0;
7183 }
7184
7185 /*
7186 * [ WFC: Parsed Entity ]
7187 * An entity reference must not contain the name of an
7188 * unparsed entity
7189 */
7190 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7191 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7192 "Entity reference to unparsed entity %s\n", name);
7193 }
7194
7195 /*
7196 * [ WFC: No External Entity References ]
7197 * Attribute values cannot contain direct or indirect
7198 * entity references to external entities.
7199 */
7200 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7201 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7202 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7203 "Attribute references external entity '%s'\n", name);
7204 }
7205 /*
7206 * [ WFC: No < in Attribute Values ]
7207 * The replacement text of any entity referred to directly or
7208 * indirectly in an attribute value (other than "&lt;") must
7209 * not contain a <.
7210 */
7211 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7212 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007213 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007214 (xmlStrchr(ent->content, '<'))) {
7215 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7216 "'<' in entity '%s' is not allowed in attributes values\n", name);
7217 }
7218
7219 /*
7220 * Internal check, no parameter entities here ...
7221 */
7222 else {
7223 switch (ent->etype) {
7224 case XML_INTERNAL_PARAMETER_ENTITY:
7225 case XML_EXTERNAL_PARAMETER_ENTITY:
7226 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7227 "Attempt to reference the parameter entity '%s'\n",
7228 name);
7229 break;
7230 default:
7231 break;
7232 }
7233 }
7234
7235 /*
7236 * [ WFC: No Recursion ]
7237 * A parsed entity must not contain a recursive reference
7238 * to itself, either directly or indirectly.
7239 * Done somewhere else
7240 */
Owen Taylor3473f882001-02-23 17:55:21 +00007241 return(ent);
7242}
7243
7244/**
7245 * xmlParseStringEntityRef:
7246 * @ctxt: an XML parser context
7247 * @str: a pointer to an index in the string
7248 *
7249 * parse ENTITY references declarations, but this version parses it from
7250 * a string value.
7251 *
7252 * [68] EntityRef ::= '&' Name ';'
7253 *
7254 * [ WFC: Entity Declared ]
7255 * In a document without any DTD, a document with only an internal DTD
7256 * subset which contains no parameter entity references, or a document
7257 * with "standalone='yes'", the Name given in the entity reference
7258 * must match that in an entity declaration, except that well-formed
7259 * documents need not declare any of the following entities: amp, lt,
7260 * gt, apos, quot. The declaration of a parameter entity must precede
7261 * any reference to it. Similarly, the declaration of a general entity
7262 * must precede any reference to it which appears in a default value in an
7263 * attribute-list declaration. Note that if entities are declared in the
7264 * external subset or in external parameter entities, a non-validating
7265 * processor is not obligated to read and process their declarations;
7266 * for such documents, the rule that an entity must be declared is a
7267 * well-formedness constraint only if standalone='yes'.
7268 *
7269 * [ WFC: Parsed Entity ]
7270 * An entity reference must not contain the name of an unparsed entity
7271 *
7272 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7273 * is updated to the current location in the string.
7274 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007275static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007276xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7277 xmlChar *name;
7278 const xmlChar *ptr;
7279 xmlChar cur;
7280 xmlEntityPtr ent = NULL;
7281
7282 if ((str == NULL) || (*str == NULL))
7283 return(NULL);
7284 ptr = *str;
7285 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007286 if (cur != '&')
7287 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007288
Daniel Veillard0161e632008-08-28 15:36:32 +00007289 ptr++;
7290 cur = *ptr;
7291 name = xmlParseStringName(ctxt, &ptr);
7292 if (name == NULL) {
7293 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7294 "xmlParseStringEntityRef: no name\n");
7295 *str = ptr;
7296 return(NULL);
7297 }
7298 if (*ptr != ';') {
7299 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007300 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007301 *str = ptr;
7302 return(NULL);
7303 }
7304 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007305
Owen Taylor3473f882001-02-23 17:55:21 +00007306
Daniel Veillard0161e632008-08-28 15:36:32 +00007307 /*
7308 * Predefined entites override any extra definition
7309 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007310 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7311 ent = xmlGetPredefinedEntity(name);
7312 if (ent != NULL) {
7313 xmlFree(name);
7314 *str = ptr;
7315 return(ent);
7316 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007317 }
Owen Taylor3473f882001-02-23 17:55:21 +00007318
Daniel Veillard0161e632008-08-28 15:36:32 +00007319 /*
7320 * Increate the number of entity references parsed
7321 */
7322 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007323
Daniel Veillard0161e632008-08-28 15:36:32 +00007324 /*
7325 * Ask first SAX for entity resolution, otherwise try the
7326 * entities which may have stored in the parser context.
7327 */
7328 if (ctxt->sax != NULL) {
7329 if (ctxt->sax->getEntity != NULL)
7330 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007331 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7332 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007333 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7334 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007335 }
7336 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007337
7338 /*
7339 * [ WFC: Entity Declared ]
7340 * In a document without any DTD, a document with only an
7341 * internal DTD subset which contains no parameter entity
7342 * references, or a document with "standalone='yes'", the
7343 * Name given in the entity reference must match that in an
7344 * entity declaration, except that well-formed documents
7345 * need not declare any of the following entities: amp, lt,
7346 * gt, apos, quot.
7347 * The declaration of a parameter entity must precede any
7348 * reference to it.
7349 * Similarly, the declaration of a general entity must
7350 * precede any reference to it which appears in a default
7351 * value in an attribute-list declaration. Note that if
7352 * entities are declared in the external subset or in
7353 * external parameter entities, a non-validating processor
7354 * is not obligated to read and process their declarations;
7355 * for such documents, the rule that an entity must be
7356 * declared is a well-formedness constraint only if
7357 * standalone='yes'.
7358 */
7359 if (ent == NULL) {
7360 if ((ctxt->standalone == 1) ||
7361 ((ctxt->hasExternalSubset == 0) &&
7362 (ctxt->hasPErefs == 0))) {
7363 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7364 "Entity '%s' not defined\n", name);
7365 } else {
7366 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7367 "Entity '%s' not defined\n",
7368 name);
7369 }
7370 /* TODO ? check regressions ctxt->valid = 0; */
7371 }
7372
7373 /*
7374 * [ WFC: Parsed Entity ]
7375 * An entity reference must not contain the name of an
7376 * unparsed entity
7377 */
7378 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7379 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7380 "Entity reference to unparsed entity %s\n", name);
7381 }
7382
7383 /*
7384 * [ WFC: No External Entity References ]
7385 * Attribute values cannot contain direct or indirect
7386 * entity references to external entities.
7387 */
7388 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7389 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7390 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7391 "Attribute references external entity '%s'\n", name);
7392 }
7393 /*
7394 * [ WFC: No < in Attribute Values ]
7395 * The replacement text of any entity referred to directly or
7396 * indirectly in an attribute value (other than "&lt;") must
7397 * not contain a <.
7398 */
7399 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7400 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007401 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007402 (xmlStrchr(ent->content, '<'))) {
7403 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7404 "'<' in entity '%s' is not allowed in attributes values\n",
7405 name);
7406 }
7407
7408 /*
7409 * Internal check, no parameter entities here ...
7410 */
7411 else {
7412 switch (ent->etype) {
7413 case XML_INTERNAL_PARAMETER_ENTITY:
7414 case XML_EXTERNAL_PARAMETER_ENTITY:
7415 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7416 "Attempt to reference the parameter entity '%s'\n",
7417 name);
7418 break;
7419 default:
7420 break;
7421 }
7422 }
7423
7424 /*
7425 * [ WFC: No Recursion ]
7426 * A parsed entity must not contain a recursive reference
7427 * to itself, either directly or indirectly.
7428 * Done somewhere else
7429 */
7430
7431 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007432 *str = ptr;
7433 return(ent);
7434}
7435
7436/**
7437 * xmlParsePEReference:
7438 * @ctxt: an XML parser context
7439 *
7440 * parse PEReference declarations
7441 * The entity content is handled directly by pushing it's content as
7442 * a new input stream.
7443 *
7444 * [69] PEReference ::= '%' Name ';'
7445 *
7446 * [ WFC: No Recursion ]
7447 * A parsed entity must not contain a recursive
7448 * reference to itself, either directly or indirectly.
7449 *
7450 * [ WFC: Entity Declared ]
7451 * In a document without any DTD, a document with only an internal DTD
7452 * subset which contains no parameter entity references, or a document
7453 * with "standalone='yes'", ... ... The declaration of a parameter
7454 * entity must precede any reference to it...
7455 *
7456 * [ VC: Entity Declared ]
7457 * In a document with an external subset or external parameter entities
7458 * with "standalone='no'", ... ... The declaration of a parameter entity
7459 * must precede any reference to it...
7460 *
7461 * [ WFC: In DTD ]
7462 * Parameter-entity references may only appear in the DTD.
7463 * NOTE: misleading but this is handled.
7464 */
7465void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007466xmlParsePEReference(xmlParserCtxtPtr ctxt)
7467{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007468 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007469 xmlEntityPtr entity = NULL;
7470 xmlParserInputPtr input;
7471
Daniel Veillard0161e632008-08-28 15:36:32 +00007472 if (RAW != '%')
7473 return;
7474 NEXT;
7475 name = xmlParseName(ctxt);
7476 if (name == NULL) {
7477 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7478 "xmlParsePEReference: no name\n");
7479 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007480 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007481 if (RAW != ';') {
7482 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7483 return;
7484 }
7485
7486 NEXT;
7487
7488 /*
7489 * Increate the number of entity references parsed
7490 */
7491 ctxt->nbentities++;
7492
7493 /*
7494 * Request the entity from SAX
7495 */
7496 if ((ctxt->sax != NULL) &&
7497 (ctxt->sax->getParameterEntity != NULL))
7498 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7499 name);
7500 if (entity == NULL) {
7501 /*
7502 * [ WFC: Entity Declared ]
7503 * In a document without any DTD, a document with only an
7504 * internal DTD subset which contains no parameter entity
7505 * references, or a document with "standalone='yes'", ...
7506 * ... The declaration of a parameter entity must precede
7507 * any reference to it...
7508 */
7509 if ((ctxt->standalone == 1) ||
7510 ((ctxt->hasExternalSubset == 0) &&
7511 (ctxt->hasPErefs == 0))) {
7512 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7513 "PEReference: %%%s; not found\n",
7514 name);
7515 } else {
7516 /*
7517 * [ VC: Entity Declared ]
7518 * In a document with an external subset or external
7519 * parameter entities with "standalone='no'", ...
7520 * ... The declaration of a parameter entity must
7521 * precede any reference to it...
7522 */
7523 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7524 "PEReference: %%%s; not found\n",
7525 name, NULL);
7526 ctxt->valid = 0;
7527 }
7528 } else {
7529 /*
7530 * Internal checking in case the entity quest barfed
7531 */
7532 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7533 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7534 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7535 "Internal: %%%s; is not a parameter entity\n",
7536 name, NULL);
7537 } else if (ctxt->input->free != deallocblankswrapper) {
7538 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7539 if (xmlPushInput(ctxt, input) < 0)
7540 return;
7541 } else {
7542 /*
7543 * TODO !!!
7544 * handle the extra spaces added before and after
7545 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7546 */
7547 input = xmlNewEntityInputStream(ctxt, entity);
7548 if (xmlPushInput(ctxt, input) < 0)
7549 return;
7550 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7551 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7552 (IS_BLANK_CH(NXT(5)))) {
7553 xmlParseTextDecl(ctxt);
7554 if (ctxt->errNo ==
7555 XML_ERR_UNSUPPORTED_ENCODING) {
7556 /*
7557 * The XML REC instructs us to stop parsing
7558 * right here
7559 */
7560 ctxt->instate = XML_PARSER_EOF;
7561 return;
7562 }
7563 }
7564 }
7565 }
7566 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007567}
7568
7569/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007570 * xmlLoadEntityContent:
7571 * @ctxt: an XML parser context
7572 * @entity: an unloaded system entity
7573 *
7574 * Load the original content of the given system entity from the
7575 * ExternalID/SystemID given. This is to be used for Included in Literal
7576 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7577 *
7578 * Returns 0 in case of success and -1 in case of failure
7579 */
7580static int
7581xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7582 xmlParserInputPtr input;
7583 xmlBufferPtr buf;
7584 int l, c;
7585 int count = 0;
7586
7587 if ((ctxt == NULL) || (entity == NULL) ||
7588 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7589 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7590 (entity->content != NULL)) {
7591 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7592 "xmlLoadEntityContent parameter error");
7593 return(-1);
7594 }
7595
7596 if (xmlParserDebugEntities)
7597 xmlGenericError(xmlGenericErrorContext,
7598 "Reading %s entity content input\n", entity->name);
7599
7600 buf = xmlBufferCreate();
7601 if (buf == NULL) {
7602 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7603 "xmlLoadEntityContent parameter error");
7604 return(-1);
7605 }
7606
7607 input = xmlNewEntityInputStream(ctxt, entity);
7608 if (input == NULL) {
7609 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7610 "xmlLoadEntityContent input error");
7611 xmlBufferFree(buf);
7612 return(-1);
7613 }
7614
7615 /*
7616 * Push the entity as the current input, read char by char
7617 * saving to the buffer until the end of the entity or an error
7618 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007619 if (xmlPushInput(ctxt, input) < 0) {
7620 xmlBufferFree(buf);
7621 return(-1);
7622 }
7623
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007624 GROW;
7625 c = CUR_CHAR(l);
7626 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7627 (IS_CHAR(c))) {
7628 xmlBufferAdd(buf, ctxt->input->cur, l);
7629 if (count++ > 100) {
7630 count = 0;
7631 GROW;
7632 }
7633 NEXTL(l);
7634 c = CUR_CHAR(l);
7635 }
7636
7637 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7638 xmlPopInput(ctxt);
7639 } else if (!IS_CHAR(c)) {
7640 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7641 "xmlLoadEntityContent: invalid char value %d\n",
7642 c);
7643 xmlBufferFree(buf);
7644 return(-1);
7645 }
7646 entity->content = buf->content;
7647 buf->content = NULL;
7648 xmlBufferFree(buf);
7649
7650 return(0);
7651}
7652
7653/**
Owen Taylor3473f882001-02-23 17:55:21 +00007654 * xmlParseStringPEReference:
7655 * @ctxt: an XML parser context
7656 * @str: a pointer to an index in the string
7657 *
7658 * parse PEReference declarations
7659 *
7660 * [69] PEReference ::= '%' Name ';'
7661 *
7662 * [ WFC: No Recursion ]
7663 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007664 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007665 *
7666 * [ WFC: Entity Declared ]
7667 * In a document without any DTD, a document with only an internal DTD
7668 * subset which contains no parameter entity references, or a document
7669 * with "standalone='yes'", ... ... The declaration of a parameter
7670 * entity must precede any reference to it...
7671 *
7672 * [ VC: Entity Declared ]
7673 * In a document with an external subset or external parameter entities
7674 * with "standalone='no'", ... ... The declaration of a parameter entity
7675 * must precede any reference to it...
7676 *
7677 * [ WFC: In DTD ]
7678 * Parameter-entity references may only appear in the DTD.
7679 * NOTE: misleading but this is handled.
7680 *
7681 * Returns the string of the entity content.
7682 * str is updated to the current value of the index
7683 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007684static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007685xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7686 const xmlChar *ptr;
7687 xmlChar cur;
7688 xmlChar *name;
7689 xmlEntityPtr entity = NULL;
7690
7691 if ((str == NULL) || (*str == NULL)) return(NULL);
7692 ptr = *str;
7693 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007694 if (cur != '%')
7695 return(NULL);
7696 ptr++;
7697 cur = *ptr;
7698 name = xmlParseStringName(ctxt, &ptr);
7699 if (name == NULL) {
7700 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7701 "xmlParseStringPEReference: no name\n");
7702 *str = ptr;
7703 return(NULL);
7704 }
7705 cur = *ptr;
7706 if (cur != ';') {
7707 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7708 xmlFree(name);
7709 *str = ptr;
7710 return(NULL);
7711 }
7712 ptr++;
7713
7714 /*
7715 * Increate the number of entity references parsed
7716 */
7717 ctxt->nbentities++;
7718
7719 /*
7720 * Request the entity from SAX
7721 */
7722 if ((ctxt->sax != NULL) &&
7723 (ctxt->sax->getParameterEntity != NULL))
7724 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7725 name);
7726 if (entity == NULL) {
7727 /*
7728 * [ WFC: Entity Declared ]
7729 * In a document without any DTD, a document with only an
7730 * internal DTD subset which contains no parameter entity
7731 * references, or a document with "standalone='yes'", ...
7732 * ... The declaration of a parameter entity must precede
7733 * any reference to it...
7734 */
7735 if ((ctxt->standalone == 1) ||
7736 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7737 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7738 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007739 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007740 /*
7741 * [ VC: Entity Declared ]
7742 * In a document with an external subset or external
7743 * parameter entities with "standalone='no'", ...
7744 * ... The declaration of a parameter entity must
7745 * precede any reference to it...
7746 */
7747 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7748 "PEReference: %%%s; not found\n",
7749 name, NULL);
7750 ctxt->valid = 0;
7751 }
7752 } else {
7753 /*
7754 * Internal checking in case the entity quest barfed
7755 */
7756 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7757 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7758 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7759 "%%%s; is not a parameter entity\n",
7760 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007761 }
7762 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007763 ctxt->hasPErefs = 1;
7764 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007765 *str = ptr;
7766 return(entity);
7767}
7768
7769/**
7770 * xmlParseDocTypeDecl:
7771 * @ctxt: an XML parser context
7772 *
7773 * parse a DOCTYPE declaration
7774 *
7775 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7776 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7777 *
7778 * [ VC: Root Element Type ]
7779 * The Name in the document type declaration must match the element
7780 * type of the root element.
7781 */
7782
7783void
7784xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007785 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007786 xmlChar *ExternalID = NULL;
7787 xmlChar *URI = NULL;
7788
7789 /*
7790 * We know that '<!DOCTYPE' has been detected.
7791 */
7792 SKIP(9);
7793
7794 SKIP_BLANKS;
7795
7796 /*
7797 * Parse the DOCTYPE name.
7798 */
7799 name = xmlParseName(ctxt);
7800 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007801 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7802 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007803 }
7804 ctxt->intSubName = name;
7805
7806 SKIP_BLANKS;
7807
7808 /*
7809 * Check for SystemID and ExternalID
7810 */
7811 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7812
7813 if ((URI != NULL) || (ExternalID != NULL)) {
7814 ctxt->hasExternalSubset = 1;
7815 }
7816 ctxt->extSubURI = URI;
7817 ctxt->extSubSystem = ExternalID;
7818
7819 SKIP_BLANKS;
7820
7821 /*
7822 * Create and update the internal subset.
7823 */
7824 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7825 (!ctxt->disableSAX))
7826 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7827
7828 /*
7829 * Is there any internal subset declarations ?
7830 * they are handled separately in xmlParseInternalSubset()
7831 */
7832 if (RAW == '[')
7833 return;
7834
7835 /*
7836 * We should be at the end of the DOCTYPE declaration.
7837 */
7838 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007839 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007840 }
7841 NEXT;
7842}
7843
7844/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007845 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007846 * @ctxt: an XML parser context
7847 *
7848 * parse the internal subset declaration
7849 *
7850 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7851 */
7852
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007853static void
Owen Taylor3473f882001-02-23 17:55:21 +00007854xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7855 /*
7856 * Is there any DTD definition ?
7857 */
7858 if (RAW == '[') {
7859 ctxt->instate = XML_PARSER_DTD;
7860 NEXT;
7861 /*
7862 * Parse the succession of Markup declarations and
7863 * PEReferences.
7864 * Subsequence (markupdecl | PEReference | S)*
7865 */
7866 while (RAW != ']') {
7867 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007868 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007869
7870 SKIP_BLANKS;
7871 xmlParseMarkupDecl(ctxt);
7872 xmlParsePEReference(ctxt);
7873
7874 /*
7875 * Pop-up of finished entities.
7876 */
7877 while ((RAW == 0) && (ctxt->inputNr > 1))
7878 xmlPopInput(ctxt);
7879
7880 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007881 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007882 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007883 break;
7884 }
7885 }
7886 if (RAW == ']') {
7887 NEXT;
7888 SKIP_BLANKS;
7889 }
7890 }
7891
7892 /*
7893 * We should be at the end of the DOCTYPE declaration.
7894 */
7895 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007896 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007897 }
7898 NEXT;
7899}
7900
Daniel Veillard81273902003-09-30 00:43:48 +00007901#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007902/**
7903 * xmlParseAttribute:
7904 * @ctxt: an XML parser context
7905 * @value: a xmlChar ** used to store the value of the attribute
7906 *
7907 * parse an attribute
7908 *
7909 * [41] Attribute ::= Name Eq AttValue
7910 *
7911 * [ WFC: No External Entity References ]
7912 * Attribute values cannot contain direct or indirect entity references
7913 * to external entities.
7914 *
7915 * [ WFC: No < in Attribute Values ]
7916 * The replacement text of any entity referred to directly or indirectly in
7917 * an attribute value (other than "&lt;") must not contain a <.
7918 *
7919 * [ VC: Attribute Value Type ]
7920 * The attribute must have been declared; the value must be of the type
7921 * declared for it.
7922 *
7923 * [25] Eq ::= S? '=' S?
7924 *
7925 * With namespace:
7926 *
7927 * [NS 11] Attribute ::= QName Eq AttValue
7928 *
7929 * Also the case QName == xmlns:??? is handled independently as a namespace
7930 * definition.
7931 *
7932 * Returns the attribute name, and the value in *value.
7933 */
7934
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007935const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007936xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007937 const xmlChar *name;
7938 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007939
7940 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007941 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007942 name = xmlParseName(ctxt);
7943 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007944 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007945 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007946 return(NULL);
7947 }
7948
7949 /*
7950 * read the value
7951 */
7952 SKIP_BLANKS;
7953 if (RAW == '=') {
7954 NEXT;
7955 SKIP_BLANKS;
7956 val = xmlParseAttValue(ctxt);
7957 ctxt->instate = XML_PARSER_CONTENT;
7958 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007959 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007960 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007961 return(NULL);
7962 }
7963
7964 /*
7965 * Check that xml:lang conforms to the specification
7966 * No more registered as an error, just generate a warning now
7967 * since this was deprecated in XML second edition
7968 */
7969 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7970 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007971 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7972 "Malformed value for xml:lang : %s\n",
7973 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007974 }
7975 }
7976
7977 /*
7978 * Check that xml:space conforms to the specification
7979 */
7980 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7981 if (xmlStrEqual(val, BAD_CAST "default"))
7982 *(ctxt->space) = 0;
7983 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7984 *(ctxt->space) = 1;
7985 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007986 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007987"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007988 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007989 }
7990 }
7991
7992 *value = val;
7993 return(name);
7994}
7995
7996/**
7997 * xmlParseStartTag:
7998 * @ctxt: an XML parser context
7999 *
8000 * parse a start of tag either for rule element or
8001 * EmptyElement. In both case we don't parse the tag closing chars.
8002 *
8003 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8004 *
8005 * [ WFC: Unique Att Spec ]
8006 * No attribute name may appear more than once in the same start-tag or
8007 * empty-element tag.
8008 *
8009 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8010 *
8011 * [ WFC: Unique Att Spec ]
8012 * No attribute name may appear more than once in the same start-tag or
8013 * empty-element tag.
8014 *
8015 * With namespace:
8016 *
8017 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8018 *
8019 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8020 *
8021 * Returns the element name parsed
8022 */
8023
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008024const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008025xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008026 const xmlChar *name;
8027 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008028 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008029 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008030 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008031 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008032 int i;
8033
8034 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008035 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008036
8037 name = xmlParseName(ctxt);
8038 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008039 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008040 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008041 return(NULL);
8042 }
8043
8044 /*
8045 * Now parse the attributes, it ends up with the ending
8046 *
8047 * (S Attribute)* S?
8048 */
8049 SKIP_BLANKS;
8050 GROW;
8051
Daniel Veillard21a0f912001-02-25 19:54:14 +00008052 while ((RAW != '>') &&
8053 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008054 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008055 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008056 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008057
8058 attname = xmlParseAttribute(ctxt, &attvalue);
8059 if ((attname != NULL) && (attvalue != NULL)) {
8060 /*
8061 * [ WFC: Unique Att Spec ]
8062 * No attribute name may appear more than once in the same
8063 * start-tag or empty-element tag.
8064 */
8065 for (i = 0; i < nbatts;i += 2) {
8066 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008067 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008068 xmlFree(attvalue);
8069 goto failed;
8070 }
8071 }
Owen Taylor3473f882001-02-23 17:55:21 +00008072 /*
8073 * Add the pair to atts
8074 */
8075 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008076 maxatts = 22; /* allow for 10 attrs by default */
8077 atts = (const xmlChar **)
8078 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008079 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008080 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008081 if (attvalue != NULL)
8082 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008083 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008084 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008085 ctxt->atts = atts;
8086 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008087 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008088 const xmlChar **n;
8089
Owen Taylor3473f882001-02-23 17:55:21 +00008090 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008091 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008092 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008093 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008094 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008095 if (attvalue != NULL)
8096 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008097 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008098 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008099 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008100 ctxt->atts = atts;
8101 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008102 }
8103 atts[nbatts++] = attname;
8104 atts[nbatts++] = attvalue;
8105 atts[nbatts] = NULL;
8106 atts[nbatts + 1] = NULL;
8107 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008108 if (attvalue != NULL)
8109 xmlFree(attvalue);
8110 }
8111
8112failed:
8113
Daniel Veillard3772de32002-12-17 10:31:45 +00008114 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008115 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8116 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008117 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008118 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8119 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008120 }
8121 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008122 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8123 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008124 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8125 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008126 break;
8127 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008128 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008129 GROW;
8130 }
8131
8132 /*
8133 * SAX: Start of Element !
8134 */
8135 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008136 (!ctxt->disableSAX)) {
8137 if (nbatts > 0)
8138 ctxt->sax->startElement(ctxt->userData, name, atts);
8139 else
8140 ctxt->sax->startElement(ctxt->userData, name, NULL);
8141 }
Owen Taylor3473f882001-02-23 17:55:21 +00008142
8143 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008144 /* Free only the content strings */
8145 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008146 if (atts[i] != NULL)
8147 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008148 }
8149 return(name);
8150}
8151
8152/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008153 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008154 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008155 * @line: line of the start tag
8156 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008157 *
8158 * parse an end of tag
8159 *
8160 * [42] ETag ::= '</' Name S? '>'
8161 *
8162 * With namespace
8163 *
8164 * [NS 9] ETag ::= '</' QName S? '>'
8165 */
8166
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008167static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008168xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008169 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008170
8171 GROW;
8172 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008173 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008174 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008175 return;
8176 }
8177 SKIP(2);
8178
Daniel Veillard46de64e2002-05-29 08:21:33 +00008179 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008180
8181 /*
8182 * We should definitely be at the ending "S? '>'" part
8183 */
8184 GROW;
8185 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008186 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008187 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008188 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008189 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008190
8191 /*
8192 * [ WFC: Element Type Match ]
8193 * The Name in an element's end-tag must match the element type in the
8194 * start-tag.
8195 *
8196 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008197 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008198 if (name == NULL) name = BAD_CAST "unparseable";
8199 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008200 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008201 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008202 }
8203
8204 /*
8205 * SAX: End of Tag
8206 */
8207 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8208 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008209 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008210
Daniel Veillarde57ec792003-09-10 10:50:59 +00008211 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008212 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008213 return;
8214}
8215
8216/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008217 * xmlParseEndTag:
8218 * @ctxt: an XML parser context
8219 *
8220 * parse an end of tag
8221 *
8222 * [42] ETag ::= '</' Name S? '>'
8223 *
8224 * With namespace
8225 *
8226 * [NS 9] ETag ::= '</' QName S? '>'
8227 */
8228
8229void
8230xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008231 xmlParseEndTag1(ctxt, 0);
8232}
Daniel Veillard81273902003-09-30 00:43:48 +00008233#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008234
/************************************************************************
 *									*
 *		SAX 2 specific operations				*
 *									*
 ************************************************************************/
8240
Daniel Veillard0fb18932003-09-07 09:14:37 +00008241/*
8242 * xmlGetNamespace:
8243 * @ctxt: an XML parser context
8244 * @prefix: the prefix to lookup
8245 *
8246 * Lookup the namespace name for the @prefix (which ca be NULL)
8247 * The prefix must come from the @ctxt->dict dictionnary
8248 *
8249 * Returns the namespace name or NULL if not bound
8250 */
8251static const xmlChar *
8252xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8253 int i;
8254
Daniel Veillarde57ec792003-09-10 10:50:59 +00008255 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008256 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008257 if (ctxt->nsTab[i] == prefix) {
8258 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8259 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008260 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008261 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008262 return(NULL);
8263}
8264
8265/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008266 * xmlParseQName:
8267 * @ctxt: an XML parser context
8268 * @prefix: pointer to store the prefix part
8269 *
8270 * parse an XML Namespace QName
8271 *
8272 * [6] QName ::= (Prefix ':')? LocalPart
8273 * [7] Prefix ::= NCName
8274 * [8] LocalPart ::= NCName
8275 *
8276 * Returns the Name parsed or NULL
8277 */
8278
8279static const xmlChar *
8280xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8281 const xmlChar *l, *p;
8282
8283 GROW;
8284
8285 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008286 if (l == NULL) {
8287 if (CUR == ':') {
8288 l = xmlParseName(ctxt);
8289 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008290 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8291 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008292 *prefix = NULL;
8293 return(l);
8294 }
8295 }
8296 return(NULL);
8297 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008298 if (CUR == ':') {
8299 NEXT;
8300 p = l;
8301 l = xmlParseNCName(ctxt);
8302 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008303 xmlChar *tmp;
8304
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008305 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8306 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008307 l = xmlParseNmtoken(ctxt);
8308 if (l == NULL)
8309 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8310 else {
8311 tmp = xmlBuildQName(l, p, NULL, 0);
8312 xmlFree((char *)l);
8313 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008314 p = xmlDictLookup(ctxt->dict, tmp, -1);
8315 if (tmp != NULL) xmlFree(tmp);
8316 *prefix = NULL;
8317 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008318 }
8319 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008320 xmlChar *tmp;
8321
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008322 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8323 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008324 NEXT;
8325 tmp = (xmlChar *) xmlParseName(ctxt);
8326 if (tmp != NULL) {
8327 tmp = xmlBuildQName(tmp, l, NULL, 0);
8328 l = xmlDictLookup(ctxt->dict, tmp, -1);
8329 if (tmp != NULL) xmlFree(tmp);
8330 *prefix = p;
8331 return(l);
8332 }
8333 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8334 l = xmlDictLookup(ctxt->dict, tmp, -1);
8335 if (tmp != NULL) xmlFree(tmp);
8336 *prefix = p;
8337 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008338 }
8339 *prefix = p;
8340 } else
8341 *prefix = NULL;
8342 return(l);
8343}
8344
8345/**
8346 * xmlParseQNameAndCompare:
8347 * @ctxt: an XML parser context
8348 * @name: the localname
8349 * @prefix: the prefix, if any.
8350 *
8351 * parse an XML name and compares for match
8352 * (specialized for endtag parsing)
8353 *
8354 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8355 * and the name for mismatch
8356 */
8357
8358static const xmlChar *
8359xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8360 xmlChar const *prefix) {
8361 const xmlChar *cmp = name;
8362 const xmlChar *in;
8363 const xmlChar *ret;
8364 const xmlChar *prefix2;
8365
8366 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8367
8368 GROW;
8369 in = ctxt->input->cur;
8370
8371 cmp = prefix;
8372 while (*in != 0 && *in == *cmp) {
8373 ++in;
8374 ++cmp;
8375 }
8376 if ((*cmp == 0) && (*in == ':')) {
8377 in++;
8378 cmp = name;
8379 while (*in != 0 && *in == *cmp) {
8380 ++in;
8381 ++cmp;
8382 }
William M. Brack76e95df2003-10-18 16:20:14 +00008383 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008384 /* success */
8385 ctxt->input->cur = in;
8386 return((const xmlChar*) 1);
8387 }
8388 }
8389 /*
8390 * all strings coms from the dictionary, equality can be done directly
8391 */
8392 ret = xmlParseQName (ctxt, &prefix2);
8393 if ((ret == name) && (prefix == prefix2))
8394 return((const xmlChar*) 1);
8395 return ret;
8396}
8397
8398/**
8399 * xmlParseAttValueInternal:
8400 * @ctxt: an XML parser context
8401 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008402 * @alloc: whether the attribute was reallocated as a new string
8403 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008404 *
8405 * parse a value for an attribute.
8406 * NOTE: if no normalization is needed, the routine will return pointers
8407 * directly from the data buffer.
8408 *
8409 * 3.3.3 Attribute-Value Normalization:
8410 * Before the value of an attribute is passed to the application or
8411 * checked for validity, the XML processor must normalize it as follows:
8412 * - a character reference is processed by appending the referenced
8413 * character to the attribute value
8414 * - an entity reference is processed by recursively processing the
8415 * replacement text of the entity
8416 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8417 * appending #x20 to the normalized value, except that only a single
8418 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8419 * parsed entity or the literal entity value of an internal parsed entity
8420 * - other characters are processed by appending them to the normalized value
8421 * If the declared value is not CDATA, then the XML processor must further
8422 * process the normalized attribute value by discarding any leading and
8423 * trailing space (#x20) characters, and by replacing sequences of space
8424 * (#x20) characters by a single space (#x20) character.
8425 * All attributes for which no declaration has been read should be treated
8426 * by a non-validating parser as if declared CDATA.
8427 *
8428 * Returns the AttValue parsed or NULL. The value has to be freed by the
8429 * caller if it was copied, this can be detected by val[*len] == 0.
8430 */
8431
8432static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008433xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8434 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008435{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008436 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008437 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008438 xmlChar *ret = NULL;
8439
8440 GROW;
8441 in = (xmlChar *) CUR_PTR;
8442 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008443 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008444 return (NULL);
8445 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008446 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008447
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008448 /*
8449 * try to handle in this routine the most common case where no
8450 * allocation of a new string is required and where content is
8451 * pure ASCII.
8452 */
8453 limit = *in++;
8454 end = ctxt->input->end;
8455 start = in;
8456 if (in >= end) {
8457 const xmlChar *oldbase = ctxt->input->base;
8458 GROW;
8459 if (oldbase != ctxt->input->base) {
8460 long delta = ctxt->input->base - oldbase;
8461 start = start + delta;
8462 in = in + delta;
8463 }
8464 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008465 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008466 if (normalize) {
8467 /*
8468 * Skip any leading spaces
8469 */
8470 while ((in < end) && (*in != limit) &&
8471 ((*in == 0x20) || (*in == 0x9) ||
8472 (*in == 0xA) || (*in == 0xD))) {
8473 in++;
8474 start = in;
8475 if (in >= end) {
8476 const xmlChar *oldbase = ctxt->input->base;
8477 GROW;
8478 if (oldbase != ctxt->input->base) {
8479 long delta = ctxt->input->base - oldbase;
8480 start = start + delta;
8481 in = in + delta;
8482 }
8483 end = ctxt->input->end;
8484 }
8485 }
8486 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8487 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8488 if ((*in++ == 0x20) && (*in == 0x20)) break;
8489 if (in >= end) {
8490 const xmlChar *oldbase = ctxt->input->base;
8491 GROW;
8492 if (oldbase != ctxt->input->base) {
8493 long delta = ctxt->input->base - oldbase;
8494 start = start + delta;
8495 in = in + delta;
8496 }
8497 end = ctxt->input->end;
8498 }
8499 }
8500 last = in;
8501 /*
8502 * skip the trailing blanks
8503 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008504 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008505 while ((in < end) && (*in != limit) &&
8506 ((*in == 0x20) || (*in == 0x9) ||
8507 (*in == 0xA) || (*in == 0xD))) {
8508 in++;
8509 if (in >= end) {
8510 const xmlChar *oldbase = ctxt->input->base;
8511 GROW;
8512 if (oldbase != ctxt->input->base) {
8513 long delta = ctxt->input->base - oldbase;
8514 start = start + delta;
8515 in = in + delta;
8516 last = last + delta;
8517 }
8518 end = ctxt->input->end;
8519 }
8520 }
8521 if (*in != limit) goto need_complex;
8522 } else {
8523 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8524 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8525 in++;
8526 if (in >= end) {
8527 const xmlChar *oldbase = ctxt->input->base;
8528 GROW;
8529 if (oldbase != ctxt->input->base) {
8530 long delta = ctxt->input->base - oldbase;
8531 start = start + delta;
8532 in = in + delta;
8533 }
8534 end = ctxt->input->end;
8535 }
8536 }
8537 last = in;
8538 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008539 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008540 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008541 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008542 *len = last - start;
8543 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008544 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008545 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008546 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008547 }
8548 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008549 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008550 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008551need_complex:
8552 if (alloc) *alloc = 1;
8553 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008554}
8555
8556/**
8557 * xmlParseAttribute2:
8558 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008559 * @pref: the element prefix
8560 * @elem: the element name
8561 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008562 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008563 * @len: an int * to save the length of the attribute
8564 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008565 *
8566 * parse an attribute in the new SAX2 framework.
8567 *
8568 * Returns the attribute name, and the value in *value, .
8569 */
8570
8571static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008572xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008573 const xmlChar * pref, const xmlChar * elem,
8574 const xmlChar ** prefix, xmlChar ** value,
8575 int *len, int *alloc)
8576{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008577 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008578 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008579 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008580
8581 *value = NULL;
8582 GROW;
8583 name = xmlParseQName(ctxt, prefix);
8584 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008585 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8586 "error parsing attribute name\n");
8587 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008588 }
8589
8590 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008591 * get the type if needed
8592 */
8593 if (ctxt->attsSpecial != NULL) {
8594 int type;
8595
8596 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008597 pref, elem, *prefix, name);
8598 if (type != 0)
8599 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008600 }
8601
8602 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008603 * read the value
8604 */
8605 SKIP_BLANKS;
8606 if (RAW == '=') {
8607 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008608 SKIP_BLANKS;
8609 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8610 if (normalize) {
8611 /*
8612 * Sometimes a second normalisation pass for spaces is needed
8613 * but that only happens if charrefs or entities refernces
8614 * have been used in the attribute value, i.e. the attribute
8615 * value have been extracted in an allocated string already.
8616 */
8617 if (*alloc) {
8618 const xmlChar *val2;
8619
8620 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008621 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008622 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008623 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008624 }
8625 }
8626 }
8627 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008628 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008629 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8630 "Specification mandate value for attribute %s\n",
8631 name);
8632 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008633 }
8634
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008635 if (*prefix == ctxt->str_xml) {
8636 /*
8637 * Check that xml:lang conforms to the specification
8638 * No more registered as an error, just generate a warning now
8639 * since this was deprecated in XML second edition
8640 */
8641 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8642 internal_val = xmlStrndup(val, *len);
8643 if (!xmlCheckLanguageID(internal_val)) {
8644 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8645 "Malformed value for xml:lang : %s\n",
8646 internal_val, NULL);
8647 }
8648 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008649
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008650 /*
8651 * Check that xml:space conforms to the specification
8652 */
8653 if (xmlStrEqual(name, BAD_CAST "space")) {
8654 internal_val = xmlStrndup(val, *len);
8655 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8656 *(ctxt->space) = 0;
8657 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8658 *(ctxt->space) = 1;
8659 else {
8660 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8661 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8662 internal_val, NULL);
8663 }
8664 }
8665 if (internal_val) {
8666 xmlFree(internal_val);
8667 }
8668 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008669
8670 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008671 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008672}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008673/**
8674 * xmlParseStartTag2:
8675 * @ctxt: an XML parser context
8676 *
8677 * parse a start of tag either for rule element or
8678 * EmptyElement. In both case we don't parse the tag closing chars.
8679 * This routine is called when running SAX2 parsing
8680 *
8681 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8682 *
8683 * [ WFC: Unique Att Spec ]
8684 * No attribute name may appear more than once in the same start-tag or
8685 * empty-element tag.
8686 *
8687 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8688 *
8689 * [ WFC: Unique Att Spec ]
8690 * No attribute name may appear more than once in the same start-tag or
8691 * empty-element tag.
8692 *
8693 * With namespace:
8694 *
8695 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8696 *
8697 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8698 *
8699 * Returns the element name parsed
8700 */
8701
8702static const xmlChar *
8703xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008704 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008705 const xmlChar *localname;
8706 const xmlChar *prefix;
8707 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008708 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008709 const xmlChar *nsname;
8710 xmlChar *attvalue;
8711 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008712 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008713 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008714 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008715 const xmlChar *base;
8716 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008717 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008718
8719 if (RAW != '<') return(NULL);
8720 NEXT1;
8721
8722 /*
8723 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8724 * point since the attribute values may be stored as pointers to
8725 * the buffer and calling SHRINK would destroy them !
8726 * The Shrinking is only possible once the full set of attribute
8727 * callbacks have been done.
8728 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008729reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008730 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008731 base = ctxt->input->base;
8732 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008733 oldline = ctxt->input->line;
8734 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008735 nbatts = 0;
8736 nratts = 0;
8737 nbdef = 0;
8738 nbNs = 0;
8739 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008740 /* Forget any namespaces added during an earlier parse of this element. */
8741 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008742
8743 localname = xmlParseQName(ctxt, &prefix);
8744 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008745 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8746 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008747 return(NULL);
8748 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008749 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008750
8751 /*
8752 * Now parse the attributes, it ends up with the ending
8753 *
8754 * (S Attribute)* S?
8755 */
8756 SKIP_BLANKS;
8757 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008758 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008759
8760 while ((RAW != '>') &&
8761 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008762 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008763 const xmlChar *q = CUR_PTR;
8764 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008765 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008766
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008767 attname = xmlParseAttribute2(ctxt, prefix, localname,
8768 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008769 if (ctxt->input->base != base) {
8770 if ((attvalue != NULL) && (alloc != 0))
8771 xmlFree(attvalue);
8772 attvalue = NULL;
8773 goto base_changed;
8774 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008775 if ((attname != NULL) && (attvalue != NULL)) {
8776 if (len < 0) len = xmlStrlen(attvalue);
8777 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008778 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8779 xmlURIPtr uri;
8780
8781 if (*URL != 0) {
8782 uri = xmlParseURI((const char *) URL);
8783 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008784 xmlNsErr(ctxt, XML_WAR_NS_URI,
8785 "xmlns: '%s' is not a valid URI\n",
8786 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008787 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008788 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008789 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8790 "xmlns: URI %s is not absolute\n",
8791 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008792 }
8793 xmlFreeURI(uri);
8794 }
Daniel Veillard37334572008-07-31 08:20:02 +00008795 if (URL == ctxt->str_xml_ns) {
8796 if (attname != ctxt->str_xml) {
8797 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8798 "xml namespace URI cannot be the default namespace\n",
8799 NULL, NULL, NULL);
8800 }
8801 goto skip_default_ns;
8802 }
8803 if ((len == 29) &&
8804 (xmlStrEqual(URL,
8805 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8806 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8807 "reuse of the xmlns namespace name is forbidden\n",
8808 NULL, NULL, NULL);
8809 goto skip_default_ns;
8810 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008811 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008812 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008813 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008814 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008815 for (j = 1;j <= nbNs;j++)
8816 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8817 break;
8818 if (j <= nbNs)
8819 xmlErrAttributeDup(ctxt, NULL, attname);
8820 else
8821 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008822skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008823 if (alloc != 0) xmlFree(attvalue);
8824 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008825 continue;
8826 }
8827 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008828 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8829 xmlURIPtr uri;
8830
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008831 if (attname == ctxt->str_xml) {
8832 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008833 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8834 "xml namespace prefix mapped to wrong URI\n",
8835 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008836 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008837 /*
8838 * Do not keep a namespace definition node
8839 */
Daniel Veillard37334572008-07-31 08:20:02 +00008840 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008841 }
Daniel Veillard37334572008-07-31 08:20:02 +00008842 if (URL == ctxt->str_xml_ns) {
8843 if (attname != ctxt->str_xml) {
8844 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8845 "xml namespace URI mapped to wrong prefix\n",
8846 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008847 }
Daniel Veillard37334572008-07-31 08:20:02 +00008848 goto skip_ns;
8849 }
8850 if (attname == ctxt->str_xmlns) {
8851 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8852 "redefinition of the xmlns prefix is forbidden\n",
8853 NULL, NULL, NULL);
8854 goto skip_ns;
8855 }
8856 if ((len == 29) &&
8857 (xmlStrEqual(URL,
8858 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8859 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8860 "reuse of the xmlns namespace name is forbidden\n",
8861 NULL, NULL, NULL);
8862 goto skip_ns;
8863 }
8864 if ((URL == NULL) || (URL[0] == 0)) {
8865 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8866 "xmlns:%s: Empty XML namespace is not allowed\n",
8867 attname, NULL, NULL);
8868 goto skip_ns;
8869 } else {
8870 uri = xmlParseURI((const char *) URL);
8871 if (uri == NULL) {
8872 xmlNsErr(ctxt, XML_WAR_NS_URI,
8873 "xmlns:%s: '%s' is not a valid URI\n",
8874 attname, URL, NULL);
8875 } else {
8876 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8877 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8878 "xmlns:%s: URI %s is not absolute\n",
8879 attname, URL, NULL);
8880 }
8881 xmlFreeURI(uri);
8882 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008883 }
8884
Daniel Veillard0fb18932003-09-07 09:14:37 +00008885 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008886 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008887 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008888 for (j = 1;j <= nbNs;j++)
8889 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8890 break;
8891 if (j <= nbNs)
8892 xmlErrAttributeDup(ctxt, aprefix, attname);
8893 else
8894 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008895skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008896 if (alloc != 0) xmlFree(attvalue);
8897 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008898 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008899 continue;
8900 }
8901
8902 /*
8903 * Add the pair to atts
8904 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008905 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8906 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008907 if (attvalue[len] == 0)
8908 xmlFree(attvalue);
8909 goto failed;
8910 }
8911 maxatts = ctxt->maxatts;
8912 atts = ctxt->atts;
8913 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008914 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008915 atts[nbatts++] = attname;
8916 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008917 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008918 atts[nbatts++] = attvalue;
8919 attvalue += len;
8920 atts[nbatts++] = attvalue;
8921 /*
8922 * tag if some deallocation is needed
8923 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008924 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008925 } else {
8926 if ((attvalue != NULL) && (attvalue[len] == 0))
8927 xmlFree(attvalue);
8928 }
8929
Daniel Veillard37334572008-07-31 08:20:02 +00008930failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008931
8932 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008933 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008934 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8935 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008936 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008937 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8938 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008939 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008940 }
8941 SKIP_BLANKS;
8942 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8943 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008944 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008945 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008946 break;
8947 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008948 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008949 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008950 }
8951
Daniel Veillard0fb18932003-09-07 09:14:37 +00008952 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008953 * The attributes defaulting
8954 */
8955 if (ctxt->attsDefault != NULL) {
8956 xmlDefAttrsPtr defaults;
8957
8958 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8959 if (defaults != NULL) {
8960 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008961 attname = defaults->values[5 * i];
8962 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008963
8964 /*
8965 * special work for namespaces defaulted defs
8966 */
8967 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8968 /*
8969 * check that it's not a defined namespace
8970 */
8971 for (j = 1;j <= nbNs;j++)
8972 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8973 break;
8974 if (j <= nbNs) continue;
8975
8976 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008977 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008978 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008979 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008980 nbNs++;
8981 }
8982 } else if (aprefix == ctxt->str_xmlns) {
8983 /*
8984 * check that it's not a defined namespace
8985 */
8986 for (j = 1;j <= nbNs;j++)
8987 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8988 break;
8989 if (j <= nbNs) continue;
8990
8991 nsname = xmlGetNamespace(ctxt, attname);
8992 if (nsname != defaults->values[2]) {
8993 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008994 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008995 nbNs++;
8996 }
8997 } else {
8998 /*
8999 * check that it's not a defined attribute
9000 */
9001 for (j = 0;j < nbatts;j+=5) {
9002 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9003 break;
9004 }
9005 if (j < nbatts) continue;
9006
9007 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9008 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009009 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009010 }
9011 maxatts = ctxt->maxatts;
9012 atts = ctxt->atts;
9013 }
9014 atts[nbatts++] = attname;
9015 atts[nbatts++] = aprefix;
9016 if (aprefix == NULL)
9017 atts[nbatts++] = NULL;
9018 else
9019 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009020 atts[nbatts++] = defaults->values[5 * i + 2];
9021 atts[nbatts++] = defaults->values[5 * i + 3];
9022 if ((ctxt->standalone == 1) &&
9023 (defaults->values[5 * i + 4] != NULL)) {
9024 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9025 "standalone: attribute %s on %s defaulted from external subset\n",
9026 attname, localname);
9027 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009028 nbdef++;
9029 }
9030 }
9031 }
9032 }
9033
Daniel Veillarde70c8772003-11-25 07:21:18 +00009034 /*
9035 * The attributes checkings
9036 */
9037 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009038 /*
9039 * The default namespace does not apply to attribute names.
9040 */
9041 if (atts[i + 1] != NULL) {
9042 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9043 if (nsname == NULL) {
9044 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9045 "Namespace prefix %s for %s on %s is not defined\n",
9046 atts[i + 1], atts[i], localname);
9047 }
9048 atts[i + 2] = nsname;
9049 } else
9050 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009051 /*
9052 * [ WFC: Unique Att Spec ]
9053 * No attribute name may appear more than once in the same
9054 * start-tag or empty-element tag.
9055 * As extended by the Namespace in XML REC.
9056 */
9057 for (j = 0; j < i;j += 5) {
9058 if (atts[i] == atts[j]) {
9059 if (atts[i+1] == atts[j+1]) {
9060 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9061 break;
9062 }
9063 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9064 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9065 "Namespaced Attribute %s in '%s' redefined\n",
9066 atts[i], nsname, NULL);
9067 break;
9068 }
9069 }
9070 }
9071 }
9072
Daniel Veillarde57ec792003-09-10 10:50:59 +00009073 nsname = xmlGetNamespace(ctxt, prefix);
9074 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009075 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9076 "Namespace prefix %s on %s is not defined\n",
9077 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009078 }
9079 *pref = prefix;
9080 *URI = nsname;
9081
9082 /*
9083 * SAX: Start of Element !
9084 */
9085 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9086 (!ctxt->disableSAX)) {
9087 if (nbNs > 0)
9088 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9089 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9090 nbatts / 5, nbdef, atts);
9091 else
9092 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9093 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9094 }
9095
9096 /*
9097 * Free up attribute allocated strings if needed
9098 */
9099 if (attval != 0) {
9100 for (i = 3,j = 0; j < nratts;i += 5,j++)
9101 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9102 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009103 }
9104
9105 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009106
9107base_changed:
9108 /*
9109 * the attribute strings are valid iif the base didn't changed
9110 */
9111 if (attval != 0) {
9112 for (i = 3,j = 0; j < nratts;i += 5,j++)
9113 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9114 xmlFree((xmlChar *) atts[i]);
9115 }
9116 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009117 ctxt->input->line = oldline;
9118 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009119 if (ctxt->wellFormed == 1) {
9120 goto reparse;
9121 }
9122 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009123}
9124
9125/**
9126 * xmlParseEndTag2:
9127 * @ctxt: an XML parser context
9128 * @line: line of the start tag
9129 * @nsNr: number of namespaces on the start tag
9130 *
9131 * parse an end of tag
9132 *
9133 * [42] ETag ::= '</' Name S? '>'
9134 *
9135 * With namespace
9136 *
9137 * [NS 9] ETag ::= '</' QName S? '>'
9138 */
9139
9140static void
9141xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009142 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009143 const xmlChar *name;
9144
9145 GROW;
9146 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009147 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009148 return;
9149 }
9150 SKIP(2);
9151
William M. Brack13dfa872004-09-18 04:52:08 +00009152 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009153 if (ctxt->input->cur[tlen] == '>') {
9154 ctxt->input->cur += tlen + 1;
9155 goto done;
9156 }
9157 ctxt->input->cur += tlen;
9158 name = (xmlChar*)1;
9159 } else {
9160 if (prefix == NULL)
9161 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9162 else
9163 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9164 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009165
9166 /*
9167 * We should definitely be at the ending "S? '>'" part
9168 */
9169 GROW;
9170 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009171 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009172 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009173 } else
9174 NEXT1;
9175
9176 /*
9177 * [ WFC: Element Type Match ]
9178 * The Name in an element's end-tag must match the element type in the
9179 * start-tag.
9180 *
9181 */
9182 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009183 if (name == NULL) name = BAD_CAST "unparseable";
9184 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009185 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009186 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009187 }
9188
9189 /*
9190 * SAX: End of Tag
9191 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009192done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009193 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9194 (!ctxt->disableSAX))
9195 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9196
Daniel Veillard0fb18932003-09-07 09:14:37 +00009197 spacePop(ctxt);
9198 if (nsNr != 0)
9199 nsPop(ctxt, nsNr);
9200 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009201}
9202
9203/**
Owen Taylor3473f882001-02-23 17:55:21 +00009204 * xmlParseCDSect:
9205 * @ctxt: an XML parser context
9206 *
9207 * Parse escaped pure raw content.
9208 *
9209 * [18] CDSect ::= CDStart CData CDEnd
9210 *
9211 * [19] CDStart ::= '<![CDATA['
9212 *
9213 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9214 *
9215 * [21] CDEnd ::= ']]>'
9216 */
9217void
9218xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9219 xmlChar *buf = NULL;
9220 int len = 0;
9221 int size = XML_PARSER_BUFFER_SIZE;
9222 int r, rl;
9223 int s, sl;
9224 int cur, l;
9225 int count = 0;
9226
Daniel Veillard8f597c32003-10-06 08:19:27 +00009227 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009228 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009229 SKIP(9);
9230 } else
9231 return;
9232
9233 ctxt->instate = XML_PARSER_CDATA_SECTION;
9234 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009235 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009236 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009237 ctxt->instate = XML_PARSER_CONTENT;
9238 return;
9239 }
9240 NEXTL(rl);
9241 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009242 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009243 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009244 ctxt->instate = XML_PARSER_CONTENT;
9245 return;
9246 }
9247 NEXTL(sl);
9248 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009249 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009250 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009251 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009252 return;
9253 }
William M. Brack871611b2003-10-18 04:53:14 +00009254 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009255 ((r != ']') || (s != ']') || (cur != '>'))) {
9256 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009257 xmlChar *tmp;
9258
Owen Taylor3473f882001-02-23 17:55:21 +00009259 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009260 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9261 if (tmp == NULL) {
9262 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009263 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009264 return;
9265 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009266 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009267 }
9268 COPY_BUF(rl,buf,len,r);
9269 r = s;
9270 rl = sl;
9271 s = cur;
9272 sl = l;
9273 count++;
9274 if (count > 50) {
9275 GROW;
9276 count = 0;
9277 }
9278 NEXTL(l);
9279 cur = CUR_CHAR(l);
9280 }
9281 buf[len] = 0;
9282 ctxt->instate = XML_PARSER_CONTENT;
9283 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009284 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009285 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009286 xmlFree(buf);
9287 return;
9288 }
9289 NEXTL(l);
9290
9291 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009292 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009293 */
9294 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9295 if (ctxt->sax->cdataBlock != NULL)
9296 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009297 else if (ctxt->sax->characters != NULL)
9298 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009299 }
9300 xmlFree(buf);
9301}
9302
9303/**
9304 * xmlParseContent:
9305 * @ctxt: an XML parser context
9306 *
9307 * Parse a content:
9308 *
9309 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9310 */
9311
9312void
9313xmlParseContent(xmlParserCtxtPtr ctxt) {
9314 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009315 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009316 ((RAW != '<') || (NXT(1) != '/')) &&
9317 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009318 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009319 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009320 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009321
9322 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009323 * First case : a Processing Instruction.
9324 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009325 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009326 xmlParsePI(ctxt);
9327 }
9328
9329 /*
9330 * Second case : a CDSection
9331 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009332 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009333 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009334 xmlParseCDSect(ctxt);
9335 }
9336
9337 /*
9338 * Third case : a comment
9339 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009340 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009341 (NXT(2) == '-') && (NXT(3) == '-')) {
9342 xmlParseComment(ctxt);
9343 ctxt->instate = XML_PARSER_CONTENT;
9344 }
9345
9346 /*
9347 * Fourth case : a sub-element.
9348 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009349 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009350 xmlParseElement(ctxt);
9351 }
9352
9353 /*
9354 * Fifth case : a reference. If if has not been resolved,
9355 * parsing returns it's Name, create the node
9356 */
9357
Daniel Veillard21a0f912001-02-25 19:54:14 +00009358 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009359 xmlParseReference(ctxt);
9360 }
9361
9362 /*
9363 * Last case, text. Note that References are handled directly.
9364 */
9365 else {
9366 xmlParseCharData(ctxt, 0);
9367 }
9368
9369 GROW;
9370 /*
9371 * Pop-up of finished entities.
9372 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009373 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009374 xmlPopInput(ctxt);
9375 SHRINK;
9376
Daniel Veillardfdc91562002-07-01 21:52:03 +00009377 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009378 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9379 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009380 ctxt->instate = XML_PARSER_EOF;
9381 break;
9382 }
9383 }
9384}
9385
9386/**
9387 * xmlParseElement:
9388 * @ctxt: an XML parser context
9389 *
9390 * parse an XML element, this is highly recursive
9391 *
9392 * [39] element ::= EmptyElemTag | STag content ETag
9393 *
9394 * [ WFC: Element Type Match ]
9395 * The Name in an element's end-tag must match the element type in the
9396 * start-tag.
9397 *
Owen Taylor3473f882001-02-23 17:55:21 +00009398 */
9399
9400void
9401xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009402 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009403 const xmlChar *prefix;
9404 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00009405 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009406 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009407 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009408 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009409
Daniel Veillard8915c152008-08-26 13:05:34 +00009410 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9411 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9412 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9413 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9414 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009415 ctxt->instate = XML_PARSER_EOF;
9416 return;
9417 }
9418
Owen Taylor3473f882001-02-23 17:55:21 +00009419 /* Capture start position */
9420 if (ctxt->record_info) {
9421 node_info.begin_pos = ctxt->input->consumed +
9422 (CUR_PTR - ctxt->input->base);
9423 node_info.begin_line = ctxt->input->line;
9424 }
9425
9426 if (ctxt->spaceNr == 0)
9427 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009428 else if (*ctxt->space == -2)
9429 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009430 else
9431 spacePush(ctxt, *ctxt->space);
9432
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009433 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009434#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009435 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009436#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009437 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009438#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009439 else
9440 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009441#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009442 if (name == NULL) {
9443 spacePop(ctxt);
9444 return;
9445 }
9446 namePush(ctxt, name);
9447 ret = ctxt->node;
9448
Daniel Veillard4432df22003-09-28 18:58:27 +00009449#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009450 /*
9451 * [ VC: Root Element Type ]
9452 * The Name in the document type declaration must match the element
9453 * type of the root element.
9454 */
9455 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9456 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9457 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009458#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009459
9460 /*
9461 * Check for an Empty Element.
9462 */
9463 if ((RAW == '/') && (NXT(1) == '>')) {
9464 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009465 if (ctxt->sax2) {
9466 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9467 (!ctxt->disableSAX))
9468 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009469#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009470 } else {
9471 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9472 (!ctxt->disableSAX))
9473 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009474#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009475 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009476 namePop(ctxt);
9477 spacePop(ctxt);
9478 if (nsNr != ctxt->nsNr)
9479 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009480 if ( ret != NULL && ctxt->record_info ) {
9481 node_info.end_pos = ctxt->input->consumed +
9482 (CUR_PTR - ctxt->input->base);
9483 node_info.end_line = ctxt->input->line;
9484 node_info.node = ret;
9485 xmlParserAddNodeInfo(ctxt, &node_info);
9486 }
9487 return;
9488 }
9489 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009490 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009491 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009492 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9493 "Couldn't find end of Start Tag %s line %d\n",
9494 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009495
9496 /*
9497 * end of parsing of this node.
9498 */
9499 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009500 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009501 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009502 if (nsNr != ctxt->nsNr)
9503 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009504
9505 /*
9506 * Capture end position and add node
9507 */
9508 if ( ret != NULL && ctxt->record_info ) {
9509 node_info.end_pos = ctxt->input->consumed +
9510 (CUR_PTR - ctxt->input->base);
9511 node_info.end_line = ctxt->input->line;
9512 node_info.node = ret;
9513 xmlParserAddNodeInfo(ctxt, &node_info);
9514 }
9515 return;
9516 }
9517
9518 /*
9519 * Parse the content of the element:
9520 */
9521 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009522 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009523 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009524 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009525 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009526
9527 /*
9528 * end of parsing of this node.
9529 */
9530 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009531 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009532 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009533 if (nsNr != ctxt->nsNr)
9534 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009535 return;
9536 }
9537
9538 /*
9539 * parse the end of tag: '</' should be here.
9540 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009541 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009542 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009543 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009544 }
9545#ifdef LIBXML_SAX1_ENABLED
9546 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009547 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009548#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009549
9550 /*
9551 * Capture end position and add node
9552 */
9553 if ( ret != NULL && ctxt->record_info ) {
9554 node_info.end_pos = ctxt->input->consumed +
9555 (CUR_PTR - ctxt->input->base);
9556 node_info.end_line = ctxt->input->line;
9557 node_info.node = ret;
9558 xmlParserAddNodeInfo(ctxt, &node_info);
9559 }
9560}
9561
9562/**
9563 * xmlParseVersionNum:
9564 * @ctxt: an XML parser context
9565 *
9566 * parse the XML version value.
9567 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009568 * [26] VersionNum ::= '1.' [0-9]+
9569 *
9570 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009571 *
9572 * Returns the string giving the XML version number, or NULL
9573 */
9574xmlChar *
9575xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9576 xmlChar *buf = NULL;
9577 int len = 0;
9578 int size = 10;
9579 xmlChar cur;
9580
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009581 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009582 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009583 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009584 return(NULL);
9585 }
9586 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009587 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009588 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009589 return(NULL);
9590 }
9591 buf[len++] = cur;
9592 NEXT;
9593 cur=CUR;
9594 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009595 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009596 return(NULL);
9597 }
9598 buf[len++] = cur;
9599 NEXT;
9600 cur=CUR;
9601 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009602 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009603 xmlChar *tmp;
9604
Owen Taylor3473f882001-02-23 17:55:21 +00009605 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009606 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9607 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009608 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009609 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009610 return(NULL);
9611 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009612 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009613 }
9614 buf[len++] = cur;
9615 NEXT;
9616 cur=CUR;
9617 }
9618 buf[len] = 0;
9619 return(buf);
9620}
9621
9622/**
9623 * xmlParseVersionInfo:
9624 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009625 *
Owen Taylor3473f882001-02-23 17:55:21 +00009626 * parse the XML version.
9627 *
9628 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009629 *
Owen Taylor3473f882001-02-23 17:55:21 +00009630 * [25] Eq ::= S? '=' S?
9631 *
9632 * Returns the version string, e.g. "1.0"
9633 */
9634
9635xmlChar *
9636xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9637 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009638
Daniel Veillarda07050d2003-10-19 14:46:32 +00009639 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009640 SKIP(7);
9641 SKIP_BLANKS;
9642 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009643 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009644 return(NULL);
9645 }
9646 NEXT;
9647 SKIP_BLANKS;
9648 if (RAW == '"') {
9649 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009650 version = xmlParseVersionNum(ctxt);
9651 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009652 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009653 } else
9654 NEXT;
9655 } else if (RAW == '\''){
9656 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009657 version = xmlParseVersionNum(ctxt);
9658 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009659 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009660 } else
9661 NEXT;
9662 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009663 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009664 }
9665 }
9666 return(version);
9667}
9668
9669/**
9670 * xmlParseEncName:
9671 * @ctxt: an XML parser context
9672 *
9673 * parse the XML encoding name
9674 *
9675 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9676 *
9677 * Returns the encoding name value or NULL
9678 */
9679xmlChar *
9680xmlParseEncName(xmlParserCtxtPtr ctxt) {
9681 xmlChar *buf = NULL;
9682 int len = 0;
9683 int size = 10;
9684 xmlChar cur;
9685
9686 cur = CUR;
9687 if (((cur >= 'a') && (cur <= 'z')) ||
9688 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009689 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009690 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009691 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009692 return(NULL);
9693 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009694
Owen Taylor3473f882001-02-23 17:55:21 +00009695 buf[len++] = cur;
9696 NEXT;
9697 cur = CUR;
9698 while (((cur >= 'a') && (cur <= 'z')) ||
9699 ((cur >= 'A') && (cur <= 'Z')) ||
9700 ((cur >= '0') && (cur <= '9')) ||
9701 (cur == '.') || (cur == '_') ||
9702 (cur == '-')) {
9703 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009704 xmlChar *tmp;
9705
Owen Taylor3473f882001-02-23 17:55:21 +00009706 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009707 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9708 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009709 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009710 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009711 return(NULL);
9712 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009713 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009714 }
9715 buf[len++] = cur;
9716 NEXT;
9717 cur = CUR;
9718 if (cur == 0) {
9719 SHRINK;
9720 GROW;
9721 cur = CUR;
9722 }
9723 }
9724 buf[len] = 0;
9725 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009726 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009727 }
9728 return(buf);
9729}
9730
9731/**
9732 * xmlParseEncodingDecl:
9733 * @ctxt: an XML parser context
9734 *
9735 * parse the XML encoding declaration
9736 *
9737 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9738 *
9739 * this setups the conversion filters.
9740 *
9741 * Returns the encoding value or NULL
9742 */
9743
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009744const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009745xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9746 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009747
9748 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009749 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009750 SKIP(8);
9751 SKIP_BLANKS;
9752 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009753 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009754 return(NULL);
9755 }
9756 NEXT;
9757 SKIP_BLANKS;
9758 if (RAW == '"') {
9759 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009760 encoding = xmlParseEncName(ctxt);
9761 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009762 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009763 } else
9764 NEXT;
9765 } else if (RAW == '\''){
9766 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009767 encoding = xmlParseEncName(ctxt);
9768 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009769 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009770 } else
9771 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009772 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009773 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009774 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009775 /*
9776 * UTF-16 encoding stwich has already taken place at this stage,
9777 * more over the little-endian/big-endian selection is already done
9778 */
9779 if ((encoding != NULL) &&
9780 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9781 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009782 /*
9783 * If no encoding was passed to the parser, that we are
9784 * using UTF-16 and no decoder is present i.e. the
9785 * document is apparently UTF-8 compatible, then raise an
9786 * encoding mismatch fatal error
9787 */
9788 if ((ctxt->encoding == NULL) &&
9789 (ctxt->input->buf != NULL) &&
9790 (ctxt->input->buf->encoder == NULL)) {
9791 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9792 "Document labelled UTF-16 but has UTF-8 content\n");
9793 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009794 if (ctxt->encoding != NULL)
9795 xmlFree((xmlChar *) ctxt->encoding);
9796 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009797 }
9798 /*
9799 * UTF-8 encoding is handled natively
9800 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009801 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009802 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9803 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009804 if (ctxt->encoding != NULL)
9805 xmlFree((xmlChar *) ctxt->encoding);
9806 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009807 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009808 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009809 xmlCharEncodingHandlerPtr handler;
9810
9811 if (ctxt->input->encoding != NULL)
9812 xmlFree((xmlChar *) ctxt->input->encoding);
9813 ctxt->input->encoding = encoding;
9814
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009815 handler = xmlFindCharEncodingHandler((const char *) encoding);
9816 if (handler != NULL) {
9817 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009818 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009819 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009820 "Unsupported encoding %s\n", encoding);
9821 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009822 }
9823 }
9824 }
9825 return(encoding);
9826}
9827
9828/**
9829 * xmlParseSDDecl:
9830 * @ctxt: an XML parser context
9831 *
9832 * parse the XML standalone declaration
9833 *
9834 * [32] SDDecl ::= S 'standalone' Eq
9835 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9836 *
9837 * [ VC: Standalone Document Declaration ]
9838 * TODO The standalone document declaration must have the value "no"
9839 * if any external markup declarations contain declarations of:
9840 * - attributes with default values, if elements to which these
9841 * attributes apply appear in the document without specifications
9842 * of values for these attributes, or
9843 * - entities (other than amp, lt, gt, apos, quot), if references
9844 * to those entities appear in the document, or
9845 * - attributes with values subject to normalization, where the
9846 * attribute appears in the document with a value which will change
9847 * as a result of normalization, or
9848 * - element types with element content, if white space occurs directly
9849 * within any instance of those types.
9850 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009851 * Returns:
9852 * 1 if standalone="yes"
9853 * 0 if standalone="no"
9854 * -2 if standalone attribute is missing or invalid
9855 * (A standalone value of -2 means that the XML declaration was found,
9856 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009857 */
9858
9859int
9860xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009861 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009862
9863 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009864 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009865 SKIP(10);
9866 SKIP_BLANKS;
9867 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009868 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009869 return(standalone);
9870 }
9871 NEXT;
9872 SKIP_BLANKS;
9873 if (RAW == '\''){
9874 NEXT;
9875 if ((RAW == 'n') && (NXT(1) == 'o')) {
9876 standalone = 0;
9877 SKIP(2);
9878 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9879 (NXT(2) == 's')) {
9880 standalone = 1;
9881 SKIP(3);
9882 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009883 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009884 }
9885 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009886 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009887 } else
9888 NEXT;
9889 } else if (RAW == '"'){
9890 NEXT;
9891 if ((RAW == 'n') && (NXT(1) == 'o')) {
9892 standalone = 0;
9893 SKIP(2);
9894 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9895 (NXT(2) == 's')) {
9896 standalone = 1;
9897 SKIP(3);
9898 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009899 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009900 }
9901 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009902 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009903 } else
9904 NEXT;
9905 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009906 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009907 }
9908 }
9909 return(standalone);
9910}
9911
9912/**
9913 * xmlParseXMLDecl:
9914 * @ctxt: an XML parser context
9915 *
9916 * parse an XML declaration header
9917 *
9918 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9919 */
9920
9921void
9922xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9923 xmlChar *version;
9924
9925 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009926 * This value for standalone indicates that the document has an
9927 * XML declaration but it does not have a standalone attribute.
9928 * It will be overwritten later if a standalone attribute is found.
9929 */
9930 ctxt->input->standalone = -2;
9931
9932 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009933 * We know that '<?xml' is here.
9934 */
9935 SKIP(5);
9936
William M. Brack76e95df2003-10-18 16:20:14 +00009937 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009938 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9939 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009940 }
9941 SKIP_BLANKS;
9942
9943 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009944 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009945 */
9946 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009947 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009948 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009949 } else {
9950 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9951 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009952 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009953 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009954 if (ctxt->options & XML_PARSE_OLD10) {
9955 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9956 "Unsupported version '%s'\n",
9957 version);
9958 } else {
9959 if ((version[0] == '1') && ((version[1] == '.'))) {
9960 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9961 "Unsupported version '%s'\n",
9962 version, NULL);
9963 } else {
9964 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9965 "Unsupported version '%s'\n",
9966 version);
9967 }
9968 }
Daniel Veillard19840942001-11-29 16:11:38 +00009969 }
9970 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009971 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009972 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009973 }
Owen Taylor3473f882001-02-23 17:55:21 +00009974
9975 /*
9976 * We may have the encoding declaration
9977 */
William M. Brack76e95df2003-10-18 16:20:14 +00009978 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009979 if ((RAW == '?') && (NXT(1) == '>')) {
9980 SKIP(2);
9981 return;
9982 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009983 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009984 }
9985 xmlParseEncodingDecl(ctxt);
9986 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9987 /*
9988 * The XML REC instructs us to stop parsing right here
9989 */
9990 return;
9991 }
9992
9993 /*
9994 * We may have the standalone status.
9995 */
William M. Brack76e95df2003-10-18 16:20:14 +00009996 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009997 if ((RAW == '?') && (NXT(1) == '>')) {
9998 SKIP(2);
9999 return;
10000 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010001 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010002 }
10003 SKIP_BLANKS;
10004 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10005
10006 SKIP_BLANKS;
10007 if ((RAW == '?') && (NXT(1) == '>')) {
10008 SKIP(2);
10009 } else if (RAW == '>') {
10010 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010011 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010012 NEXT;
10013 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010014 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010015 MOVETO_ENDTAG(CUR_PTR);
10016 NEXT;
10017 }
10018}
10019
10020/**
10021 * xmlParseMisc:
10022 * @ctxt: an XML parser context
10023 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010024 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010025 *
10026 * [27] Misc ::= Comment | PI | S
10027 */
10028
10029void
10030xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010031 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010032 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010033 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010034 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010035 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010036 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010037 NEXT;
10038 } else
10039 xmlParseComment(ctxt);
10040 }
10041}
10042
10043/**
10044 * xmlParseDocument:
10045 * @ctxt: an XML parser context
10046 *
10047 * parse an XML document (and build a tree if using the standard SAX
10048 * interface).
10049 *
10050 * [1] document ::= prolog element Misc*
10051 *
10052 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10053 *
10054 * Returns 0, -1 in case of error. the parser context is augmented
10055 * as a result of the parsing.
10056 */
10057
10058int
10059xmlParseDocument(xmlParserCtxtPtr ctxt) {
10060 xmlChar start[4];
10061 xmlCharEncoding enc;
10062
10063 xmlInitParser();
10064
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010065 if ((ctxt == NULL) || (ctxt->input == NULL))
10066 return(-1);
10067
Owen Taylor3473f882001-02-23 17:55:21 +000010068 GROW;
10069
10070 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010071 * SAX: detecting the level.
10072 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010073 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010074
10075 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010076 * SAX: beginning of the document processing.
10077 */
10078 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10079 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10080
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010081 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
10082 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010083 /*
10084 * Get the 4 first bytes and decode the charset
10085 * if enc != XML_CHAR_ENCODING_NONE
10086 * plug some encoding conversion routines.
10087 */
10088 start[0] = RAW;
10089 start[1] = NXT(1);
10090 start[2] = NXT(2);
10091 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010092 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010093 if (enc != XML_CHAR_ENCODING_NONE) {
10094 xmlSwitchEncoding(ctxt, enc);
10095 }
Owen Taylor3473f882001-02-23 17:55:21 +000010096 }
10097
10098
10099 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010100 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010101 }
10102
10103 /*
10104 * Check for the XMLDecl in the Prolog.
10105 */
10106 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010107 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010108
10109 /*
10110 * Note that we will switch encoding on the fly.
10111 */
10112 xmlParseXMLDecl(ctxt);
10113 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10114 /*
10115 * The XML REC instructs us to stop parsing right here
10116 */
10117 return(-1);
10118 }
10119 ctxt->standalone = ctxt->input->standalone;
10120 SKIP_BLANKS;
10121 } else {
10122 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10123 }
10124 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10125 ctxt->sax->startDocument(ctxt->userData);
10126
10127 /*
10128 * The Misc part of the Prolog
10129 */
10130 GROW;
10131 xmlParseMisc(ctxt);
10132
10133 /*
10134 * Then possibly doc type declaration(s) and more Misc
10135 * (doctypedecl Misc*)?
10136 */
10137 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010138 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010139
10140 ctxt->inSubset = 1;
10141 xmlParseDocTypeDecl(ctxt);
10142 if (RAW == '[') {
10143 ctxt->instate = XML_PARSER_DTD;
10144 xmlParseInternalSubset(ctxt);
10145 }
10146
10147 /*
10148 * Create and update the external subset.
10149 */
10150 ctxt->inSubset = 2;
10151 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10152 (!ctxt->disableSAX))
10153 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10154 ctxt->extSubSystem, ctxt->extSubURI);
10155 ctxt->inSubset = 0;
10156
Daniel Veillardac4118d2008-01-11 05:27:32 +000010157 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010158
10159 ctxt->instate = XML_PARSER_PROLOG;
10160 xmlParseMisc(ctxt);
10161 }
10162
10163 /*
10164 * Time to start parsing the tree itself
10165 */
10166 GROW;
10167 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010168 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10169 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010170 } else {
10171 ctxt->instate = XML_PARSER_CONTENT;
10172 xmlParseElement(ctxt);
10173 ctxt->instate = XML_PARSER_EPILOG;
10174
10175
10176 /*
10177 * The Misc part at the end
10178 */
10179 xmlParseMisc(ctxt);
10180
Daniel Veillard561b7f82002-03-20 21:55:57 +000010181 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010182 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010183 }
10184 ctxt->instate = XML_PARSER_EOF;
10185 }
10186
10187 /*
10188 * SAX: end of the document processing.
10189 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010190 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010191 ctxt->sax->endDocument(ctxt->userData);
10192
Daniel Veillard5997aca2002-03-18 18:36:20 +000010193 /*
10194 * Remove locally kept entity definitions if the tree was not built
10195 */
10196 if ((ctxt->myDoc != NULL) &&
10197 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10198 xmlFreeDoc(ctxt->myDoc);
10199 ctxt->myDoc = NULL;
10200 }
10201
Daniel Veillardae0765b2008-07-31 19:54:59 +000010202 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10203 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10204 if (ctxt->valid)
10205 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10206 if (ctxt->nsWellFormed)
10207 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10208 if (ctxt->options & XML_PARSE_OLD10)
10209 ctxt->myDoc->properties |= XML_DOC_OLD10;
10210 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010211 if (! ctxt->wellFormed) {
10212 ctxt->valid = 0;
10213 return(-1);
10214 }
Owen Taylor3473f882001-02-23 17:55:21 +000010215 return(0);
10216}
10217
10218/**
10219 * xmlParseExtParsedEnt:
10220 * @ctxt: an XML parser context
10221 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010222 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010223 * An external general parsed entity is well-formed if it matches the
10224 * production labeled extParsedEnt.
10225 *
10226 * [78] extParsedEnt ::= TextDecl? content
10227 *
10228 * Returns 0, -1 in case of error. the parser context is augmented
10229 * as a result of the parsing.
10230 */
10231
10232int
10233xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10234 xmlChar start[4];
10235 xmlCharEncoding enc;
10236
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010237 if ((ctxt == NULL) || (ctxt->input == NULL))
10238 return(-1);
10239
Owen Taylor3473f882001-02-23 17:55:21 +000010240 xmlDefaultSAXHandlerInit();
10241
Daniel Veillard309f81d2003-09-23 09:02:53 +000010242 xmlDetectSAX2(ctxt);
10243
Owen Taylor3473f882001-02-23 17:55:21 +000010244 GROW;
10245
10246 /*
10247 * SAX: beginning of the document processing.
10248 */
10249 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10250 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10251
10252 /*
10253 * Get the 4 first bytes and decode the charset
10254 * if enc != XML_CHAR_ENCODING_NONE
10255 * plug some encoding conversion routines.
10256 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010257 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10258 start[0] = RAW;
10259 start[1] = NXT(1);
10260 start[2] = NXT(2);
10261 start[3] = NXT(3);
10262 enc = xmlDetectCharEncoding(start, 4);
10263 if (enc != XML_CHAR_ENCODING_NONE) {
10264 xmlSwitchEncoding(ctxt, enc);
10265 }
Owen Taylor3473f882001-02-23 17:55:21 +000010266 }
10267
10268
10269 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010270 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010271 }
10272
10273 /*
10274 * Check for the XMLDecl in the Prolog.
10275 */
10276 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010277 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010278
10279 /*
10280 * Note that we will switch encoding on the fly.
10281 */
10282 xmlParseXMLDecl(ctxt);
10283 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10284 /*
10285 * The XML REC instructs us to stop parsing right here
10286 */
10287 return(-1);
10288 }
10289 SKIP_BLANKS;
10290 } else {
10291 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10292 }
10293 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10294 ctxt->sax->startDocument(ctxt->userData);
10295
10296 /*
10297 * Doing validity checking on chunk doesn't make sense
10298 */
10299 ctxt->instate = XML_PARSER_CONTENT;
10300 ctxt->validate = 0;
10301 ctxt->loadsubset = 0;
10302 ctxt->depth = 0;
10303
10304 xmlParseContent(ctxt);
10305
10306 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010307 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010308 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010309 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010310 }
10311
10312 /*
10313 * SAX: end of the document processing.
10314 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010315 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010316 ctxt->sax->endDocument(ctxt->userData);
10317
10318 if (! ctxt->wellFormed) return(-1);
10319 return(0);
10320}
10321
Daniel Veillard73b013f2003-09-30 12:36:01 +000010322#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010323/************************************************************************
10324 * *
10325 * Progressive parsing interfaces *
10326 * *
10327 ************************************************************************/
10328
10329/**
10330 * xmlParseLookupSequence:
10331 * @ctxt: an XML parser context
10332 * @first: the first char to lookup
10333 * @next: the next char to lookup or zero
10334 * @third: the next char to lookup or zero
10335 *
10336 * Try to find if a sequence (first, next, third) or just (first next) or
10337 * (first) is available in the input stream.
10338 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10339 * to avoid rescanning sequences of bytes, it DOES change the state of the
10340 * parser, do not use liberally.
10341 *
10342 * Returns the index to the current parsing point if the full sequence
10343 * is available, -1 otherwise.
10344 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010345static int
Owen Taylor3473f882001-02-23 17:55:21 +000010346xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10347 xmlChar next, xmlChar third) {
10348 int base, len;
10349 xmlParserInputPtr in;
10350 const xmlChar *buf;
10351
10352 in = ctxt->input;
10353 if (in == NULL) return(-1);
10354 base = in->cur - in->base;
10355 if (base < 0) return(-1);
10356 if (ctxt->checkIndex > base)
10357 base = ctxt->checkIndex;
10358 if (in->buf == NULL) {
10359 buf = in->base;
10360 len = in->length;
10361 } else {
10362 buf = in->buf->buffer->content;
10363 len = in->buf->buffer->use;
10364 }
10365 /* take into account the sequence length */
10366 if (third) len -= 2;
10367 else if (next) len --;
10368 for (;base < len;base++) {
10369 if (buf[base] == first) {
10370 if (third != 0) {
10371 if ((buf[base + 1] != next) ||
10372 (buf[base + 2] != third)) continue;
10373 } else if (next != 0) {
10374 if (buf[base + 1] != next) continue;
10375 }
10376 ctxt->checkIndex = 0;
10377#ifdef DEBUG_PUSH
10378 if (next == 0)
10379 xmlGenericError(xmlGenericErrorContext,
10380 "PP: lookup '%c' found at %d\n",
10381 first, base);
10382 else if (third == 0)
10383 xmlGenericError(xmlGenericErrorContext,
10384 "PP: lookup '%c%c' found at %d\n",
10385 first, next, base);
10386 else
10387 xmlGenericError(xmlGenericErrorContext,
10388 "PP: lookup '%c%c%c' found at %d\n",
10389 first, next, third, base);
10390#endif
10391 return(base - (in->cur - in->base));
10392 }
10393 }
10394 ctxt->checkIndex = base;
10395#ifdef DEBUG_PUSH
10396 if (next == 0)
10397 xmlGenericError(xmlGenericErrorContext,
10398 "PP: lookup '%c' failed\n", first);
10399 else if (third == 0)
10400 xmlGenericError(xmlGenericErrorContext,
10401 "PP: lookup '%c%c' failed\n", first, next);
10402 else
10403 xmlGenericError(xmlGenericErrorContext,
10404 "PP: lookup '%c%c%c' failed\n", first, next, third);
10405#endif
10406 return(-1);
10407}
10408
10409/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010410 * xmlParseGetLasts:
10411 * @ctxt: an XML parser context
10412 * @lastlt: pointer to store the last '<' from the input
10413 * @lastgt: pointer to store the last '>' from the input
10414 *
10415 * Lookup the last < and > in the current chunk
10416 */
10417static void
10418xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10419 const xmlChar **lastgt) {
10420 const xmlChar *tmp;
10421
10422 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10423 xmlGenericError(xmlGenericErrorContext,
10424 "Internal error: xmlParseGetLasts\n");
10425 return;
10426 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010427 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010428 tmp = ctxt->input->end;
10429 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010430 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010431 if (tmp < ctxt->input->base) {
10432 *lastlt = NULL;
10433 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010434 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010435 *lastlt = tmp;
10436 tmp++;
10437 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10438 if (*tmp == '\'') {
10439 tmp++;
10440 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10441 if (tmp < ctxt->input->end) tmp++;
10442 } else if (*tmp == '"') {
10443 tmp++;
10444 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10445 if (tmp < ctxt->input->end) tmp++;
10446 } else
10447 tmp++;
10448 }
10449 if (tmp < ctxt->input->end)
10450 *lastgt = tmp;
10451 else {
10452 tmp = *lastlt;
10453 tmp--;
10454 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10455 if (tmp >= ctxt->input->base)
10456 *lastgt = tmp;
10457 else
10458 *lastgt = NULL;
10459 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010460 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010461 } else {
10462 *lastlt = NULL;
10463 *lastgt = NULL;
10464 }
10465}
10466/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010467 * xmlCheckCdataPush:
10468 * @cur: pointer to the bock of characters
10469 * @len: length of the block in bytes
10470 *
10471 * Check that the block of characters is okay as SCdata content [20]
10472 *
10473 * Returns the number of bytes to pass if okay, a negative index where an
10474 * UTF-8 error occured otherwise
10475 */
10476static int
10477xmlCheckCdataPush(const xmlChar *utf, int len) {
10478 int ix;
10479 unsigned char c;
10480 int codepoint;
10481
10482 if ((utf == NULL) || (len <= 0))
10483 return(0);
10484
10485 for (ix = 0; ix < len;) { /* string is 0-terminated */
10486 c = utf[ix];
10487 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10488 if (c >= 0x20)
10489 ix++;
10490 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10491 ix++;
10492 else
10493 return(-ix);
10494 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10495 if (ix + 2 > len) return(ix);
10496 if ((utf[ix+1] & 0xc0 ) != 0x80)
10497 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010498 codepoint = (utf[ix] & 0x1f) << 6;
10499 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010500 if (!xmlIsCharQ(codepoint))
10501 return(-ix);
10502 ix += 2;
10503 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10504 if (ix + 3 > len) return(ix);
10505 if (((utf[ix+1] & 0xc0) != 0x80) ||
10506 ((utf[ix+2] & 0xc0) != 0x80))
10507 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010508 codepoint = (utf[ix] & 0xf) << 12;
10509 codepoint |= (utf[ix+1] & 0x3f) << 6;
10510 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010511 if (!xmlIsCharQ(codepoint))
10512 return(-ix);
10513 ix += 3;
10514 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10515 if (ix + 4 > len) return(ix);
10516 if (((utf[ix+1] & 0xc0) != 0x80) ||
10517 ((utf[ix+2] & 0xc0) != 0x80) ||
10518 ((utf[ix+3] & 0xc0) != 0x80))
10519 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010520 codepoint = (utf[ix] & 0x7) << 18;
10521 codepoint |= (utf[ix+1] & 0x3f) << 12;
10522 codepoint |= (utf[ix+2] & 0x3f) << 6;
10523 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010524 if (!xmlIsCharQ(codepoint))
10525 return(-ix);
10526 ix += 4;
10527 } else /* unknown encoding */
10528 return(-ix);
10529 }
10530 return(ix);
10531}
10532
10533/**
Owen Taylor3473f882001-02-23 17:55:21 +000010534 * xmlParseTryOrFinish:
10535 * @ctxt: an XML parser context
10536 * @terminate: last chunk indicator
10537 *
10538 * Try to progress on parsing
10539 *
10540 * Returns zero if no parsing was possible
10541 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010542static int
Owen Taylor3473f882001-02-23 17:55:21 +000010543xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10544 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010545 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010546 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010547 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010548
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010549 if (ctxt->input == NULL)
10550 return(0);
10551
Owen Taylor3473f882001-02-23 17:55:21 +000010552#ifdef DEBUG_PUSH
10553 switch (ctxt->instate) {
10554 case XML_PARSER_EOF:
10555 xmlGenericError(xmlGenericErrorContext,
10556 "PP: try EOF\n"); break;
10557 case XML_PARSER_START:
10558 xmlGenericError(xmlGenericErrorContext,
10559 "PP: try START\n"); break;
10560 case XML_PARSER_MISC:
10561 xmlGenericError(xmlGenericErrorContext,
10562 "PP: try MISC\n");break;
10563 case XML_PARSER_COMMENT:
10564 xmlGenericError(xmlGenericErrorContext,
10565 "PP: try COMMENT\n");break;
10566 case XML_PARSER_PROLOG:
10567 xmlGenericError(xmlGenericErrorContext,
10568 "PP: try PROLOG\n");break;
10569 case XML_PARSER_START_TAG:
10570 xmlGenericError(xmlGenericErrorContext,
10571 "PP: try START_TAG\n");break;
10572 case XML_PARSER_CONTENT:
10573 xmlGenericError(xmlGenericErrorContext,
10574 "PP: try CONTENT\n");break;
10575 case XML_PARSER_CDATA_SECTION:
10576 xmlGenericError(xmlGenericErrorContext,
10577 "PP: try CDATA_SECTION\n");break;
10578 case XML_PARSER_END_TAG:
10579 xmlGenericError(xmlGenericErrorContext,
10580 "PP: try END_TAG\n");break;
10581 case XML_PARSER_ENTITY_DECL:
10582 xmlGenericError(xmlGenericErrorContext,
10583 "PP: try ENTITY_DECL\n");break;
10584 case XML_PARSER_ENTITY_VALUE:
10585 xmlGenericError(xmlGenericErrorContext,
10586 "PP: try ENTITY_VALUE\n");break;
10587 case XML_PARSER_ATTRIBUTE_VALUE:
10588 xmlGenericError(xmlGenericErrorContext,
10589 "PP: try ATTRIBUTE_VALUE\n");break;
10590 case XML_PARSER_DTD:
10591 xmlGenericError(xmlGenericErrorContext,
10592 "PP: try DTD\n");break;
10593 case XML_PARSER_EPILOG:
10594 xmlGenericError(xmlGenericErrorContext,
10595 "PP: try EPILOG\n");break;
10596 case XML_PARSER_PI:
10597 xmlGenericError(xmlGenericErrorContext,
10598 "PP: try PI\n");break;
10599 case XML_PARSER_IGNORE:
10600 xmlGenericError(xmlGenericErrorContext,
10601 "PP: try IGNORE\n");break;
10602 }
10603#endif
10604
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010605 if ((ctxt->input != NULL) &&
10606 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010607 xmlSHRINK(ctxt);
10608 ctxt->checkIndex = 0;
10609 }
10610 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010611
Daniel Veillarda880b122003-04-21 21:36:41 +000010612 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010613 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010614 return(0);
10615
10616
Owen Taylor3473f882001-02-23 17:55:21 +000010617 /*
10618 * Pop-up of finished entities.
10619 */
10620 while ((RAW == 0) && (ctxt->inputNr > 1))
10621 xmlPopInput(ctxt);
10622
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010623 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010624 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010625 avail = ctxt->input->length -
10626 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010627 else {
10628 /*
10629 * If we are operating on converted input, try to flush
10630 * remainng chars to avoid them stalling in the non-converted
10631 * buffer.
10632 */
10633 if ((ctxt->input->buf->raw != NULL) &&
10634 (ctxt->input->buf->raw->use > 0)) {
10635 int base = ctxt->input->base -
10636 ctxt->input->buf->buffer->content;
10637 int current = ctxt->input->cur - ctxt->input->base;
10638
10639 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10640 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10641 ctxt->input->cur = ctxt->input->base + current;
10642 ctxt->input->end =
10643 &ctxt->input->buf->buffer->content[
10644 ctxt->input->buf->buffer->use];
10645 }
10646 avail = ctxt->input->buf->buffer->use -
10647 (ctxt->input->cur - ctxt->input->base);
10648 }
Owen Taylor3473f882001-02-23 17:55:21 +000010649 if (avail < 1)
10650 goto done;
10651 switch (ctxt->instate) {
10652 case XML_PARSER_EOF:
10653 /*
10654 * Document parsing is done !
10655 */
10656 goto done;
10657 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010658 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10659 xmlChar start[4];
10660 xmlCharEncoding enc;
10661
10662 /*
10663 * Very first chars read from the document flow.
10664 */
10665 if (avail < 4)
10666 goto done;
10667
10668 /*
10669 * Get the 4 first bytes and decode the charset
10670 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010671 * plug some encoding conversion routines,
10672 * else xmlSwitchEncoding will set to (default)
10673 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010674 */
10675 start[0] = RAW;
10676 start[1] = NXT(1);
10677 start[2] = NXT(2);
10678 start[3] = NXT(3);
10679 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010680 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010681 break;
10682 }
Owen Taylor3473f882001-02-23 17:55:21 +000010683
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010684 if (avail < 2)
10685 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010686 cur = ctxt->input->cur[0];
10687 next = ctxt->input->cur[1];
10688 if (cur == 0) {
10689 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10690 ctxt->sax->setDocumentLocator(ctxt->userData,
10691 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010692 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010693 ctxt->instate = XML_PARSER_EOF;
10694#ifdef DEBUG_PUSH
10695 xmlGenericError(xmlGenericErrorContext,
10696 "PP: entering EOF\n");
10697#endif
10698 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10699 ctxt->sax->endDocument(ctxt->userData);
10700 goto done;
10701 }
10702 if ((cur == '<') && (next == '?')) {
10703 /* PI or XML decl */
10704 if (avail < 5) return(ret);
10705 if ((!terminate) &&
10706 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10707 return(ret);
10708 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10709 ctxt->sax->setDocumentLocator(ctxt->userData,
10710 &xmlDefaultSAXLocator);
10711 if ((ctxt->input->cur[2] == 'x') &&
10712 (ctxt->input->cur[3] == 'm') &&
10713 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010714 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010715 ret += 5;
10716#ifdef DEBUG_PUSH
10717 xmlGenericError(xmlGenericErrorContext,
10718 "PP: Parsing XML Decl\n");
10719#endif
10720 xmlParseXMLDecl(ctxt);
10721 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10722 /*
10723 * The XML REC instructs us to stop parsing right
10724 * here
10725 */
10726 ctxt->instate = XML_PARSER_EOF;
10727 return(0);
10728 }
10729 ctxt->standalone = ctxt->input->standalone;
10730 if ((ctxt->encoding == NULL) &&
10731 (ctxt->input->encoding != NULL))
10732 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10733 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10734 (!ctxt->disableSAX))
10735 ctxt->sax->startDocument(ctxt->userData);
10736 ctxt->instate = XML_PARSER_MISC;
10737#ifdef DEBUG_PUSH
10738 xmlGenericError(xmlGenericErrorContext,
10739 "PP: entering MISC\n");
10740#endif
10741 } else {
10742 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10743 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10744 (!ctxt->disableSAX))
10745 ctxt->sax->startDocument(ctxt->userData);
10746 ctxt->instate = XML_PARSER_MISC;
10747#ifdef DEBUG_PUSH
10748 xmlGenericError(xmlGenericErrorContext,
10749 "PP: entering MISC\n");
10750#endif
10751 }
10752 } else {
10753 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10754 ctxt->sax->setDocumentLocator(ctxt->userData,
10755 &xmlDefaultSAXLocator);
10756 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010757 if (ctxt->version == NULL) {
10758 xmlErrMemory(ctxt, NULL);
10759 break;
10760 }
Owen Taylor3473f882001-02-23 17:55:21 +000010761 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10762 (!ctxt->disableSAX))
10763 ctxt->sax->startDocument(ctxt->userData);
10764 ctxt->instate = XML_PARSER_MISC;
10765#ifdef DEBUG_PUSH
10766 xmlGenericError(xmlGenericErrorContext,
10767 "PP: entering MISC\n");
10768#endif
10769 }
10770 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010771 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010772 const xmlChar *name;
10773 const xmlChar *prefix;
10774 const xmlChar *URI;
10775 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010776
10777 if ((avail < 2) && (ctxt->inputNr == 1))
10778 goto done;
10779 cur = ctxt->input->cur[0];
10780 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010781 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010782 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010783 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10784 ctxt->sax->endDocument(ctxt->userData);
10785 goto done;
10786 }
10787 if (!terminate) {
10788 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010789 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010790 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010791 goto done;
10792 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10793 goto done;
10794 }
10795 }
10796 if (ctxt->spaceNr == 0)
10797 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010798 else if (*ctxt->space == -2)
10799 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010800 else
10801 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010802#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010803 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010804#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010805 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010806#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010807 else
10808 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010809#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010810 if (name == NULL) {
10811 spacePop(ctxt);
10812 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010813 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10814 ctxt->sax->endDocument(ctxt->userData);
10815 goto done;
10816 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010817#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010818 /*
10819 * [ VC: Root Element Type ]
10820 * The Name in the document type declaration must match
10821 * the element type of the root element.
10822 */
10823 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10824 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10825 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010826#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010827
10828 /*
10829 * Check for an Empty Element.
10830 */
10831 if ((RAW == '/') && (NXT(1) == '>')) {
10832 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010833
10834 if (ctxt->sax2) {
10835 if ((ctxt->sax != NULL) &&
10836 (ctxt->sax->endElementNs != NULL) &&
10837 (!ctxt->disableSAX))
10838 ctxt->sax->endElementNs(ctxt->userData, name,
10839 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010840 if (ctxt->nsNr - nsNr > 0)
10841 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010842#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010843 } else {
10844 if ((ctxt->sax != NULL) &&
10845 (ctxt->sax->endElement != NULL) &&
10846 (!ctxt->disableSAX))
10847 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010848#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010849 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010850 spacePop(ctxt);
10851 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010852 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010853 } else {
10854 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010855 }
10856 break;
10857 }
10858 if (RAW == '>') {
10859 NEXT;
10860 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010861 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010862 "Couldn't find end of Start Tag %s\n",
10863 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010864 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010865 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010866 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010867 if (ctxt->sax2)
10868 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010869#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010870 else
10871 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010872#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010873
Daniel Veillarda880b122003-04-21 21:36:41 +000010874 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010875 break;
10876 }
10877 case XML_PARSER_CONTENT: {
10878 const xmlChar *test;
10879 unsigned int cons;
10880 if ((avail < 2) && (ctxt->inputNr == 1))
10881 goto done;
10882 cur = ctxt->input->cur[0];
10883 next = ctxt->input->cur[1];
10884
10885 test = CUR_PTR;
10886 cons = ctxt->input->consumed;
10887 if ((cur == '<') && (next == '/')) {
10888 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010889 break;
10890 } else if ((cur == '<') && (next == '?')) {
10891 if ((!terminate) &&
10892 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10893 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010894 xmlParsePI(ctxt);
10895 } else if ((cur == '<') && (next != '!')) {
10896 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010897 break;
10898 } else if ((cur == '<') && (next == '!') &&
10899 (ctxt->input->cur[2] == '-') &&
10900 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010901 int term;
10902
10903 if (avail < 4)
10904 goto done;
10905 ctxt->input->cur += 4;
10906 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10907 ctxt->input->cur -= 4;
10908 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010909 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010910 xmlParseComment(ctxt);
10911 ctxt->instate = XML_PARSER_CONTENT;
10912 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10913 (ctxt->input->cur[2] == '[') &&
10914 (ctxt->input->cur[3] == 'C') &&
10915 (ctxt->input->cur[4] == 'D') &&
10916 (ctxt->input->cur[5] == 'A') &&
10917 (ctxt->input->cur[6] == 'T') &&
10918 (ctxt->input->cur[7] == 'A') &&
10919 (ctxt->input->cur[8] == '[')) {
10920 SKIP(9);
10921 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010922 break;
10923 } else if ((cur == '<') && (next == '!') &&
10924 (avail < 9)) {
10925 goto done;
10926 } else if (cur == '&') {
10927 if ((!terminate) &&
10928 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10929 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010930 xmlParseReference(ctxt);
10931 } else {
10932 /* TODO Avoid the extra copy, handle directly !!! */
10933 /*
10934 * Goal of the following test is:
10935 * - minimize calls to the SAX 'character' callback
10936 * when they are mergeable
10937 * - handle an problem for isBlank when we only parse
10938 * a sequence of blank chars and the next one is
10939 * not available to check against '<' presence.
10940 * - tries to homogenize the differences in SAX
10941 * callbacks between the push and pull versions
10942 * of the parser.
10943 */
10944 if ((ctxt->inputNr == 1) &&
10945 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10946 if (!terminate) {
10947 if (ctxt->progressive) {
10948 if ((lastlt == NULL) ||
10949 (ctxt->input->cur > lastlt))
10950 goto done;
10951 } else if (xmlParseLookupSequence(ctxt,
10952 '<', 0, 0) < 0) {
10953 goto done;
10954 }
10955 }
10956 }
10957 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010958 xmlParseCharData(ctxt, 0);
10959 }
10960 /*
10961 * Pop-up of finished entities.
10962 */
10963 while ((RAW == 0) && (ctxt->inputNr > 1))
10964 xmlPopInput(ctxt);
10965 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010966 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10967 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010968 ctxt->instate = XML_PARSER_EOF;
10969 break;
10970 }
10971 break;
10972 }
10973 case XML_PARSER_END_TAG:
10974 if (avail < 2)
10975 goto done;
10976 if (!terminate) {
10977 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010978 /* > can be found unescaped in attribute values */
10979 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010980 goto done;
10981 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10982 goto done;
10983 }
10984 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010985 if (ctxt->sax2) {
10986 xmlParseEndTag2(ctxt,
10987 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10988 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010989 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010990 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010991 }
10992#ifdef LIBXML_SAX1_ENABLED
10993 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010994 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010995#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010996 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010997 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010998 } else {
10999 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011000 }
11001 break;
11002 case XML_PARSER_CDATA_SECTION: {
11003 /*
11004 * The Push mode need to have the SAX callback for
11005 * cdataBlock merge back contiguous callbacks.
11006 */
11007 int base;
11008
11009 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11010 if (base < 0) {
11011 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011012 int tmp;
11013
11014 tmp = xmlCheckCdataPush(ctxt->input->cur,
11015 XML_PARSER_BIG_BUFFER_SIZE);
11016 if (tmp < 0) {
11017 tmp = -tmp;
11018 ctxt->input->cur += tmp;
11019 goto encoding_error;
11020 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011021 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11022 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011023 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011024 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011025 else if (ctxt->sax->characters != NULL)
11026 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011027 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011028 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011029 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011030 ctxt->checkIndex = 0;
11031 }
11032 goto done;
11033 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011034 int tmp;
11035
11036 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11037 if ((tmp < 0) || (tmp != base)) {
11038 tmp = -tmp;
11039 ctxt->input->cur += tmp;
11040 goto encoding_error;
11041 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011042 if ((ctxt->sax != NULL) && (base == 0) &&
11043 (ctxt->sax->cdataBlock != NULL) &&
11044 (!ctxt->disableSAX)) {
11045 /*
11046 * Special case to provide identical behaviour
11047 * between pull and push parsers on enpty CDATA
11048 * sections
11049 */
11050 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11051 (!strncmp((const char *)&ctxt->input->cur[-9],
11052 "<![CDATA[", 9)))
11053 ctxt->sax->cdataBlock(ctxt->userData,
11054 BAD_CAST "", 0);
11055 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011056 (!ctxt->disableSAX)) {
11057 if (ctxt->sax->cdataBlock != NULL)
11058 ctxt->sax->cdataBlock(ctxt->userData,
11059 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011060 else if (ctxt->sax->characters != NULL)
11061 ctxt->sax->characters(ctxt->userData,
11062 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011063 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011064 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011065 ctxt->checkIndex = 0;
11066 ctxt->instate = XML_PARSER_CONTENT;
11067#ifdef DEBUG_PUSH
11068 xmlGenericError(xmlGenericErrorContext,
11069 "PP: entering CONTENT\n");
11070#endif
11071 }
11072 break;
11073 }
Owen Taylor3473f882001-02-23 17:55:21 +000011074 case XML_PARSER_MISC:
11075 SKIP_BLANKS;
11076 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011077 avail = ctxt->input->length -
11078 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011079 else
Daniel Veillarda880b122003-04-21 21:36:41 +000011080 avail = ctxt->input->buf->buffer->use -
11081 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011082 if (avail < 2)
11083 goto done;
11084 cur = ctxt->input->cur[0];
11085 next = ctxt->input->cur[1];
11086 if ((cur == '<') && (next == '?')) {
11087 if ((!terminate) &&
11088 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11089 goto done;
11090#ifdef DEBUG_PUSH
11091 xmlGenericError(xmlGenericErrorContext,
11092 "PP: Parsing PI\n");
11093#endif
11094 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011095 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011096 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011097 (ctxt->input->cur[2] == '-') &&
11098 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011099 if ((!terminate) &&
11100 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11101 goto done;
11102#ifdef DEBUG_PUSH
11103 xmlGenericError(xmlGenericErrorContext,
11104 "PP: Parsing Comment\n");
11105#endif
11106 xmlParseComment(ctxt);
11107 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011108 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011109 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011110 (ctxt->input->cur[2] == 'D') &&
11111 (ctxt->input->cur[3] == 'O') &&
11112 (ctxt->input->cur[4] == 'C') &&
11113 (ctxt->input->cur[5] == 'T') &&
11114 (ctxt->input->cur[6] == 'Y') &&
11115 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011116 (ctxt->input->cur[8] == 'E')) {
11117 if ((!terminate) &&
11118 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11119 goto done;
11120#ifdef DEBUG_PUSH
11121 xmlGenericError(xmlGenericErrorContext,
11122 "PP: Parsing internal subset\n");
11123#endif
11124 ctxt->inSubset = 1;
11125 xmlParseDocTypeDecl(ctxt);
11126 if (RAW == '[') {
11127 ctxt->instate = XML_PARSER_DTD;
11128#ifdef DEBUG_PUSH
11129 xmlGenericError(xmlGenericErrorContext,
11130 "PP: entering DTD\n");
11131#endif
11132 } else {
11133 /*
11134 * Create and update the external subset.
11135 */
11136 ctxt->inSubset = 2;
11137 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11138 (ctxt->sax->externalSubset != NULL))
11139 ctxt->sax->externalSubset(ctxt->userData,
11140 ctxt->intSubName, ctxt->extSubSystem,
11141 ctxt->extSubURI);
11142 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011143 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011144 ctxt->instate = XML_PARSER_PROLOG;
11145#ifdef DEBUG_PUSH
11146 xmlGenericError(xmlGenericErrorContext,
11147 "PP: entering PROLOG\n");
11148#endif
11149 }
11150 } else if ((cur == '<') && (next == '!') &&
11151 (avail < 9)) {
11152 goto done;
11153 } else {
11154 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011155 ctxt->progressive = 1;
11156 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011157#ifdef DEBUG_PUSH
11158 xmlGenericError(xmlGenericErrorContext,
11159 "PP: entering START_TAG\n");
11160#endif
11161 }
11162 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011163 case XML_PARSER_PROLOG:
11164 SKIP_BLANKS;
11165 if (ctxt->input->buf == NULL)
11166 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11167 else
11168 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11169 if (avail < 2)
11170 goto done;
11171 cur = ctxt->input->cur[0];
11172 next = ctxt->input->cur[1];
11173 if ((cur == '<') && (next == '?')) {
11174 if ((!terminate) &&
11175 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11176 goto done;
11177#ifdef DEBUG_PUSH
11178 xmlGenericError(xmlGenericErrorContext,
11179 "PP: Parsing PI\n");
11180#endif
11181 xmlParsePI(ctxt);
11182 } else if ((cur == '<') && (next == '!') &&
11183 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11184 if ((!terminate) &&
11185 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11186 goto done;
11187#ifdef DEBUG_PUSH
11188 xmlGenericError(xmlGenericErrorContext,
11189 "PP: Parsing Comment\n");
11190#endif
11191 xmlParseComment(ctxt);
11192 ctxt->instate = XML_PARSER_PROLOG;
11193 } else if ((cur == '<') && (next == '!') &&
11194 (avail < 4)) {
11195 goto done;
11196 } else {
11197 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011198 if (ctxt->progressive == 0)
11199 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011200 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011201#ifdef DEBUG_PUSH
11202 xmlGenericError(xmlGenericErrorContext,
11203 "PP: entering START_TAG\n");
11204#endif
11205 }
11206 break;
11207 case XML_PARSER_EPILOG:
11208 SKIP_BLANKS;
11209 if (ctxt->input->buf == NULL)
11210 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11211 else
11212 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11213 if (avail < 2)
11214 goto done;
11215 cur = ctxt->input->cur[0];
11216 next = ctxt->input->cur[1];
11217 if ((cur == '<') && (next == '?')) {
11218 if ((!terminate) &&
11219 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11220 goto done;
11221#ifdef DEBUG_PUSH
11222 xmlGenericError(xmlGenericErrorContext,
11223 "PP: Parsing PI\n");
11224#endif
11225 xmlParsePI(ctxt);
11226 ctxt->instate = XML_PARSER_EPILOG;
11227 } else if ((cur == '<') && (next == '!') &&
11228 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11229 if ((!terminate) &&
11230 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11231 goto done;
11232#ifdef DEBUG_PUSH
11233 xmlGenericError(xmlGenericErrorContext,
11234 "PP: Parsing Comment\n");
11235#endif
11236 xmlParseComment(ctxt);
11237 ctxt->instate = XML_PARSER_EPILOG;
11238 } else if ((cur == '<') && (next == '!') &&
11239 (avail < 4)) {
11240 goto done;
11241 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011242 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011243 ctxt->instate = XML_PARSER_EOF;
11244#ifdef DEBUG_PUSH
11245 xmlGenericError(xmlGenericErrorContext,
11246 "PP: entering EOF\n");
11247#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011248 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011249 ctxt->sax->endDocument(ctxt->userData);
11250 goto done;
11251 }
11252 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011253 case XML_PARSER_DTD: {
11254 /*
11255 * Sorry but progressive parsing of the internal subset
11256 * is not expected to be supported. We first check that
11257 * the full content of the internal subset is available and
11258 * the parsing is launched only at that point.
11259 * Internal subset ends up with "']' S? '>'" in an unescaped
11260 * section and not in a ']]>' sequence which are conditional
11261 * sections (whoever argued to keep that crap in XML deserve
11262 * a place in hell !).
11263 */
11264 int base, i;
11265 xmlChar *buf;
11266 xmlChar quote = 0;
11267
11268 base = ctxt->input->cur - ctxt->input->base;
11269 if (base < 0) return(0);
11270 if (ctxt->checkIndex > base)
11271 base = ctxt->checkIndex;
11272 buf = ctxt->input->buf->buffer->content;
11273 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11274 base++) {
11275 if (quote != 0) {
11276 if (buf[base] == quote)
11277 quote = 0;
11278 continue;
11279 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011280 if ((quote == 0) && (buf[base] == '<')) {
11281 int found = 0;
11282 /* special handling of comments */
11283 if (((unsigned int) base + 4 <
11284 ctxt->input->buf->buffer->use) &&
11285 (buf[base + 1] == '!') &&
11286 (buf[base + 2] == '-') &&
11287 (buf[base + 3] == '-')) {
11288 for (;(unsigned int) base + 3 <
11289 ctxt->input->buf->buffer->use; base++) {
11290 if ((buf[base] == '-') &&
11291 (buf[base + 1] == '-') &&
11292 (buf[base + 2] == '>')) {
11293 found = 1;
11294 base += 2;
11295 break;
11296 }
11297 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011298 if (!found) {
11299#if 0
11300 fprintf(stderr, "unfinished comment\n");
11301#endif
11302 break; /* for */
11303 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011304 continue;
11305 }
11306 }
Owen Taylor3473f882001-02-23 17:55:21 +000011307 if (buf[base] == '"') {
11308 quote = '"';
11309 continue;
11310 }
11311 if (buf[base] == '\'') {
11312 quote = '\'';
11313 continue;
11314 }
11315 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011316#if 0
11317 fprintf(stderr, "%c%c%c%c: ", buf[base],
11318 buf[base + 1], buf[base + 2], buf[base + 3]);
11319#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011320 if ((unsigned int) base +1 >=
11321 ctxt->input->buf->buffer->use)
11322 break;
11323 if (buf[base + 1] == ']') {
11324 /* conditional crap, skip both ']' ! */
11325 base++;
11326 continue;
11327 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011328 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011329 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11330 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011331 if (buf[base + i] == '>') {
11332#if 0
11333 fprintf(stderr, "found\n");
11334#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011335 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011336 }
11337 if (!IS_BLANK_CH(buf[base + i])) {
11338#if 0
11339 fprintf(stderr, "not found\n");
11340#endif
11341 goto not_end_of_int_subset;
11342 }
Owen Taylor3473f882001-02-23 17:55:21 +000011343 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011344#if 0
11345 fprintf(stderr, "end of stream\n");
11346#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011347 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011348
Owen Taylor3473f882001-02-23 17:55:21 +000011349 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011350not_end_of_int_subset:
11351 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011352 }
11353 /*
11354 * We didn't found the end of the Internal subset
11355 */
Owen Taylor3473f882001-02-23 17:55:21 +000011356#ifdef DEBUG_PUSH
11357 if (next == 0)
11358 xmlGenericError(xmlGenericErrorContext,
11359 "PP: lookup of int subset end filed\n");
11360#endif
11361 goto done;
11362
11363found_end_int_subset:
11364 xmlParseInternalSubset(ctxt);
11365 ctxt->inSubset = 2;
11366 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11367 (ctxt->sax->externalSubset != NULL))
11368 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11369 ctxt->extSubSystem, ctxt->extSubURI);
11370 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011371 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011372 ctxt->instate = XML_PARSER_PROLOG;
11373 ctxt->checkIndex = 0;
11374#ifdef DEBUG_PUSH
11375 xmlGenericError(xmlGenericErrorContext,
11376 "PP: entering PROLOG\n");
11377#endif
11378 break;
11379 }
11380 case XML_PARSER_COMMENT:
11381 xmlGenericError(xmlGenericErrorContext,
11382 "PP: internal error, state == COMMENT\n");
11383 ctxt->instate = XML_PARSER_CONTENT;
11384#ifdef DEBUG_PUSH
11385 xmlGenericError(xmlGenericErrorContext,
11386 "PP: entering CONTENT\n");
11387#endif
11388 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011389 case XML_PARSER_IGNORE:
11390 xmlGenericError(xmlGenericErrorContext,
11391 "PP: internal error, state == IGNORE");
11392 ctxt->instate = XML_PARSER_DTD;
11393#ifdef DEBUG_PUSH
11394 xmlGenericError(xmlGenericErrorContext,
11395 "PP: entering DTD\n");
11396#endif
11397 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011398 case XML_PARSER_PI:
11399 xmlGenericError(xmlGenericErrorContext,
11400 "PP: internal error, state == PI\n");
11401 ctxt->instate = XML_PARSER_CONTENT;
11402#ifdef DEBUG_PUSH
11403 xmlGenericError(xmlGenericErrorContext,
11404 "PP: entering CONTENT\n");
11405#endif
11406 break;
11407 case XML_PARSER_ENTITY_DECL:
11408 xmlGenericError(xmlGenericErrorContext,
11409 "PP: internal error, state == ENTITY_DECL\n");
11410 ctxt->instate = XML_PARSER_DTD;
11411#ifdef DEBUG_PUSH
11412 xmlGenericError(xmlGenericErrorContext,
11413 "PP: entering DTD\n");
11414#endif
11415 break;
11416 case XML_PARSER_ENTITY_VALUE:
11417 xmlGenericError(xmlGenericErrorContext,
11418 "PP: internal error, state == ENTITY_VALUE\n");
11419 ctxt->instate = XML_PARSER_CONTENT;
11420#ifdef DEBUG_PUSH
11421 xmlGenericError(xmlGenericErrorContext,
11422 "PP: entering DTD\n");
11423#endif
11424 break;
11425 case XML_PARSER_ATTRIBUTE_VALUE:
11426 xmlGenericError(xmlGenericErrorContext,
11427 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11428 ctxt->instate = XML_PARSER_START_TAG;
11429#ifdef DEBUG_PUSH
11430 xmlGenericError(xmlGenericErrorContext,
11431 "PP: entering START_TAG\n");
11432#endif
11433 break;
11434 case XML_PARSER_SYSTEM_LITERAL:
11435 xmlGenericError(xmlGenericErrorContext,
11436 "PP: internal error, state == SYSTEM_LITERAL\n");
11437 ctxt->instate = XML_PARSER_START_TAG;
11438#ifdef DEBUG_PUSH
11439 xmlGenericError(xmlGenericErrorContext,
11440 "PP: entering START_TAG\n");
11441#endif
11442 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011443 case XML_PARSER_PUBLIC_LITERAL:
11444 xmlGenericError(xmlGenericErrorContext,
11445 "PP: internal error, state == PUBLIC_LITERAL\n");
11446 ctxt->instate = XML_PARSER_START_TAG;
11447#ifdef DEBUG_PUSH
11448 xmlGenericError(xmlGenericErrorContext,
11449 "PP: entering START_TAG\n");
11450#endif
11451 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011452 }
11453 }
11454done:
11455#ifdef DEBUG_PUSH
11456 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11457#endif
11458 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011459encoding_error:
11460 {
11461 char buffer[150];
11462
11463 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11464 ctxt->input->cur[0], ctxt->input->cur[1],
11465 ctxt->input->cur[2], ctxt->input->cur[3]);
11466 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11467 "Input is not proper UTF-8, indicate encoding !\n%s",
11468 BAD_CAST buffer, NULL);
11469 }
11470 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011471}
11472
11473/**
Owen Taylor3473f882001-02-23 17:55:21 +000011474 * xmlParseChunk:
11475 * @ctxt: an XML parser context
11476 * @chunk: an char array
11477 * @size: the size in byte of the chunk
11478 * @terminate: last chunk indicator
11479 *
11480 * Parse a Chunk of memory
11481 *
11482 * Returns zero if no error, the xmlParserErrors otherwise.
11483 */
11484int
11485xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11486 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011487 int end_in_lf = 0;
11488
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011489 if (ctxt == NULL)
11490 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011491 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011492 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011493 if (ctxt->instate == XML_PARSER_START)
11494 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011495 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11496 (chunk[size - 1] == '\r')) {
11497 end_in_lf = 1;
11498 size--;
11499 }
Owen Taylor3473f882001-02-23 17:55:21 +000011500 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11501 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11502 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11503 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011504 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000011505
William M. Bracka3215c72004-07-31 16:24:01 +000011506 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11507 if (res < 0) {
11508 ctxt->errNo = XML_PARSER_EOF;
11509 ctxt->disableSAX = 1;
11510 return (XML_PARSER_EOF);
11511 }
Owen Taylor3473f882001-02-23 17:55:21 +000011512 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11513 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011514 ctxt->input->end =
11515 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011516#ifdef DEBUG_PUSH
11517 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11518#endif
11519
Owen Taylor3473f882001-02-23 17:55:21 +000011520 } else if (ctxt->instate != XML_PARSER_EOF) {
11521 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11522 xmlParserInputBufferPtr in = ctxt->input->buf;
11523 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11524 (in->raw != NULL)) {
11525 int nbchars;
11526
11527 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11528 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011529 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011530 xmlGenericError(xmlGenericErrorContext,
11531 "xmlParseChunk: encoder error\n");
11532 return(XML_ERR_INVALID_ENCODING);
11533 }
11534 }
11535 }
11536 }
11537 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000011538 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11539 (ctxt->input->buf != NULL)) {
11540 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11541 }
Daniel Veillard14412512005-01-21 23:53:26 +000011542 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011543 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011544 if (terminate) {
11545 /*
11546 * Check for termination
11547 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011548 int avail = 0;
11549
11550 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011551 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011552 avail = ctxt->input->length -
11553 (ctxt->input->cur - ctxt->input->base);
11554 else
11555 avail = ctxt->input->buf->buffer->use -
11556 (ctxt->input->cur - ctxt->input->base);
11557 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011558
Owen Taylor3473f882001-02-23 17:55:21 +000011559 if ((ctxt->instate != XML_PARSER_EOF) &&
11560 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011561 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011562 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011563 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011564 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011565 }
Owen Taylor3473f882001-02-23 17:55:21 +000011566 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011567 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011568 ctxt->sax->endDocument(ctxt->userData);
11569 }
11570 ctxt->instate = XML_PARSER_EOF;
11571 }
11572 return((xmlParserErrors) ctxt->errNo);
11573}
11574
11575/************************************************************************
11576 * *
11577 * I/O front end functions to the parser *
11578 * *
11579 ************************************************************************/
11580
11581/**
Owen Taylor3473f882001-02-23 17:55:21 +000011582 * xmlCreatePushParserCtxt:
11583 * @sax: a SAX handler
11584 * @user_data: The user data returned on SAX callbacks
11585 * @chunk: a pointer to an array of chars
11586 * @size: number of chars in the array
11587 * @filename: an optional file name or URI
11588 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011589 * Create a parser context for using the XML parser in push mode.
11590 * If @buffer and @size are non-NULL, the data is used to detect
11591 * the encoding. The remaining characters will be parsed so they
11592 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011593 * To allow content encoding detection, @size should be >= 4
11594 * The value of @filename is used for fetching external entities
11595 * and error/warning reports.
11596 *
11597 * Returns the new parser context or NULL
11598 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011599
Owen Taylor3473f882001-02-23 17:55:21 +000011600xmlParserCtxtPtr
11601xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11602 const char *chunk, int size, const char *filename) {
11603 xmlParserCtxtPtr ctxt;
11604 xmlParserInputPtr inputStream;
11605 xmlParserInputBufferPtr buf;
11606 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11607
11608 /*
11609 * plug some encoding conversion routines
11610 */
11611 if ((chunk != NULL) && (size >= 4))
11612 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11613
11614 buf = xmlAllocParserInputBuffer(enc);
11615 if (buf == NULL) return(NULL);
11616
11617 ctxt = xmlNewParserCtxt();
11618 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011619 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011620 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011621 return(NULL);
11622 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011623 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011624 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11625 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011626 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011627 xmlFreeParserInputBuffer(buf);
11628 xmlFreeParserCtxt(ctxt);
11629 return(NULL);
11630 }
Owen Taylor3473f882001-02-23 17:55:21 +000011631 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011632#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011633 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011634#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011635 xmlFree(ctxt->sax);
11636 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11637 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011638 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011639 xmlFreeParserInputBuffer(buf);
11640 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011641 return(NULL);
11642 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011643 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11644 if (sax->initialized == XML_SAX2_MAGIC)
11645 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11646 else
11647 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011648 if (user_data != NULL)
11649 ctxt->userData = user_data;
11650 }
11651 if (filename == NULL) {
11652 ctxt->directory = NULL;
11653 } else {
11654 ctxt->directory = xmlParserGetDirectory(filename);
11655 }
11656
11657 inputStream = xmlNewInputStream(ctxt);
11658 if (inputStream == NULL) {
11659 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011660 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011661 return(NULL);
11662 }
11663
11664 if (filename == NULL)
11665 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011666 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011667 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011668 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011669 if (inputStream->filename == NULL) {
11670 xmlFreeParserCtxt(ctxt);
11671 xmlFreeParserInputBuffer(buf);
11672 return(NULL);
11673 }
11674 }
Owen Taylor3473f882001-02-23 17:55:21 +000011675 inputStream->buf = buf;
11676 inputStream->base = inputStream->buf->buffer->content;
11677 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011678 inputStream->end =
11679 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011680
11681 inputPush(ctxt, inputStream);
11682
William M. Brack3a1cd212005-02-11 14:35:54 +000011683 /*
11684 * If the caller didn't provide an initial 'chunk' for determining
11685 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11686 * that it can be automatically determined later
11687 */
11688 if ((size == 0) || (chunk == NULL)) {
11689 ctxt->charset = XML_CHAR_ENCODING_NONE;
11690 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011691 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11692 int cur = ctxt->input->cur - ctxt->input->base;
11693
Owen Taylor3473f882001-02-23 17:55:21 +000011694 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011695
11696 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11697 ctxt->input->cur = ctxt->input->base + cur;
11698 ctxt->input->end =
11699 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011700#ifdef DEBUG_PUSH
11701 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11702#endif
11703 }
11704
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011705 if (enc != XML_CHAR_ENCODING_NONE) {
11706 xmlSwitchEncoding(ctxt, enc);
11707 }
11708
Owen Taylor3473f882001-02-23 17:55:21 +000011709 return(ctxt);
11710}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011711#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011712
11713/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011714 * xmlStopParser:
11715 * @ctxt: an XML parser context
11716 *
11717 * Blocks further parser processing
11718 */
11719void
11720xmlStopParser(xmlParserCtxtPtr ctxt) {
11721 if (ctxt == NULL)
11722 return;
11723 ctxt->instate = XML_PARSER_EOF;
11724 ctxt->disableSAX = 1;
11725 if (ctxt->input != NULL) {
11726 ctxt->input->cur = BAD_CAST"";
11727 ctxt->input->base = ctxt->input->cur;
11728 }
11729}
11730
11731/**
Owen Taylor3473f882001-02-23 17:55:21 +000011732 * xmlCreateIOParserCtxt:
11733 * @sax: a SAX handler
11734 * @user_data: The user data returned on SAX callbacks
11735 * @ioread: an I/O read function
11736 * @ioclose: an I/O close function
11737 * @ioctx: an I/O handler
11738 * @enc: the charset encoding if known
11739 *
11740 * Create a parser context for using the XML parser with an existing
11741 * I/O stream
11742 *
11743 * Returns the new parser context or NULL
11744 */
11745xmlParserCtxtPtr
11746xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11747 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11748 void *ioctx, xmlCharEncoding enc) {
11749 xmlParserCtxtPtr ctxt;
11750 xmlParserInputPtr inputStream;
11751 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011752
11753 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011754
11755 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11756 if (buf == NULL) return(NULL);
11757
11758 ctxt = xmlNewParserCtxt();
11759 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011760 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011761 return(NULL);
11762 }
11763 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011764#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011765 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011766#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011767 xmlFree(ctxt->sax);
11768 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11769 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011770 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011771 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011772 return(NULL);
11773 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011774 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11775 if (sax->initialized == XML_SAX2_MAGIC)
11776 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11777 else
11778 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011779 if (user_data != NULL)
11780 ctxt->userData = user_data;
11781 }
11782
11783 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11784 if (inputStream == NULL) {
11785 xmlFreeParserCtxt(ctxt);
11786 return(NULL);
11787 }
11788 inputPush(ctxt, inputStream);
11789
11790 return(ctxt);
11791}
11792
Daniel Veillard4432df22003-09-28 18:58:27 +000011793#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011794/************************************************************************
11795 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011796 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011797 * *
11798 ************************************************************************/
11799
11800/**
11801 * xmlIOParseDTD:
11802 * @sax: the SAX handler block or NULL
11803 * @input: an Input Buffer
11804 * @enc: the charset encoding if known
11805 *
11806 * Load and parse a DTD
11807 *
11808 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011809 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011810 */
11811
11812xmlDtdPtr
11813xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11814 xmlCharEncoding enc) {
11815 xmlDtdPtr ret = NULL;
11816 xmlParserCtxtPtr ctxt;
11817 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011818 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011819
11820 if (input == NULL)
11821 return(NULL);
11822
11823 ctxt = xmlNewParserCtxt();
11824 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011825 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011826 return(NULL);
11827 }
11828
11829 /*
11830 * Set-up the SAX context
11831 */
11832 if (sax != NULL) {
11833 if (ctxt->sax != NULL)
11834 xmlFree(ctxt->sax);
11835 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011836 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011837 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011838 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011839
11840 /*
11841 * generate a parser input from the I/O handler
11842 */
11843
Daniel Veillard43caefb2003-12-07 19:32:22 +000011844 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011845 if (pinput == NULL) {
11846 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011847 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011848 xmlFreeParserCtxt(ctxt);
11849 return(NULL);
11850 }
11851
11852 /*
11853 * plug some encoding conversion routines here.
11854 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011855 if (xmlPushInput(ctxt, pinput) < 0) {
11856 if (sax != NULL) ctxt->sax = NULL;
11857 xmlFreeParserCtxt(ctxt);
11858 return(NULL);
11859 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000011860 if (enc != XML_CHAR_ENCODING_NONE) {
11861 xmlSwitchEncoding(ctxt, enc);
11862 }
Owen Taylor3473f882001-02-23 17:55:21 +000011863
11864 pinput->filename = NULL;
11865 pinput->line = 1;
11866 pinput->col = 1;
11867 pinput->base = ctxt->input->cur;
11868 pinput->cur = ctxt->input->cur;
11869 pinput->free = NULL;
11870
11871 /*
11872 * let's parse that entity knowing it's an external subset.
11873 */
11874 ctxt->inSubset = 2;
11875 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011876 if (ctxt->myDoc == NULL) {
11877 xmlErrMemory(ctxt, "New Doc failed");
11878 return(NULL);
11879 }
11880 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011881 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11882 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011883
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011884 if ((enc == XML_CHAR_ENCODING_NONE) &&
11885 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011886 /*
11887 * Get the 4 first bytes and decode the charset
11888 * if enc != XML_CHAR_ENCODING_NONE
11889 * plug some encoding conversion routines.
11890 */
11891 start[0] = RAW;
11892 start[1] = NXT(1);
11893 start[2] = NXT(2);
11894 start[3] = NXT(3);
11895 enc = xmlDetectCharEncoding(start, 4);
11896 if (enc != XML_CHAR_ENCODING_NONE) {
11897 xmlSwitchEncoding(ctxt, enc);
11898 }
11899 }
11900
Owen Taylor3473f882001-02-23 17:55:21 +000011901 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11902
11903 if (ctxt->myDoc != NULL) {
11904 if (ctxt->wellFormed) {
11905 ret = ctxt->myDoc->extSubset;
11906 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011907 if (ret != NULL) {
11908 xmlNodePtr tmp;
11909
11910 ret->doc = NULL;
11911 tmp = ret->children;
11912 while (tmp != NULL) {
11913 tmp->doc = NULL;
11914 tmp = tmp->next;
11915 }
11916 }
Owen Taylor3473f882001-02-23 17:55:21 +000011917 } else {
11918 ret = NULL;
11919 }
11920 xmlFreeDoc(ctxt->myDoc);
11921 ctxt->myDoc = NULL;
11922 }
11923 if (sax != NULL) ctxt->sax = NULL;
11924 xmlFreeParserCtxt(ctxt);
11925
11926 return(ret);
11927}
11928
11929/**
11930 * xmlSAXParseDTD:
11931 * @sax: the SAX handler block
11932 * @ExternalID: a NAME* containing the External ID of the DTD
11933 * @SystemID: a NAME* containing the URL to the DTD
11934 *
11935 * Load and parse an external subset.
11936 *
11937 * Returns the resulting xmlDtdPtr or NULL in case of error.
11938 */
11939
11940xmlDtdPtr
11941xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11942 const xmlChar *SystemID) {
11943 xmlDtdPtr ret = NULL;
11944 xmlParserCtxtPtr ctxt;
11945 xmlParserInputPtr input = NULL;
11946 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011947 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011948
11949 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11950
11951 ctxt = xmlNewParserCtxt();
11952 if (ctxt == NULL) {
11953 return(NULL);
11954 }
11955
11956 /*
11957 * Set-up the SAX context
11958 */
11959 if (sax != NULL) {
11960 if (ctxt->sax != NULL)
11961 xmlFree(ctxt->sax);
11962 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011963 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011964 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011965
11966 /*
11967 * Canonicalise the system ID
11968 */
11969 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011970 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011971 xmlFreeParserCtxt(ctxt);
11972 return(NULL);
11973 }
Owen Taylor3473f882001-02-23 17:55:21 +000011974
11975 /*
11976 * Ask the Entity resolver to load the damn thing
11977 */
11978
11979 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011980 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11981 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011982 if (input == NULL) {
11983 if (sax != NULL) ctxt->sax = NULL;
11984 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011985 if (systemIdCanonic != NULL)
11986 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011987 return(NULL);
11988 }
11989
11990 /*
11991 * plug some encoding conversion routines here.
11992 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011993 if (xmlPushInput(ctxt, input) < 0) {
11994 if (sax != NULL) ctxt->sax = NULL;
11995 xmlFreeParserCtxt(ctxt);
11996 if (systemIdCanonic != NULL)
11997 xmlFree(systemIdCanonic);
11998 return(NULL);
11999 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012000 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12001 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12002 xmlSwitchEncoding(ctxt, enc);
12003 }
Owen Taylor3473f882001-02-23 17:55:21 +000012004
12005 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012006 input->filename = (char *) systemIdCanonic;
12007 else
12008 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012009 input->line = 1;
12010 input->col = 1;
12011 input->base = ctxt->input->cur;
12012 input->cur = ctxt->input->cur;
12013 input->free = NULL;
12014
12015 /*
12016 * let's parse that entity knowing it's an external subset.
12017 */
12018 ctxt->inSubset = 2;
12019 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012020 if (ctxt->myDoc == NULL) {
12021 xmlErrMemory(ctxt, "New Doc failed");
12022 if (sax != NULL) ctxt->sax = NULL;
12023 xmlFreeParserCtxt(ctxt);
12024 return(NULL);
12025 }
12026 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012027 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12028 ExternalID, SystemID);
12029 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12030
12031 if (ctxt->myDoc != NULL) {
12032 if (ctxt->wellFormed) {
12033 ret = ctxt->myDoc->extSubset;
12034 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012035 if (ret != NULL) {
12036 xmlNodePtr tmp;
12037
12038 ret->doc = NULL;
12039 tmp = ret->children;
12040 while (tmp != NULL) {
12041 tmp->doc = NULL;
12042 tmp = tmp->next;
12043 }
12044 }
Owen Taylor3473f882001-02-23 17:55:21 +000012045 } else {
12046 ret = NULL;
12047 }
12048 xmlFreeDoc(ctxt->myDoc);
12049 ctxt->myDoc = NULL;
12050 }
12051 if (sax != NULL) ctxt->sax = NULL;
12052 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012053
Owen Taylor3473f882001-02-23 17:55:21 +000012054 return(ret);
12055}
12056
Daniel Veillard4432df22003-09-28 18:58:27 +000012057
Owen Taylor3473f882001-02-23 17:55:21 +000012058/**
12059 * xmlParseDTD:
12060 * @ExternalID: a NAME* containing the External ID of the DTD
12061 * @SystemID: a NAME* containing the URL to the DTD
12062 *
12063 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012064 *
Owen Taylor3473f882001-02-23 17:55:21 +000012065 * Returns the resulting xmlDtdPtr or NULL in case of error.
12066 */
12067
12068xmlDtdPtr
12069xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12070 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12071}
Daniel Veillard4432df22003-09-28 18:58:27 +000012072#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012073
12074/************************************************************************
12075 * *
12076 * Front ends when parsing an Entity *
12077 * *
12078 ************************************************************************/
12079
12080/**
Owen Taylor3473f882001-02-23 17:55:21 +000012081 * xmlParseCtxtExternalEntity:
12082 * @ctx: the existing parsing context
12083 * @URL: the URL for the entity to load
12084 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012085 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012086 *
12087 * Parse an external general entity within an existing parsing context
12088 * An external general parsed entity is well-formed if it matches the
12089 * production labeled extParsedEnt.
12090 *
12091 * [78] extParsedEnt ::= TextDecl? content
12092 *
12093 * Returns 0 if the entity is well formed, -1 in case of args problem and
12094 * the parser error code otherwise
12095 */
12096
12097int
12098xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012099 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012100 xmlParserCtxtPtr ctxt;
12101 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012102 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012103 xmlSAXHandlerPtr oldsax = NULL;
12104 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012105 xmlChar start[4];
12106 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012107
Daniel Veillardce682bc2004-11-05 17:22:25 +000012108 if (ctx == NULL) return(-1);
12109
Daniel Veillard0161e632008-08-28 15:36:32 +000012110 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12111 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012112 return(XML_ERR_ENTITY_LOOP);
12113 }
12114
Daniel Veillardcda96922001-08-21 10:56:31 +000012115 if (lst != NULL)
12116 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012117 if ((URL == NULL) && (ID == NULL))
12118 return(-1);
12119 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12120 return(-1);
12121
Rob Richards798743a2009-06-19 13:54:25 -040012122 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012123 if (ctxt == NULL) {
12124 return(-1);
12125 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012126
Owen Taylor3473f882001-02-23 17:55:21 +000012127 oldsax = ctxt->sax;
12128 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012129 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012130 newDoc = xmlNewDoc(BAD_CAST "1.0");
12131 if (newDoc == NULL) {
12132 xmlFreeParserCtxt(ctxt);
12133 return(-1);
12134 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012135 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012136 if (ctx->myDoc->dict) {
12137 newDoc->dict = ctx->myDoc->dict;
12138 xmlDictReference(newDoc->dict);
12139 }
Owen Taylor3473f882001-02-23 17:55:21 +000012140 if (ctx->myDoc != NULL) {
12141 newDoc->intSubset = ctx->myDoc->intSubset;
12142 newDoc->extSubset = ctx->myDoc->extSubset;
12143 }
12144 if (ctx->myDoc->URL != NULL) {
12145 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12146 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012147 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12148 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012149 ctxt->sax = oldsax;
12150 xmlFreeParserCtxt(ctxt);
12151 newDoc->intSubset = NULL;
12152 newDoc->extSubset = NULL;
12153 xmlFreeDoc(newDoc);
12154 return(-1);
12155 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012156 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012157 nodePush(ctxt, newDoc->children);
12158 if (ctx->myDoc == NULL) {
12159 ctxt->myDoc = newDoc;
12160 } else {
12161 ctxt->myDoc = ctx->myDoc;
12162 newDoc->children->doc = ctx->myDoc;
12163 }
12164
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012165 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012166 * Get the 4 first bytes and decode the charset
12167 * if enc != XML_CHAR_ENCODING_NONE
12168 * plug some encoding conversion routines.
12169 */
12170 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012171 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12172 start[0] = RAW;
12173 start[1] = NXT(1);
12174 start[2] = NXT(2);
12175 start[3] = NXT(3);
12176 enc = xmlDetectCharEncoding(start, 4);
12177 if (enc != XML_CHAR_ENCODING_NONE) {
12178 xmlSwitchEncoding(ctxt, enc);
12179 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012180 }
12181
Owen Taylor3473f882001-02-23 17:55:21 +000012182 /*
12183 * Parse a possible text declaration first
12184 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012185 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012186 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012187 /*
12188 * An XML-1.0 document can't reference an entity not XML-1.0
12189 */
12190 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12191 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12192 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12193 "Version mismatch between document and entity\n");
12194 }
Owen Taylor3473f882001-02-23 17:55:21 +000012195 }
12196
12197 /*
12198 * Doing validity checking on chunk doesn't make sense
12199 */
12200 ctxt->instate = XML_PARSER_CONTENT;
12201 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012202 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012203 ctxt->loadsubset = ctx->loadsubset;
12204 ctxt->depth = ctx->depth + 1;
12205 ctxt->replaceEntities = ctx->replaceEntities;
12206 if (ctxt->validate) {
12207 ctxt->vctxt.error = ctx->vctxt.error;
12208 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012209 } else {
12210 ctxt->vctxt.error = NULL;
12211 ctxt->vctxt.warning = NULL;
12212 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012213 ctxt->vctxt.nodeTab = NULL;
12214 ctxt->vctxt.nodeNr = 0;
12215 ctxt->vctxt.nodeMax = 0;
12216 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012217 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12218 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012219 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12220 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12221 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012222 ctxt->dictNames = ctx->dictNames;
12223 ctxt->attsDefault = ctx->attsDefault;
12224 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012225 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012226
12227 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012228
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012229 ctx->validate = ctxt->validate;
12230 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012231 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012232 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012233 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012234 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012235 }
12236 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012237 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012238 }
12239
12240 if (!ctxt->wellFormed) {
12241 if (ctxt->errNo == 0)
12242 ret = 1;
12243 else
12244 ret = ctxt->errNo;
12245 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012246 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012247 xmlNodePtr cur;
12248
12249 /*
12250 * Return the newly created nodeset after unlinking it from
12251 * they pseudo parent.
12252 */
12253 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012254 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012255 while (cur != NULL) {
12256 cur->parent = NULL;
12257 cur = cur->next;
12258 }
12259 newDoc->children->children = NULL;
12260 }
12261 ret = 0;
12262 }
12263 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012264 ctxt->dict = NULL;
12265 ctxt->attsDefault = NULL;
12266 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012267 xmlFreeParserCtxt(ctxt);
12268 newDoc->intSubset = NULL;
12269 newDoc->extSubset = NULL;
12270 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012271
Owen Taylor3473f882001-02-23 17:55:21 +000012272 return(ret);
12273}
12274
12275/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012276 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012277 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012278 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012279 * @sax: the SAX handler bloc (possibly NULL)
12280 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12281 * @depth: Used for loop detection, use 0
12282 * @URL: the URL for the entity to load
12283 * @ID: the System ID for the entity to load
12284 * @list: the return value for the set of parsed nodes
12285 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012286 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012287 *
12288 * Returns 0 if the entity is well formed, -1 in case of args problem and
12289 * the parser error code otherwise
12290 */
12291
Daniel Veillard7d515752003-09-26 19:12:37 +000012292static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012293xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12294 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012295 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012296 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012297 xmlParserCtxtPtr ctxt;
12298 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012299 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012300 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012301 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012302 xmlChar start[4];
12303 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012304
Daniel Veillard0161e632008-08-28 15:36:32 +000012305 if (((depth > 40) &&
12306 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12307 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012308 return(XML_ERR_ENTITY_LOOP);
12309 }
12310
Owen Taylor3473f882001-02-23 17:55:21 +000012311 if (list != NULL)
12312 *list = NULL;
12313 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012314 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012315 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012316 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012317
12318
Rob Richards9c0aa472009-03-26 18:10:19 +000012319 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012320 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012321 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012322 if (oldctxt != NULL) {
12323 ctxt->_private = oldctxt->_private;
12324 ctxt->loadsubset = oldctxt->loadsubset;
12325 ctxt->validate = oldctxt->validate;
12326 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012327 ctxt->record_info = oldctxt->record_info;
12328 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12329 ctxt->node_seq.length = oldctxt->node_seq.length;
12330 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012331 } else {
12332 /*
12333 * Doing validity checking on chunk without context
12334 * doesn't make sense
12335 */
12336 ctxt->_private = NULL;
12337 ctxt->validate = 0;
12338 ctxt->external = 2;
12339 ctxt->loadsubset = 0;
12340 }
Owen Taylor3473f882001-02-23 17:55:21 +000012341 if (sax != NULL) {
12342 oldsax = ctxt->sax;
12343 ctxt->sax = sax;
12344 if (user_data != NULL)
12345 ctxt->userData = user_data;
12346 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012347 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012348 newDoc = xmlNewDoc(BAD_CAST "1.0");
12349 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012350 ctxt->node_seq.maximum = 0;
12351 ctxt->node_seq.length = 0;
12352 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012353 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012354 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012355 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012356 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012357 newDoc->intSubset = doc->intSubset;
12358 newDoc->extSubset = doc->extSubset;
12359 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012360 xmlDictReference(newDoc->dict);
12361
Owen Taylor3473f882001-02-23 17:55:21 +000012362 if (doc->URL != NULL) {
12363 newDoc->URL = xmlStrdup(doc->URL);
12364 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012365 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12366 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012367 if (sax != NULL)
12368 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012369 ctxt->node_seq.maximum = 0;
12370 ctxt->node_seq.length = 0;
12371 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012372 xmlFreeParserCtxt(ctxt);
12373 newDoc->intSubset = NULL;
12374 newDoc->extSubset = NULL;
12375 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012376 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012377 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012378 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012379 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012380 ctxt->myDoc = doc;
12381 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012382
Daniel Veillard0161e632008-08-28 15:36:32 +000012383 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012384 * Get the 4 first bytes and decode the charset
12385 * if enc != XML_CHAR_ENCODING_NONE
12386 * plug some encoding conversion routines.
12387 */
12388 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012389 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12390 start[0] = RAW;
12391 start[1] = NXT(1);
12392 start[2] = NXT(2);
12393 start[3] = NXT(3);
12394 enc = xmlDetectCharEncoding(start, 4);
12395 if (enc != XML_CHAR_ENCODING_NONE) {
12396 xmlSwitchEncoding(ctxt, enc);
12397 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012398 }
12399
Owen Taylor3473f882001-02-23 17:55:21 +000012400 /*
12401 * Parse a possible text declaration first
12402 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012403 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012404 xmlParseTextDecl(ctxt);
12405 }
12406
Owen Taylor3473f882001-02-23 17:55:21 +000012407 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012408 ctxt->depth = depth;
12409
12410 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012411
Daniel Veillard561b7f82002-03-20 21:55:57 +000012412 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012413 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012414 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012415 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012416 }
12417 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012418 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012419 }
12420
12421 if (!ctxt->wellFormed) {
12422 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012423 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012424 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012425 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012426 } else {
12427 if (list != NULL) {
12428 xmlNodePtr cur;
12429
12430 /*
12431 * Return the newly created nodeset after unlinking it from
12432 * they pseudo parent.
12433 */
12434 cur = newDoc->children->children;
12435 *list = cur;
12436 while (cur != NULL) {
12437 cur->parent = NULL;
12438 cur = cur->next;
12439 }
12440 newDoc->children->children = NULL;
12441 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012442 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012443 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012444
12445 /*
12446 * Record in the parent context the number of entities replacement
12447 * done when parsing that reference.
12448 */
12449 oldctxt->nbentities += ctxt->nbentities;
12450 /*
12451 * Also record the size of the entity parsed
12452 */
12453 if (ctxt->input != NULL) {
12454 oldctxt->sizeentities += ctxt->input->consumed;
12455 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12456 }
12457 /*
12458 * And record the last error if any
12459 */
12460 if (ctxt->lastError.code != XML_ERR_OK)
12461 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12462
Owen Taylor3473f882001-02-23 17:55:21 +000012463 if (sax != NULL)
12464 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012465 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12466 oldctxt->node_seq.length = ctxt->node_seq.length;
12467 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012468 ctxt->node_seq.maximum = 0;
12469 ctxt->node_seq.length = 0;
12470 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012471 xmlFreeParserCtxt(ctxt);
12472 newDoc->intSubset = NULL;
12473 newDoc->extSubset = NULL;
12474 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012475
Owen Taylor3473f882001-02-23 17:55:21 +000012476 return(ret);
12477}
12478
Daniel Veillard81273902003-09-30 00:43:48 +000012479#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012480/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012481 * xmlParseExternalEntity:
12482 * @doc: the document the chunk pertains to
12483 * @sax: the SAX handler bloc (possibly NULL)
12484 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12485 * @depth: Used for loop detection, use 0
12486 * @URL: the URL for the entity to load
12487 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012488 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012489 *
12490 * Parse an external general entity
12491 * An external general parsed entity is well-formed if it matches the
12492 * production labeled extParsedEnt.
12493 *
12494 * [78] extParsedEnt ::= TextDecl? content
12495 *
12496 * Returns 0 if the entity is well formed, -1 in case of args problem and
12497 * the parser error code otherwise
12498 */
12499
12500int
12501xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012502 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012503 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012504 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012505}
12506
12507/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012508 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012509 * @doc: the document the chunk pertains to
12510 * @sax: the SAX handler bloc (possibly NULL)
12511 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12512 * @depth: Used for loop detection, use 0
12513 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012514 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012515 *
12516 * Parse a well-balanced chunk of an XML document
12517 * called by the parser
12518 * The allowed sequence for the Well Balanced Chunk is the one defined by
12519 * the content production in the XML grammar:
12520 *
12521 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12522 *
12523 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12524 * the parser error code otherwise
12525 */
12526
12527int
12528xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012529 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012530 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12531 depth, string, lst, 0 );
12532}
Daniel Veillard81273902003-09-30 00:43:48 +000012533#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012534
12535/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012536 * xmlParseBalancedChunkMemoryInternal:
12537 * @oldctxt: the existing parsing context
12538 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12539 * @user_data: the user data field for the parser context
12540 * @lst: the return value for the set of parsed nodes
12541 *
12542 *
12543 * Parse a well-balanced chunk of an XML document
12544 * called by the parser
12545 * The allowed sequence for the Well Balanced Chunk is the one defined by
12546 * the content production in the XML grammar:
12547 *
12548 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12549 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012550 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12551 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012552 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012553 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012554 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012555 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012556static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012557xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12558 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12559 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012560 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012561 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012562 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012563 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012564 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012565 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012566 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012567
Daniel Veillard0161e632008-08-28 15:36:32 +000012568 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12569 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012570 return(XML_ERR_ENTITY_LOOP);
12571 }
12572
12573
12574 if (lst != NULL)
12575 *lst = NULL;
12576 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012577 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012578
12579 size = xmlStrlen(string);
12580
12581 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012582 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012583 if (user_data != NULL)
12584 ctxt->userData = user_data;
12585 else
12586 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012587 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12588 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012589 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12590 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12591 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012592
12593 oldsax = ctxt->sax;
12594 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012595 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012596 ctxt->replaceEntities = oldctxt->replaceEntities;
12597 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012598
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012599 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012600 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012601 newDoc = xmlNewDoc(BAD_CAST "1.0");
12602 if (newDoc == NULL) {
12603 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012604 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012605 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012606 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012607 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012608 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012609 newDoc->dict = ctxt->dict;
12610 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012611 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012612 } else {
12613 ctxt->myDoc = oldctxt->myDoc;
12614 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012615 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012616 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012617 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12618 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012619 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012620 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012621 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012622 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012623 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012624 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012625 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012626 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012627 ctxt->myDoc->children = NULL;
12628 ctxt->myDoc->last = NULL;
12629 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012630 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012631 ctxt->instate = XML_PARSER_CONTENT;
12632 ctxt->depth = oldctxt->depth + 1;
12633
Daniel Veillard328f48c2002-11-15 15:24:34 +000012634 ctxt->validate = 0;
12635 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012636 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12637 /*
12638 * ID/IDREF registration will be done in xmlValidateElement below
12639 */
12640 ctxt->loadsubset |= XML_SKIP_IDS;
12641 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012642 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012643 ctxt->attsDefault = oldctxt->attsDefault;
12644 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012645
Daniel Veillard68e9e742002-11-16 15:35:11 +000012646 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012647 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012648 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012649 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012650 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012651 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012652 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012653 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012654 }
12655
12656 if (!ctxt->wellFormed) {
12657 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012658 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012659 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012660 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012661 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012662 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012663 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012664
William M. Brack7b9154b2003-09-27 19:23:50 +000012665 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012666 xmlNodePtr cur;
12667
12668 /*
12669 * Return the newly created nodeset after unlinking it from
12670 * they pseudo parent.
12671 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012672 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012673 *lst = cur;
12674 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012675#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012676 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12677 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12678 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012679 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12680 oldctxt->myDoc, cur);
12681 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012682#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012683 cur->parent = NULL;
12684 cur = cur->next;
12685 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012686 ctxt->myDoc->children->children = NULL;
12687 }
12688 if (ctxt->myDoc != NULL) {
12689 xmlFreeNode(ctxt->myDoc->children);
12690 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012691 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012692 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012693
12694 /*
12695 * Record in the parent context the number of entities replacement
12696 * done when parsing that reference.
12697 */
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012698 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard0161e632008-08-28 15:36:32 +000012699 /*
12700 * Also record the last error if any
12701 */
12702 if (ctxt->lastError.code != XML_ERR_OK)
12703 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12704
Daniel Veillard328f48c2002-11-15 15:24:34 +000012705 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012706 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012707 ctxt->attsDefault = NULL;
12708 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012709 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012710 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012711 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012712 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012713
Daniel Veillard328f48c2002-11-15 15:24:34 +000012714 return(ret);
12715}
12716
Daniel Veillard29b17482004-08-16 00:39:03 +000012717/**
12718 * xmlParseInNodeContext:
12719 * @node: the context node
12720 * @data: the input string
12721 * @datalen: the input string length in bytes
12722 * @options: a combination of xmlParserOption
12723 * @lst: the return value for the set of parsed nodes
12724 *
12725 * Parse a well-balanced chunk of an XML document
12726 * within the context (DTD, namespaces, etc ...) of the given node.
12727 *
12728 * The allowed sequence for the data is a Well Balanced Chunk defined by
12729 * the content production in the XML grammar:
12730 *
12731 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12732 *
12733 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12734 * error code otherwise
12735 */
12736xmlParserErrors
12737xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12738 int options, xmlNodePtr *lst) {
12739#ifdef SAX2
12740 xmlParserCtxtPtr ctxt;
12741 xmlDocPtr doc = NULL;
12742 xmlNodePtr fake, cur;
12743 int nsnr = 0;
12744
12745 xmlParserErrors ret = XML_ERR_OK;
12746
12747 /*
12748 * check all input parameters, grab the document
12749 */
12750 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12751 return(XML_ERR_INTERNAL_ERROR);
12752 switch (node->type) {
12753 case XML_ELEMENT_NODE:
12754 case XML_ATTRIBUTE_NODE:
12755 case XML_TEXT_NODE:
12756 case XML_CDATA_SECTION_NODE:
12757 case XML_ENTITY_REF_NODE:
12758 case XML_PI_NODE:
12759 case XML_COMMENT_NODE:
12760 case XML_DOCUMENT_NODE:
12761 case XML_HTML_DOCUMENT_NODE:
12762 break;
12763 default:
12764 return(XML_ERR_INTERNAL_ERROR);
12765
12766 }
12767 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12768 (node->type != XML_DOCUMENT_NODE) &&
12769 (node->type != XML_HTML_DOCUMENT_NODE))
12770 node = node->parent;
12771 if (node == NULL)
12772 return(XML_ERR_INTERNAL_ERROR);
12773 if (node->type == XML_ELEMENT_NODE)
12774 doc = node->doc;
12775 else
12776 doc = (xmlDocPtr) node;
12777 if (doc == NULL)
12778 return(XML_ERR_INTERNAL_ERROR);
12779
12780 /*
12781 * allocate a context and set-up everything not related to the
12782 * node position in the tree
12783 */
12784 if (doc->type == XML_DOCUMENT_NODE)
12785 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12786#ifdef LIBXML_HTML_ENABLED
12787 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12788 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12789#endif
12790 else
12791 return(XML_ERR_INTERNAL_ERROR);
12792
12793 if (ctxt == NULL)
12794 return(XML_ERR_NO_MEMORY);
12795 fake = xmlNewComment(NULL);
12796 if (fake == NULL) {
12797 xmlFreeParserCtxt(ctxt);
12798 return(XML_ERR_NO_MEMORY);
12799 }
12800 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012801
12802 /*
12803 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12804 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12805 * we must wait until the last moment to free the original one.
12806 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012807 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012808 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012809 xmlDictFree(ctxt->dict);
12810 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012811 } else
12812 options |= XML_PARSE_NODICT;
12813
Daniel Veillard37334572008-07-31 08:20:02 +000012814 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012815 xmlDetectSAX2(ctxt);
12816 ctxt->myDoc = doc;
12817
12818 if (node->type == XML_ELEMENT_NODE) {
12819 nodePush(ctxt, node);
12820 /*
12821 * initialize the SAX2 namespaces stack
12822 */
12823 cur = node;
12824 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12825 xmlNsPtr ns = cur->nsDef;
12826 const xmlChar *iprefix, *ihref;
12827
12828 while (ns != NULL) {
12829 if (ctxt->dict) {
12830 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12831 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12832 } else {
12833 iprefix = ns->prefix;
12834 ihref = ns->href;
12835 }
12836
12837 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12838 nsPush(ctxt, iprefix, ihref);
12839 nsnr++;
12840 }
12841 ns = ns->next;
12842 }
12843 cur = cur->parent;
12844 }
12845 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000012846 }
Daniel Veillard29b17482004-08-16 00:39:03 +000012847
12848 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12849 /*
12850 * ID/IDREF registration will be done in xmlValidateElement below
12851 */
12852 ctxt->loadsubset |= XML_SKIP_IDS;
12853 }
12854
Daniel Veillard499cc922006-01-18 17:22:35 +000012855#ifdef LIBXML_HTML_ENABLED
12856 if (doc->type == XML_HTML_DOCUMENT_NODE)
12857 __htmlParseContent(ctxt);
12858 else
12859#endif
12860 xmlParseContent(ctxt);
12861
Daniel Veillard29b17482004-08-16 00:39:03 +000012862 nsPop(ctxt, nsnr);
12863 if ((RAW == '<') && (NXT(1) == '/')) {
12864 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12865 } else if (RAW != 0) {
12866 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12867 }
12868 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12869 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12870 ctxt->wellFormed = 0;
12871 }
12872
12873 if (!ctxt->wellFormed) {
12874 if (ctxt->errNo == 0)
12875 ret = XML_ERR_INTERNAL_ERROR;
12876 else
12877 ret = (xmlParserErrors)ctxt->errNo;
12878 } else {
12879 ret = XML_ERR_OK;
12880 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012881
Daniel Veillard29b17482004-08-16 00:39:03 +000012882 /*
12883 * Return the newly created nodeset after unlinking it from
12884 * the pseudo sibling.
12885 */
Daniel Veillard0161e632008-08-28 15:36:32 +000012886
Daniel Veillard29b17482004-08-16 00:39:03 +000012887 cur = fake->next;
12888 fake->next = NULL;
12889 node->last = fake;
12890
12891 if (cur != NULL) {
12892 cur->prev = NULL;
12893 }
12894
12895 *lst = cur;
12896
12897 while (cur != NULL) {
12898 cur->parent = NULL;
12899 cur = cur->next;
12900 }
12901
12902 xmlUnlinkNode(fake);
12903 xmlFreeNode(fake);
12904
12905
12906 if (ret != XML_ERR_OK) {
12907 xmlFreeNodeList(*lst);
12908 *lst = NULL;
12909 }
William M. Brackc3f81342004-10-03 01:22:44 +000012910
William M. Brackb7b54de2004-10-06 16:38:01 +000012911 if (doc->dict != NULL)
12912 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012913 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012914
Daniel Veillard29b17482004-08-16 00:39:03 +000012915 return(ret);
12916#else /* !SAX2 */
12917 return(XML_ERR_INTERNAL_ERROR);
12918#endif
12919}
12920
Daniel Veillard81273902003-09-30 00:43:48 +000012921#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012922/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012923 * xmlParseBalancedChunkMemoryRecover:
12924 * @doc: the document the chunk pertains to
12925 * @sax: the SAX handler bloc (possibly NULL)
12926 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12927 * @depth: Used for loop detection, use 0
12928 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12929 * @lst: the return value for the set of parsed nodes
12930 * @recover: return nodes even if the data is broken (use 0)
12931 *
12932 *
12933 * Parse a well-balanced chunk of an XML document
12934 * called by the parser
12935 * The allowed sequence for the Well Balanced Chunk is the one defined by
12936 * the content production in the XML grammar:
12937 *
12938 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12939 *
12940 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12941 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000012942 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000012943 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000012944 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12945 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000012946 */
12947int
12948xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000012949 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000012950 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012951 xmlParserCtxtPtr ctxt;
12952 xmlDocPtr newDoc;
12953 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012954 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012955 int size;
12956 int ret = 0;
12957
Daniel Veillard0161e632008-08-28 15:36:32 +000012958 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000012959 return(XML_ERR_ENTITY_LOOP);
12960 }
12961
12962
Daniel Veillardcda96922001-08-21 10:56:31 +000012963 if (lst != NULL)
12964 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012965 if (string == NULL)
12966 return(-1);
12967
12968 size = xmlStrlen(string);
12969
12970 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12971 if (ctxt == NULL) return(-1);
12972 ctxt->userData = ctxt;
12973 if (sax != NULL) {
12974 oldsax = ctxt->sax;
12975 ctxt->sax = sax;
12976 if (user_data != NULL)
12977 ctxt->userData = user_data;
12978 }
12979 newDoc = xmlNewDoc(BAD_CAST "1.0");
12980 if (newDoc == NULL) {
12981 xmlFreeParserCtxt(ctxt);
12982 return(-1);
12983 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012984 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012985 if ((doc != NULL) && (doc->dict != NULL)) {
12986 xmlDictFree(ctxt->dict);
12987 ctxt->dict = doc->dict;
12988 xmlDictReference(ctxt->dict);
12989 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12990 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12991 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12992 ctxt->dictNames = 1;
12993 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000012994 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012995 }
Owen Taylor3473f882001-02-23 17:55:21 +000012996 if (doc != NULL) {
12997 newDoc->intSubset = doc->intSubset;
12998 newDoc->extSubset = doc->extSubset;
12999 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013000 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13001 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013002 if (sax != NULL)
13003 ctxt->sax = oldsax;
13004 xmlFreeParserCtxt(ctxt);
13005 newDoc->intSubset = NULL;
13006 newDoc->extSubset = NULL;
13007 xmlFreeDoc(newDoc);
13008 return(-1);
13009 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013010 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13011 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013012 if (doc == NULL) {
13013 ctxt->myDoc = newDoc;
13014 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013015 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013016 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013017 /* Ensure that doc has XML spec namespace */
13018 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13019 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013020 }
13021 ctxt->instate = XML_PARSER_CONTENT;
13022 ctxt->depth = depth;
13023
13024 /*
13025 * Doing validity checking on chunk doesn't make sense
13026 */
13027 ctxt->validate = 0;
13028 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013029 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013030
Daniel Veillardb39bc392002-10-26 19:29:51 +000013031 if ( doc != NULL ){
13032 content = doc->children;
13033 doc->children = NULL;
13034 xmlParseContent(ctxt);
13035 doc->children = content;
13036 }
13037 else {
13038 xmlParseContent(ctxt);
13039 }
Owen Taylor3473f882001-02-23 17:55:21 +000013040 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013041 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013042 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013043 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013044 }
13045 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013046 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013047 }
13048
13049 if (!ctxt->wellFormed) {
13050 if (ctxt->errNo == 0)
13051 ret = 1;
13052 else
13053 ret = ctxt->errNo;
13054 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013055 ret = 0;
13056 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013057
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013058 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13059 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013060
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013061 /*
13062 * Return the newly created nodeset after unlinking it from
13063 * they pseudo parent.
13064 */
13065 cur = newDoc->children->children;
13066 *lst = cur;
13067 while (cur != NULL) {
13068 xmlSetTreeDoc(cur, doc);
13069 cur->parent = NULL;
13070 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013071 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013072 newDoc->children->children = NULL;
13073 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013074
13075 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013076 ctxt->sax = oldsax;
13077 xmlFreeParserCtxt(ctxt);
13078 newDoc->intSubset = NULL;
13079 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013080 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013081 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013082
Owen Taylor3473f882001-02-23 17:55:21 +000013083 return(ret);
13084}
13085
13086/**
13087 * xmlSAXParseEntity:
13088 * @sax: the SAX handler block
13089 * @filename: the filename
13090 *
13091 * parse an XML external entity out of context and build a tree.
13092 * It use the given SAX function block to handle the parsing callback.
13093 * If sax is NULL, fallback to the default DOM tree building routines.
13094 *
13095 * [78] extParsedEnt ::= TextDecl? content
13096 *
13097 * This correspond to a "Well Balanced" chunk
13098 *
13099 * Returns the resulting document tree
13100 */
13101
13102xmlDocPtr
13103xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13104 xmlDocPtr ret;
13105 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013106
13107 ctxt = xmlCreateFileParserCtxt(filename);
13108 if (ctxt == NULL) {
13109 return(NULL);
13110 }
13111 if (sax != NULL) {
13112 if (ctxt->sax != NULL)
13113 xmlFree(ctxt->sax);
13114 ctxt->sax = sax;
13115 ctxt->userData = NULL;
13116 }
13117
Owen Taylor3473f882001-02-23 17:55:21 +000013118 xmlParseExtParsedEnt(ctxt);
13119
13120 if (ctxt->wellFormed)
13121 ret = ctxt->myDoc;
13122 else {
13123 ret = NULL;
13124 xmlFreeDoc(ctxt->myDoc);
13125 ctxt->myDoc = NULL;
13126 }
13127 if (sax != NULL)
13128 ctxt->sax = NULL;
13129 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013130
Owen Taylor3473f882001-02-23 17:55:21 +000013131 return(ret);
13132}
13133
13134/**
13135 * xmlParseEntity:
13136 * @filename: the filename
13137 *
13138 * parse an XML external entity out of context and build a tree.
13139 *
13140 * [78] extParsedEnt ::= TextDecl? content
13141 *
13142 * This correspond to a "Well Balanced" chunk
13143 *
13144 * Returns the resulting document tree
13145 */
13146
13147xmlDocPtr
13148xmlParseEntity(const char *filename) {
13149 return(xmlSAXParseEntity(NULL, filename));
13150}
Daniel Veillard81273902003-09-30 00:43:48 +000013151#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013152
13153/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013154 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013155 * @URL: the entity URL
13156 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013157 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013158 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013159 *
13160 * Create a parser context for an external entity
13161 * Automatic support for ZLIB/Compress compressed document is provided
13162 * by default if found at compile-time.
13163 *
13164 * Returns the new parser context or NULL
13165 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013166static xmlParserCtxtPtr
13167xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13168 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013169 xmlParserCtxtPtr ctxt;
13170 xmlParserInputPtr inputStream;
13171 char *directory = NULL;
13172 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013173
Owen Taylor3473f882001-02-23 17:55:21 +000013174 ctxt = xmlNewParserCtxt();
13175 if (ctxt == NULL) {
13176 return(NULL);
13177 }
13178
Daniel Veillard48247b42009-07-10 16:12:46 +020013179 if (pctx != NULL) {
13180 ctxt->options = pctx->options;
13181 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013182 }
13183
Owen Taylor3473f882001-02-23 17:55:21 +000013184 uri = xmlBuildURI(URL, base);
13185
13186 if (uri == NULL) {
13187 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13188 if (inputStream == NULL) {
13189 xmlFreeParserCtxt(ctxt);
13190 return(NULL);
13191 }
13192
13193 inputPush(ctxt, inputStream);
13194
13195 if ((ctxt->directory == NULL) && (directory == NULL))
13196 directory = xmlParserGetDirectory((char *)URL);
13197 if ((ctxt->directory == NULL) && (directory != NULL))
13198 ctxt->directory = directory;
13199 } else {
13200 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13201 if (inputStream == NULL) {
13202 xmlFree(uri);
13203 xmlFreeParserCtxt(ctxt);
13204 return(NULL);
13205 }
13206
13207 inputPush(ctxt, inputStream);
13208
13209 if ((ctxt->directory == NULL) && (directory == NULL))
13210 directory = xmlParserGetDirectory((char *)uri);
13211 if ((ctxt->directory == NULL) && (directory != NULL))
13212 ctxt->directory = directory;
13213 xmlFree(uri);
13214 }
Owen Taylor3473f882001-02-23 17:55:21 +000013215 return(ctxt);
13216}
13217
Rob Richards9c0aa472009-03-26 18:10:19 +000013218/**
13219 * xmlCreateEntityParserCtxt:
13220 * @URL: the entity URL
13221 * @ID: the entity PUBLIC ID
13222 * @base: a possible base for the target URI
13223 *
13224 * Create a parser context for an external entity
13225 * Automatic support for ZLIB/Compress compressed document is provided
13226 * by default if found at compile-time.
13227 *
13228 * Returns the new parser context or NULL
13229 */
13230xmlParserCtxtPtr
13231xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13232 const xmlChar *base) {
13233 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13234
13235}
13236
/************************************************************************
 *                                                                      *
 *              Front ends when parsing from a file                     *
 *                                                                      *
 ************************************************************************/
13242
13243/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013244 * xmlCreateURLParserCtxt:
13245 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013246 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013247 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013248 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013249 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013250 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013251 *
13252 * Returns the new parser context or NULL
13253 */
13254xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013255xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013256{
13257 xmlParserCtxtPtr ctxt;
13258 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013259 char *directory = NULL;
13260
Owen Taylor3473f882001-02-23 17:55:21 +000013261 ctxt = xmlNewParserCtxt();
13262 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013263 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013264 return(NULL);
13265 }
13266
Daniel Veillarddf292f72005-01-16 19:00:15 +000013267 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013268 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013269 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013270
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013271 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013272 if (inputStream == NULL) {
13273 xmlFreeParserCtxt(ctxt);
13274 return(NULL);
13275 }
13276
Owen Taylor3473f882001-02-23 17:55:21 +000013277 inputPush(ctxt, inputStream);
13278 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013279 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013280 if ((ctxt->directory == NULL) && (directory != NULL))
13281 ctxt->directory = directory;
13282
13283 return(ctxt);
13284}
13285
Daniel Veillard61b93382003-11-03 14:28:31 +000013286/**
13287 * xmlCreateFileParserCtxt:
13288 * @filename: the filename
13289 *
13290 * Create a parser context for a file content.
13291 * Automatic support for ZLIB/Compress compressed document is provided
13292 * by default if found at compile-time.
13293 *
13294 * Returns the new parser context or NULL
13295 */
13296xmlParserCtxtPtr
13297xmlCreateFileParserCtxt(const char *filename)
13298{
13299 return(xmlCreateURLParserCtxt(filename, 0));
13300}
13301
Daniel Veillard81273902003-09-30 00:43:48 +000013302#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013303/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013304 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013305 * @sax: the SAX handler block
13306 * @filename: the filename
13307 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13308 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013309 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013310 *
13311 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13312 * compressed document is provided by default if found at compile-time.
13313 * It use the given SAX function block to handle the parsing callback.
13314 * If sax is NULL, fallback to the default DOM tree building routines.
13315 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013316 * User data (void *) is stored within the parser context in the
13317 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013318 *
Owen Taylor3473f882001-02-23 17:55:21 +000013319 * Returns the resulting document tree
13320 */
13321
13322xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013323xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13324 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013325 xmlDocPtr ret;
13326 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013327
Daniel Veillard635ef722001-10-29 11:48:19 +000013328 xmlInitParser();
13329
Owen Taylor3473f882001-02-23 17:55:21 +000013330 ctxt = xmlCreateFileParserCtxt(filename);
13331 if (ctxt == NULL) {
13332 return(NULL);
13333 }
13334 if (sax != NULL) {
13335 if (ctxt->sax != NULL)
13336 xmlFree(ctxt->sax);
13337 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013338 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013339 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013340 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013341 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013342 }
Owen Taylor3473f882001-02-23 17:55:21 +000013343
Daniel Veillard37d2d162008-03-14 10:54:00 +000013344 if (ctxt->directory == NULL)
13345 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013346
Daniel Veillarddad3f682002-11-17 16:47:27 +000013347 ctxt->recovery = recovery;
13348
Owen Taylor3473f882001-02-23 17:55:21 +000013349 xmlParseDocument(ctxt);
13350
William M. Brackc07329e2003-09-08 01:57:30 +000013351 if ((ctxt->wellFormed) || recovery) {
13352 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013353 if (ret != NULL) {
13354 if (ctxt->input->buf->compressed > 0)
13355 ret->compression = 9;
13356 else
13357 ret->compression = ctxt->input->buf->compressed;
13358 }
William M. Brackc07329e2003-09-08 01:57:30 +000013359 }
Owen Taylor3473f882001-02-23 17:55:21 +000013360 else {
13361 ret = NULL;
13362 xmlFreeDoc(ctxt->myDoc);
13363 ctxt->myDoc = NULL;
13364 }
13365 if (sax != NULL)
13366 ctxt->sax = NULL;
13367 xmlFreeParserCtxt(ctxt);
13368
13369 return(ret);
13370}
13371
13372/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013373 * xmlSAXParseFile:
13374 * @sax: the SAX handler block
13375 * @filename: the filename
13376 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13377 * documents
13378 *
13379 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13380 * compressed document is provided by default if found at compile-time.
13381 * It use the given SAX function block to handle the parsing callback.
13382 * If sax is NULL, fallback to the default DOM tree building routines.
13383 *
13384 * Returns the resulting document tree
13385 */
13386
13387xmlDocPtr
13388xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13389 int recovery) {
13390 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13391}
13392
13393/**
Owen Taylor3473f882001-02-23 17:55:21 +000013394 * xmlRecoverDoc:
13395 * @cur: a pointer to an array of xmlChar
13396 *
13397 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013398 * In the case the document is not Well Formed, a attempt to build a
13399 * tree is tried anyway
13400 *
13401 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013402 */
13403
13404xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020013405xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013406 return(xmlSAXParseDoc(NULL, cur, 1));
13407}
13408
13409/**
13410 * xmlParseFile:
13411 * @filename: the filename
13412 *
13413 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13414 * compressed document is provided by default if found at compile-time.
13415 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013416 * Returns the resulting document tree if the file was wellformed,
13417 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013418 */
13419
13420xmlDocPtr
13421xmlParseFile(const char *filename) {
13422 return(xmlSAXParseFile(NULL, filename, 0));
13423}
13424
13425/**
13426 * xmlRecoverFile:
13427 * @filename: the filename
13428 *
13429 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13430 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013431 * In the case the document is not Well Formed, it attempts to build
13432 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013433 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013434 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013435 */
13436
13437xmlDocPtr
13438xmlRecoverFile(const char *filename) {
13439 return(xmlSAXParseFile(NULL, filename, 1));
13440}
13441
13442
13443/**
13444 * xmlSetupParserForBuffer:
13445 * @ctxt: an XML parser context
13446 * @buffer: a xmlChar * buffer
13447 * @filename: a file name
13448 *
13449 * Setup the parser context to parse a new buffer; Clears any prior
13450 * contents from the parser context. The buffer parameter must not be
13451 * NULL, but the filename parameter can be
13452 */
13453void
13454xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13455 const char* filename)
13456{
13457 xmlParserInputPtr input;
13458
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013459 if ((ctxt == NULL) || (buffer == NULL))
13460 return;
13461
Owen Taylor3473f882001-02-23 17:55:21 +000013462 input = xmlNewInputStream(ctxt);
13463 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013464 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013465 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013466 return;
13467 }
13468
13469 xmlClearParserCtxt(ctxt);
13470 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013471 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013472 input->base = buffer;
13473 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013474 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013475 inputPush(ctxt, input);
13476}
13477
13478/**
13479 * xmlSAXUserParseFile:
13480 * @sax: a SAX handler
13481 * @user_data: The user data returned on SAX callbacks
13482 * @filename: a file name
13483 *
13484 * parse an XML file and call the given SAX handler routines.
13485 * Automatic support for ZLIB/Compress compressed document is provided
13486 *
13487 * Returns 0 in case of success or a error number otherwise
13488 */
13489int
13490xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13491 const char *filename) {
13492 int ret = 0;
13493 xmlParserCtxtPtr ctxt;
13494
13495 ctxt = xmlCreateFileParserCtxt(filename);
13496 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013497 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013498 xmlFree(ctxt->sax);
13499 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013500 xmlDetectSAX2(ctxt);
13501
Owen Taylor3473f882001-02-23 17:55:21 +000013502 if (user_data != NULL)
13503 ctxt->userData = user_data;
13504
13505 xmlParseDocument(ctxt);
13506
13507 if (ctxt->wellFormed)
13508 ret = 0;
13509 else {
13510 if (ctxt->errNo != 0)
13511 ret = ctxt->errNo;
13512 else
13513 ret = -1;
13514 }
13515 if (sax != NULL)
13516 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013517 if (ctxt->myDoc != NULL) {
13518 xmlFreeDoc(ctxt->myDoc);
13519 ctxt->myDoc = NULL;
13520 }
Owen Taylor3473f882001-02-23 17:55:21 +000013521 xmlFreeParserCtxt(ctxt);
13522
13523 return ret;
13524}
Daniel Veillard81273902003-09-30 00:43:48 +000013525#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013526
13527/************************************************************************
13528 * *
13529 * Front ends when parsing from memory *
13530 * *
13531 ************************************************************************/
13532
13533/**
13534 * xmlCreateMemoryParserCtxt:
13535 * @buffer: a pointer to a char array
13536 * @size: the size of the array
13537 *
13538 * Create a parser context for an XML in-memory document.
13539 *
13540 * Returns the new parser context or NULL
13541 */
13542xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013543xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013544 xmlParserCtxtPtr ctxt;
13545 xmlParserInputPtr input;
13546 xmlParserInputBufferPtr buf;
13547
13548 if (buffer == NULL)
13549 return(NULL);
13550 if (size <= 0)
13551 return(NULL);
13552
13553 ctxt = xmlNewParserCtxt();
13554 if (ctxt == NULL)
13555 return(NULL);
13556
Daniel Veillard53350552003-09-18 13:35:51 +000013557 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013558 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013559 if (buf == NULL) {
13560 xmlFreeParserCtxt(ctxt);
13561 return(NULL);
13562 }
Owen Taylor3473f882001-02-23 17:55:21 +000013563
13564 input = xmlNewInputStream(ctxt);
13565 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013566 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013567 xmlFreeParserCtxt(ctxt);
13568 return(NULL);
13569 }
13570
13571 input->filename = NULL;
13572 input->buf = buf;
13573 input->base = input->buf->buffer->content;
13574 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013575 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013576
13577 inputPush(ctxt, input);
13578 return(ctxt);
13579}
13580
Daniel Veillard81273902003-09-30 00:43:48 +000013581#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013582/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013583 * xmlSAXParseMemoryWithData:
13584 * @sax: the SAX handler block
13585 * @buffer: an pointer to a char array
13586 * @size: the size of the array
13587 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13588 * documents
13589 * @data: the userdata
13590 *
13591 * parse an XML in-memory block and use the given SAX function block
13592 * to handle the parsing callback. If sax is NULL, fallback to the default
13593 * DOM tree building routines.
13594 *
13595 * User data (void *) is stored within the parser context in the
13596 * context's _private member, so it is available nearly everywhere in libxml
13597 *
13598 * Returns the resulting document tree
13599 */
13600
13601xmlDocPtr
13602xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13603 int size, int recovery, void *data) {
13604 xmlDocPtr ret;
13605 xmlParserCtxtPtr ctxt;
13606
Daniel Veillardab2a7632009-07-09 08:45:03 +020013607 xmlInitParser();
13608
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013609 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13610 if (ctxt == NULL) return(NULL);
13611 if (sax != NULL) {
13612 if (ctxt->sax != NULL)
13613 xmlFree(ctxt->sax);
13614 ctxt->sax = sax;
13615 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013616 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013617 if (data!=NULL) {
13618 ctxt->_private=data;
13619 }
13620
Daniel Veillardadba5f12003-04-04 16:09:01 +000013621 ctxt->recovery = recovery;
13622
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013623 xmlParseDocument(ctxt);
13624
13625 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13626 else {
13627 ret = NULL;
13628 xmlFreeDoc(ctxt->myDoc);
13629 ctxt->myDoc = NULL;
13630 }
13631 if (sax != NULL)
13632 ctxt->sax = NULL;
13633 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013634
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013635 return(ret);
13636}
13637
13638/**
Owen Taylor3473f882001-02-23 17:55:21 +000013639 * xmlSAXParseMemory:
13640 * @sax: the SAX handler block
13641 * @buffer: an pointer to a char array
13642 * @size: the size of the array
13643 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13644 * documents
13645 *
13646 * parse an XML in-memory block and use the given SAX function block
13647 * to handle the parsing callback. If sax is NULL, fallback to the default
13648 * DOM tree building routines.
13649 *
13650 * Returns the resulting document tree
13651 */
13652xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013653xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13654 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013655 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013656}
13657
13658/**
13659 * xmlParseMemory:
13660 * @buffer: an pointer to a char array
13661 * @size: the size of the array
13662 *
13663 * parse an XML in-memory block and build a tree.
13664 *
13665 * Returns the resulting document tree
13666 */
13667
Daniel Veillard50822cb2001-07-26 20:05:51 +000013668xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013669 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13670}
13671
13672/**
13673 * xmlRecoverMemory:
13674 * @buffer: an pointer to a char array
13675 * @size: the size of the array
13676 *
13677 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013678 * In the case the document is not Well Formed, an attempt to
13679 * build a tree is tried anyway
13680 *
13681 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013682 */
13683
Daniel Veillard50822cb2001-07-26 20:05:51 +000013684xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013685 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13686}
13687
13688/**
13689 * xmlSAXUserParseMemory:
13690 * @sax: a SAX handler
13691 * @user_data: The user data returned on SAX callbacks
13692 * @buffer: an in-memory XML document input
13693 * @size: the length of the XML document in bytes
13694 *
13695 * A better SAX parsing routine.
13696 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020013697 *
Owen Taylor3473f882001-02-23 17:55:21 +000013698 * Returns 0 in case of success or a error number otherwise
13699 */
13700int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013701 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013702 int ret = 0;
13703 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013704
13705 xmlInitParser();
13706
Owen Taylor3473f882001-02-23 17:55:21 +000013707 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13708 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013709 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13710 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013711 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013712 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013713
Daniel Veillard30211a02001-04-26 09:33:18 +000013714 if (user_data != NULL)
13715 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020013716
Owen Taylor3473f882001-02-23 17:55:21 +000013717 xmlParseDocument(ctxt);
13718
13719 if (ctxt->wellFormed)
13720 ret = 0;
13721 else {
13722 if (ctxt->errNo != 0)
13723 ret = ctxt->errNo;
13724 else
13725 ret = -1;
13726 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013727 if (sax != NULL)
13728 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013729 if (ctxt->myDoc != NULL) {
13730 xmlFreeDoc(ctxt->myDoc);
13731 ctxt->myDoc = NULL;
13732 }
Owen Taylor3473f882001-02-23 17:55:21 +000013733 xmlFreeParserCtxt(ctxt);
13734
13735 return ret;
13736}
Daniel Veillard81273902003-09-30 00:43:48 +000013737#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013738
13739/**
13740 * xmlCreateDocParserCtxt:
13741 * @cur: a pointer to an array of xmlChar
13742 *
13743 * Creates a parser context for an XML in-memory document.
13744 *
13745 * Returns the new parser context or NULL
13746 */
13747xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013748xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013749 int len;
13750
13751 if (cur == NULL)
13752 return(NULL);
13753 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013754 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013755}
13756
Daniel Veillard81273902003-09-30 00:43:48 +000013757#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013758/**
13759 * xmlSAXParseDoc:
13760 * @sax: the SAX handler block
13761 * @cur: a pointer to an array of xmlChar
13762 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13763 * documents
13764 *
13765 * parse an XML in-memory document and build a tree.
13766 * It use the given SAX function block to handle the parsing callback.
13767 * If sax is NULL, fallback to the default DOM tree building routines.
13768 *
13769 * Returns the resulting document tree
13770 */
13771
13772xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013773xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013774 xmlDocPtr ret;
13775 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013776 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013777
Daniel Veillard38936062004-11-04 17:45:11 +000013778 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013779
13780
13781 ctxt = xmlCreateDocParserCtxt(cur);
13782 if (ctxt == NULL) return(NULL);
13783 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013784 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013785 ctxt->sax = sax;
13786 ctxt->userData = NULL;
13787 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013788 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013789
13790 xmlParseDocument(ctxt);
13791 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13792 else {
13793 ret = NULL;
13794 xmlFreeDoc(ctxt->myDoc);
13795 ctxt->myDoc = NULL;
13796 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013797 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013798 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013799 xmlFreeParserCtxt(ctxt);
13800
13801 return(ret);
13802}
13803
13804/**
13805 * xmlParseDoc:
13806 * @cur: a pointer to an array of xmlChar
13807 *
13808 * parse an XML in-memory document and build a tree.
13809 *
13810 * Returns the resulting document tree
13811 */
13812
13813xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013814xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013815 return(xmlSAXParseDoc(NULL, cur, 0));
13816}
Daniel Veillard81273902003-09-30 00:43:48 +000013817#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013818
Daniel Veillard81273902003-09-30 00:43:48 +000013819#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013820/************************************************************************
13821 * *
13822 * Specific function to keep track of entities references *
13823 * and used by the XSLT debugger *
13824 * *
13825 ************************************************************************/
13826
13827static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13828
13829/**
13830 * xmlAddEntityReference:
13831 * @ent : A valid entity
13832 * @firstNode : A valid first node for children of entity
13833 * @lastNode : A valid last node of children entity
13834 *
13835 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13836 */
13837static void
13838xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13839 xmlNodePtr lastNode)
13840{
13841 if (xmlEntityRefFunc != NULL) {
13842 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13843 }
13844}
13845
13846
/**
 * xmlSetEntityReferenceFunc:
 * @func:  a valid function
 *
 * Set the function to call back when an XML entity reference has been
 * made. The pointer is stored in the file-level xmlEntityRefFunc
 * variable, replacing any previously registered function.
 */
void
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
{
    xmlEntityRefFunc = func;
}
Daniel Veillard81273902003-09-30 00:43:48 +000013858#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013859
13860/************************************************************************
13861 * *
13862 * Miscellaneous *
13863 * *
13864 ************************************************************************/
13865
13866#ifdef LIBXML_XPATH_ENABLED
13867#include <libxml/xpath.h>
13868#endif
13869
/* Declared here so xmlInitParser() can compare xmlGenericError against it. */
extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser(), back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
13872
/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calling it again once initialization has completed is a no-op.
 */

void
xmlInitParser(void) {
    /* Fast path: everything below already ran. */
    if (xmlParserInitialized != 0)
        return;

#ifdef LIBXML_THREAD_ENABLED
    /*
     * With thread support the initialization runs under the global init
     * mutex, and the flag is tested again once the mutex is held.
     */
    __xmlGlobalInitMutexLock();
    if (xmlParserInitialized == 0) {
#endif
        xmlInitGlobals();
        xmlInitThreads();
        /* (Re)install the default generic error handler if none other is set. */
        if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
            (xmlGenericError == NULL))
            initGenericErrorDefaultFunc(NULL);
        xmlInitMemory();
        xmlInitCharEncodingHandlers();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
        xmlRegisterDefaultOutputCallbacks();
#endif /* LIBXML_OUTPUT_ENABLED */
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Only flagged as initialized once every step above has run. */
        xmlParserInitialized = 1;
#ifdef LIBXML_THREAD_ENABLED
    }
    __xmlGlobalInitMutexUnlock();
#endif
}
13915
/**
 * xmlCleanupParser:
 *
 * This function name is somewhat misleading. It does not clean up
 * parser state, it cleans up memory allocated by the library itself.
 * It is a cleanup function for the XML library. It tries to reclaim all
 * related global memory allocated for the library processing.
 * It doesn't deallocate any document related memory. One should
 * call xmlCleanupParser() only when the process has finished using
 * the library and all XML/HTML documents built with it.
 * See also xmlInitParser() which has the opposite function of preparing
 * the library for operations.
 *
 * WARNING: if your application is multithreaded or has plugin support
 *          calling this may crash the application if another thread or
 *          a plugin is still using libxml2. It's sometimes very hard to
 *          guess if libxml2 is in use in the application, some libraries
 *          or plugins may use it without notice. In case of doubt abstain
 *          from calling this function or do it just before calling exit()
 *          to avoid leak reports from valgrind !
 */

void
xmlCleanupParser(void) {
    /* Nothing to do if xmlInitParser() never ran (or cleanup already did). */
    if (!xmlParserInitialized)
        return;

    xmlCleanupCharEncodingHandlers();
#ifdef LIBXML_CATALOG_ENABLED
    xmlCatalogCleanup();
#endif
    xmlDictCleanup();
    xmlCleanupInputCallbacks();
#ifdef LIBXML_OUTPUT_ENABLED
    xmlCleanupOutputCallbacks();
#endif
#ifdef LIBXML_SCHEMAS_ENABLED
    xmlSchemaCleanupTypes();
    xmlRelaxNGCleanupTypes();
#endif
    xmlCleanupGlobals();
    xmlResetLastError();
    xmlCleanupThreads(); /* must be last if called not from the main thread */
    xmlCleanupMemory();
    /* Allow a later xmlInitParser() call to initialize the library again. */
    xmlParserInitialized = 0;
}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013962
13963/************************************************************************
13964 * *
13965 * New set (2.6.0) of simpler and more flexible APIs *
13966 * *
13967 ************************************************************************/
13968
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A NULL @str is ignored; when "dict" is NULL the string
 * is always freed.
 *
 * The expansion is a single statement (do { } while (0)), so the macro
 * must be followed by a semicolon and is safe inside unbraced if/else.
 * Note that @str is evaluated more than once.
 */
#define DICT_FREE(str)                                                  \
    do {                                                                \
        if ((str) && ((!dict) ||                                        \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))          \
            xmlFree((char *)(str));                                     \
    } while (0)
13980
13981/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013982 * xmlCtxtReset:
13983 * @ctxt: an XML parser context
13984 *
13985 * Reset a parser context
13986 */
13987void
13988xmlCtxtReset(xmlParserCtxtPtr ctxt)
13989{
13990 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013991 xmlDictPtr dict;
13992
13993 if (ctxt == NULL)
13994 return;
13995
13996 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013997
13998 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13999 xmlFreeInputStream(input);
14000 }
14001 ctxt->inputNr = 0;
14002 ctxt->input = NULL;
14003
14004 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014005 if (ctxt->spaceTab != NULL) {
14006 ctxt->spaceTab[0] = -1;
14007 ctxt->space = &ctxt->spaceTab[0];
14008 } else {
14009 ctxt->space = NULL;
14010 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014011
14012
14013 ctxt->nodeNr = 0;
14014 ctxt->node = NULL;
14015
14016 ctxt->nameNr = 0;
14017 ctxt->name = NULL;
14018
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014019 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014020 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014021 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014022 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014023 DICT_FREE(ctxt->directory);
14024 ctxt->directory = NULL;
14025 DICT_FREE(ctxt->extSubURI);
14026 ctxt->extSubURI = NULL;
14027 DICT_FREE(ctxt->extSubSystem);
14028 ctxt->extSubSystem = NULL;
14029 if (ctxt->myDoc != NULL)
14030 xmlFreeDoc(ctxt->myDoc);
14031 ctxt->myDoc = NULL;
14032
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014033 ctxt->standalone = -1;
14034 ctxt->hasExternalSubset = 0;
14035 ctxt->hasPErefs = 0;
14036 ctxt->html = 0;
14037 ctxt->external = 0;
14038 ctxt->instate = XML_PARSER_START;
14039 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014040
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014041 ctxt->wellFormed = 1;
14042 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014043 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014044 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014045#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014046 ctxt->vctxt.userData = ctxt;
14047 ctxt->vctxt.error = xmlParserValidityError;
14048 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014049#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014050 ctxt->record_info = 0;
14051 ctxt->nbChars = 0;
14052 ctxt->checkIndex = 0;
14053 ctxt->inSubset = 0;
14054 ctxt->errNo = XML_ERR_OK;
14055 ctxt->depth = 0;
14056 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14057 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014058 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014059 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014060 xmlInitNodeInfoSeq(&ctxt->node_seq);
14061
14062 if (ctxt->attsDefault != NULL) {
14063 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14064 ctxt->attsDefault = NULL;
14065 }
14066 if (ctxt->attsSpecial != NULL) {
14067 xmlHashFree(ctxt->attsSpecial, NULL);
14068 ctxt->attsSpecial = NULL;
14069 }
14070
Daniel Veillard4432df22003-09-28 18:58:27 +000014071#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014072 if (ctxt->catalogs != NULL)
14073 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014074#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014075 if (ctxt->lastError.code != XML_ERR_OK)
14076 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014077}
14078
14079/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014080 * xmlCtxtResetPush:
14081 * @ctxt: an XML parser context
14082 * @chunk: a pointer to an array of chars
14083 * @size: number of chars in the array
14084 * @filename: an optional file name or URI
14085 * @encoding: the document encoding, or NULL
14086 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014087 * Reset a push parser context
14088 *
14089 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014090 */
14091int
14092xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14093 int size, const char *filename, const char *encoding)
14094{
14095 xmlParserInputPtr inputStream;
14096 xmlParserInputBufferPtr buf;
14097 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14098
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014099 if (ctxt == NULL)
14100 return(1);
14101
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014102 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14103 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14104
14105 buf = xmlAllocParserInputBuffer(enc);
14106 if (buf == NULL)
14107 return(1);
14108
14109 if (ctxt == NULL) {
14110 xmlFreeParserInputBuffer(buf);
14111 return(1);
14112 }
14113
14114 xmlCtxtReset(ctxt);
14115
14116 if (ctxt->pushTab == NULL) {
14117 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14118 sizeof(xmlChar *));
14119 if (ctxt->pushTab == NULL) {
14120 xmlErrMemory(ctxt, NULL);
14121 xmlFreeParserInputBuffer(buf);
14122 return(1);
14123 }
14124 }
14125
14126 if (filename == NULL) {
14127 ctxt->directory = NULL;
14128 } else {
14129 ctxt->directory = xmlParserGetDirectory(filename);
14130 }
14131
14132 inputStream = xmlNewInputStream(ctxt);
14133 if (inputStream == NULL) {
14134 xmlFreeParserInputBuffer(buf);
14135 return(1);
14136 }
14137
14138 if (filename == NULL)
14139 inputStream->filename = NULL;
14140 else
14141 inputStream->filename = (char *)
14142 xmlCanonicPath((const xmlChar *) filename);
14143 inputStream->buf = buf;
14144 inputStream->base = inputStream->buf->buffer->content;
14145 inputStream->cur = inputStream->buf->buffer->content;
14146 inputStream->end =
14147 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
14148
14149 inputPush(ctxt, inputStream);
14150
14151 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14152 (ctxt->input->buf != NULL)) {
14153 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
14154 int cur = ctxt->input->cur - ctxt->input->base;
14155
14156 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14157
14158 ctxt->input->base = ctxt->input->buf->buffer->content + base;
14159 ctxt->input->cur = ctxt->input->base + cur;
14160 ctxt->input->end =
14161 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
14162 use];
14163#ifdef DEBUG_PUSH
14164 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14165#endif
14166 }
14167
14168 if (encoding != NULL) {
14169 xmlCharEncodingHandlerPtr hdlr;
14170
Daniel Veillard37334572008-07-31 08:20:02 +000014171 if (ctxt->encoding != NULL)
14172 xmlFree((xmlChar *) ctxt->encoding);
14173 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14174
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014175 hdlr = xmlFindCharEncodingHandler(encoding);
14176 if (hdlr != NULL) {
14177 xmlSwitchToEncoding(ctxt, hdlr);
14178 } else {
14179 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14180 "Unsupported encoding %s\n", BAD_CAST encoding);
14181 }
14182 } else if (enc != XML_CHAR_ENCODING_NONE) {
14183 xmlSwitchEncoding(ctxt, enc);
14184 }
14185
14186 return(0);
14187}
14188
Daniel Veillard37334572008-07-31 08:20:02 +000014189
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014190/**
Daniel Veillard37334572008-07-31 08:20:02 +000014191 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014192 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014193 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014194 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014195 *
14196 * Applies the options to the parser context
14197 *
14198 * Returns 0 in case of success, the set of unknown or unimplemented options
14199 * in case of error.
14200 */
Daniel Veillard37334572008-07-31 08:20:02 +000014201static int
14202xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014203{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014204 if (ctxt == NULL)
14205 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014206 if (encoding != NULL) {
14207 if (ctxt->encoding != NULL)
14208 xmlFree((xmlChar *) ctxt->encoding);
14209 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14210 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014211 if (options & XML_PARSE_RECOVER) {
14212 ctxt->recovery = 1;
14213 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014214 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014215 } else
14216 ctxt->recovery = 0;
14217 if (options & XML_PARSE_DTDLOAD) {
14218 ctxt->loadsubset = XML_DETECT_IDS;
14219 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014220 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014221 } else
14222 ctxt->loadsubset = 0;
14223 if (options & XML_PARSE_DTDATTR) {
14224 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14225 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014226 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014227 }
14228 if (options & XML_PARSE_NOENT) {
14229 ctxt->replaceEntities = 1;
14230 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14231 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014232 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014233 } else
14234 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014235 if (options & XML_PARSE_PEDANTIC) {
14236 ctxt->pedantic = 1;
14237 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014238 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014239 } else
14240 ctxt->pedantic = 0;
14241 if (options & XML_PARSE_NOBLANKS) {
14242 ctxt->keepBlanks = 0;
14243 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14244 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014245 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014246 } else
14247 ctxt->keepBlanks = 1;
14248 if (options & XML_PARSE_DTDVALID) {
14249 ctxt->validate = 1;
14250 if (options & XML_PARSE_NOWARNING)
14251 ctxt->vctxt.warning = NULL;
14252 if (options & XML_PARSE_NOERROR)
14253 ctxt->vctxt.error = NULL;
14254 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014255 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014256 } else
14257 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014258 if (options & XML_PARSE_NOWARNING) {
14259 ctxt->sax->warning = NULL;
14260 options -= XML_PARSE_NOWARNING;
14261 }
14262 if (options & XML_PARSE_NOERROR) {
14263 ctxt->sax->error = NULL;
14264 ctxt->sax->fatalError = NULL;
14265 options -= XML_PARSE_NOERROR;
14266 }
Daniel Veillard81273902003-09-30 00:43:48 +000014267#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014268 if (options & XML_PARSE_SAX1) {
14269 ctxt->sax->startElement = xmlSAX2StartElement;
14270 ctxt->sax->endElement = xmlSAX2EndElement;
14271 ctxt->sax->startElementNs = NULL;
14272 ctxt->sax->endElementNs = NULL;
14273 ctxt->sax->initialized = 1;
14274 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014275 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014276 }
Daniel Veillard81273902003-09-30 00:43:48 +000014277#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014278 if (options & XML_PARSE_NODICT) {
14279 ctxt->dictNames = 0;
14280 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014281 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014282 } else {
14283 ctxt->dictNames = 1;
14284 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014285 if (options & XML_PARSE_NOCDATA) {
14286 ctxt->sax->cdataBlock = NULL;
14287 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014288 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014289 }
14290 if (options & XML_PARSE_NSCLEAN) {
14291 ctxt->options |= XML_PARSE_NSCLEAN;
14292 options -= XML_PARSE_NSCLEAN;
14293 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014294 if (options & XML_PARSE_NONET) {
14295 ctxt->options |= XML_PARSE_NONET;
14296 options -= XML_PARSE_NONET;
14297 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014298 if (options & XML_PARSE_COMPACT) {
14299 ctxt->options |= XML_PARSE_COMPACT;
14300 options -= XML_PARSE_COMPACT;
14301 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014302 if (options & XML_PARSE_OLD10) {
14303 ctxt->options |= XML_PARSE_OLD10;
14304 options -= XML_PARSE_OLD10;
14305 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014306 if (options & XML_PARSE_NOBASEFIX) {
14307 ctxt->options |= XML_PARSE_NOBASEFIX;
14308 options -= XML_PARSE_NOBASEFIX;
14309 }
14310 if (options & XML_PARSE_HUGE) {
14311 ctxt->options |= XML_PARSE_HUGE;
14312 options -= XML_PARSE_HUGE;
14313 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014314 if (options & XML_PARSE_OLDSAX) {
14315 ctxt->options |= XML_PARSE_OLDSAX;
14316 options -= XML_PARSE_OLDSAX;
14317 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014318 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014319 return (options);
14320}
14321
14322/**
Daniel Veillard37334572008-07-31 08:20:02 +000014323 * xmlCtxtUseOptions:
14324 * @ctxt: an XML parser context
14325 * @options: a combination of xmlParserOption
14326 *
14327 * Applies the options to the parser context
14328 *
14329 * Returns 0 in case of success, the set of unknown or unimplemented options
14330 * in case of error.
14331 */
14332int
14333xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14334{
14335 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14336}
14337
14338/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014339 * xmlDoRead:
14340 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014341 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014342 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014343 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014344 * @reuse: keep the context for reuse
14345 *
14346 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014347 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014348 * Returns the resulting document tree or NULL
14349 */
14350static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014351xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14352 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014353{
14354 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014355
14356 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014357 if (encoding != NULL) {
14358 xmlCharEncodingHandlerPtr hdlr;
14359
14360 hdlr = xmlFindCharEncodingHandler(encoding);
14361 if (hdlr != NULL)
14362 xmlSwitchToEncoding(ctxt, hdlr);
14363 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014364 if ((URL != NULL) && (ctxt->input != NULL) &&
14365 (ctxt->input->filename == NULL))
14366 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014367 xmlParseDocument(ctxt);
14368 if ((ctxt->wellFormed) || ctxt->recovery)
14369 ret = ctxt->myDoc;
14370 else {
14371 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014372 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014373 xmlFreeDoc(ctxt->myDoc);
14374 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014375 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014376 ctxt->myDoc = NULL;
14377 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014378 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014379 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014380
14381 return (ret);
14382}
14383
14384/**
14385 * xmlReadDoc:
14386 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014387 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014388 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014389 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014390 *
14391 * parse an XML in-memory document and build a tree.
14392 *
14393 * Returns the resulting document tree
14394 */
14395xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014396xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014397{
14398 xmlParserCtxtPtr ctxt;
14399
14400 if (cur == NULL)
14401 return (NULL);
14402
14403 ctxt = xmlCreateDocParserCtxt(cur);
14404 if (ctxt == NULL)
14405 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014406 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014407}
14408
14409/**
14410 * xmlReadFile:
14411 * @filename: a file or URL
14412 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014413 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014414 *
14415 * parse an XML file from the filesystem or the network.
14416 *
14417 * Returns the resulting document tree
14418 */
14419xmlDocPtr
14420xmlReadFile(const char *filename, const char *encoding, int options)
14421{
14422 xmlParserCtxtPtr ctxt;
14423
Daniel Veillard61b93382003-11-03 14:28:31 +000014424 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014425 if (ctxt == NULL)
14426 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014427 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014428}
14429
14430/**
14431 * xmlReadMemory:
14432 * @buffer: a pointer to a char array
14433 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014434 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014435 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014436 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014437 *
14438 * parse an XML in-memory document and build a tree.
14439 *
14440 * Returns the resulting document tree
14441 */
14442xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014443xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014444{
14445 xmlParserCtxtPtr ctxt;
14446
14447 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14448 if (ctxt == NULL)
14449 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014450 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014451}
14452
14453/**
14454 * xmlReadFd:
14455 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014456 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014457 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014458 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014459 *
14460 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014461 * NOTE that the file descriptor will not be closed when the
14462 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014463 *
14464 * Returns the resulting document tree
14465 */
14466xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014467xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014468{
14469 xmlParserCtxtPtr ctxt;
14470 xmlParserInputBufferPtr input;
14471 xmlParserInputPtr stream;
14472
14473 if (fd < 0)
14474 return (NULL);
14475
14476 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14477 if (input == NULL)
14478 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014479 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014480 ctxt = xmlNewParserCtxt();
14481 if (ctxt == NULL) {
14482 xmlFreeParserInputBuffer(input);
14483 return (NULL);
14484 }
14485 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14486 if (stream == NULL) {
14487 xmlFreeParserInputBuffer(input);
14488 xmlFreeParserCtxt(ctxt);
14489 return (NULL);
14490 }
14491 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014492 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014493}
14494
14495/**
14496 * xmlReadIO:
14497 * @ioread: an I/O read function
14498 * @ioclose: an I/O close function
14499 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014500 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014501 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014502 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014503 *
14504 * parse an XML document from I/O functions and source and build a tree.
14505 *
14506 * Returns the resulting document tree
14507 */
14508xmlDocPtr
14509xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014510 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014511{
14512 xmlParserCtxtPtr ctxt;
14513 xmlParserInputBufferPtr input;
14514 xmlParserInputPtr stream;
14515
14516 if (ioread == NULL)
14517 return (NULL);
14518
14519 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14520 XML_CHAR_ENCODING_NONE);
14521 if (input == NULL)
14522 return (NULL);
14523 ctxt = xmlNewParserCtxt();
14524 if (ctxt == NULL) {
14525 xmlFreeParserInputBuffer(input);
14526 return (NULL);
14527 }
14528 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14529 if (stream == NULL) {
14530 xmlFreeParserInputBuffer(input);
14531 xmlFreeParserCtxt(ctxt);
14532 return (NULL);
14533 }
14534 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014535 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014536}
14537
14538/**
14539 * xmlCtxtReadDoc:
14540 * @ctxt: an XML parser context
14541 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014542 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014543 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014544 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014545 *
14546 * parse an XML in-memory document and build a tree.
14547 * This reuses the existing @ctxt parser context
14548 *
14549 * Returns the resulting document tree
14550 */
14551xmlDocPtr
14552xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014553 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014554{
14555 xmlParserInputPtr stream;
14556
14557 if (cur == NULL)
14558 return (NULL);
14559 if (ctxt == NULL)
14560 return (NULL);
14561
14562 xmlCtxtReset(ctxt);
14563
14564 stream = xmlNewStringInputStream(ctxt, cur);
14565 if (stream == NULL) {
14566 return (NULL);
14567 }
14568 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014569 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014570}
14571
14572/**
14573 * xmlCtxtReadFile:
14574 * @ctxt: an XML parser context
14575 * @filename: a file or URL
14576 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014577 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014578 *
14579 * parse an XML file from the filesystem or the network.
14580 * This reuses the existing @ctxt parser context
14581 *
14582 * Returns the resulting document tree
14583 */
14584xmlDocPtr
14585xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14586 const char *encoding, int options)
14587{
14588 xmlParserInputPtr stream;
14589
14590 if (filename == NULL)
14591 return (NULL);
14592 if (ctxt == NULL)
14593 return (NULL);
14594
14595 xmlCtxtReset(ctxt);
14596
Daniel Veillard29614c72004-11-26 10:47:26 +000014597 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014598 if (stream == NULL) {
14599 return (NULL);
14600 }
14601 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014602 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014603}
14604
14605/**
14606 * xmlCtxtReadMemory:
14607 * @ctxt: an XML parser context
14608 * @buffer: a pointer to a char array
14609 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014610 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014611 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014612 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014613 *
14614 * parse an XML in-memory document and build a tree.
14615 * This reuses the existing @ctxt parser context
14616 *
14617 * Returns the resulting document tree
14618 */
14619xmlDocPtr
14620xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014621 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014622{
14623 xmlParserInputBufferPtr input;
14624 xmlParserInputPtr stream;
14625
14626 if (ctxt == NULL)
14627 return (NULL);
14628 if (buffer == NULL)
14629 return (NULL);
14630
14631 xmlCtxtReset(ctxt);
14632
14633 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14634 if (input == NULL) {
14635 return(NULL);
14636 }
14637
14638 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14639 if (stream == NULL) {
14640 xmlFreeParserInputBuffer(input);
14641 return(NULL);
14642 }
14643
14644 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014645 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014646}
14647
14648/**
14649 * xmlCtxtReadFd:
14650 * @ctxt: an XML parser context
14651 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014652 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014653 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014654 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014655 *
14656 * parse an XML from a file descriptor and build a tree.
14657 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014658 * NOTE that the file descriptor will not be closed when the
14659 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014660 *
14661 * Returns the resulting document tree
14662 */
14663xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014664xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14665 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014666{
14667 xmlParserInputBufferPtr input;
14668 xmlParserInputPtr stream;
14669
14670 if (fd < 0)
14671 return (NULL);
14672 if (ctxt == NULL)
14673 return (NULL);
14674
14675 xmlCtxtReset(ctxt);
14676
14677
14678 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14679 if (input == NULL)
14680 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014681 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014682 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14683 if (stream == NULL) {
14684 xmlFreeParserInputBuffer(input);
14685 return (NULL);
14686 }
14687 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014688 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014689}
14690
14691/**
14692 * xmlCtxtReadIO:
14693 * @ctxt: an XML parser context
14694 * @ioread: an I/O read function
14695 * @ioclose: an I/O close function
14696 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014697 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014698 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014699 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014700 *
14701 * parse an XML document from I/O functions and source and build a tree.
14702 * This reuses the existing @ctxt parser context
14703 *
14704 * Returns the resulting document tree
14705 */
14706xmlDocPtr
14707xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14708 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014709 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014710 const char *encoding, int options)
14711{
14712 xmlParserInputBufferPtr input;
14713 xmlParserInputPtr stream;
14714
14715 if (ioread == NULL)
14716 return (NULL);
14717 if (ctxt == NULL)
14718 return (NULL);
14719
14720 xmlCtxtReset(ctxt);
14721
14722 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14723 XML_CHAR_ENCODING_NONE);
14724 if (input == NULL)
14725 return (NULL);
14726 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14727 if (stream == NULL) {
14728 xmlFreeParserInputBuffer(input);
14729 return (NULL);
14730 }
14731 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014732 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014733}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014734
14735#define bottom_parser
14736#include "elfgcchack.h"