blob: f127bb5746c5c043175ad33356ac2308cf44226a [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000044#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000045#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000046#include <libxml/threads.h>
47#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000048#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000057#ifdef LIBXML_CATALOG_ENABLED
58#include <libxml/catalog.h>
59#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000060#ifdef LIBXML_SCHEMAS_ENABLED
61#include <libxml/xmlschemastypes.h>
62#include <libxml/relaxng.h>
63#endif
Owen Taylor3473f882001-02-23 17:55:21 +000064#ifdef HAVE_CTYPE_H
65#include <ctype.h>
66#endif
67#ifdef HAVE_STDLIB_H
68#include <stdlib.h>
69#endif
70#ifdef HAVE_SYS_STAT_H
71#include <sys/stat.h>
72#endif
73#ifdef HAVE_FCNTL_H
74#include <fcntl.h>
75#endif
76#ifdef HAVE_UNISTD_H
77#include <unistd.h>
78#endif
79#ifdef HAVE_ZLIB_H
80#include <zlib.h>
81#endif
82
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary limit on the depth of the XML documents the parser accepts.
 * This is a safety boundary, not a limitation of the parser itself; it
 * can be disabled with the XML_PARSE_HUGE parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +000092
/* Flag macro; NOTE(review): presumably selects the SAX2 code paths - confirm. */
#define SAX2 1

/* Buffer sizes used by the parser; the units are not visible from here. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/* Marker string identifying a document built in SAX compatibility mode. */
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
99
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by the W3C specifications.
 */
static const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
108
Daniel Veillarda07050d2003-10-19 14:46:32 +0000109
Owen Taylor3473f882001-02-23 17:55:21 +0000110/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +0000111xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
112 const xmlChar **str);
113
Daniel Veillard7d515752003-09-26 19:12:37 +0000114static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000115xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
116 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000117 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000118 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000119
Daniel Veillard37334572008-07-31 08:20:02 +0000120static int
121xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
122 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000123#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000124static void
125xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
126 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000127#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000128
Daniel Veillard7d515752003-09-26 19:12:37 +0000129static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000130xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
131 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000132
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000133static int
134xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
135
Daniel Veillarde57ec792003-09-10 10:50:59 +0000136/************************************************************************
137 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000138 * Some factorized error routines *
139 * *
140 ************************************************************************/
141
142/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000143 * xmlErrAttributeDup:
144 * @ctxt: an XML parser context
145 * @prefix: the attribute prefix
146 * @localname: the attribute localname
147 *
148 * Handle a redefinition of attribute error
149 */
150static void
151xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
152 const xmlChar * localname)
153{
Daniel Veillard157fee02003-10-31 10:36:03 +0000154 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
155 (ctxt->instate == XML_PARSER_EOF))
156 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000157 if (ctxt != NULL)
158 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000159 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000160 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000161 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
162 (const char *) localname, NULL, NULL, 0, 0,
163 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000164 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000165 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000166 ctxt->errNo, XML_ERR_FATAL, NULL, 0,
167 (const char *) prefix, (const char *) localname,
168 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
169 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000170 if (ctxt != NULL) {
171 ctxt->wellFormed = 0;
172 if (ctxt->recovery == 0)
173 ctxt->disableSAX = 1;
174 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000175}
176
177/**
178 * xmlFatalErr:
179 * @ctxt: an XML parser context
180 * @error: the error number
181 * @extra: extra information string
182 *
183 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
184 */
185static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000186xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000187{
188 const char *errmsg;
189
Daniel Veillard157fee02003-10-31 10:36:03 +0000190 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
191 (ctxt->instate == XML_PARSER_EOF))
192 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000193 switch (error) {
194 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000195 errmsg = "CharRef: invalid hexadecimal value\n";
196 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000197 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000198 errmsg = "CharRef: invalid decimal value\n";
199 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000200 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000201 errmsg = "CharRef: invalid value\n";
202 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000203 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000204 errmsg = "internal error";
205 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000206 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000207 errmsg = "PEReference at end of document\n";
208 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000209 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000210 errmsg = "PEReference in prolog\n";
211 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000212 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000213 errmsg = "PEReference in epilog\n";
214 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000215 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000216 errmsg = "PEReference: no name\n";
217 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000218 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000219 errmsg = "PEReference: expecting ';'\n";
220 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000221 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000222 errmsg = "Detected an entity reference loop\n";
223 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000224 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000225 errmsg = "EntityValue: \" or ' expected\n";
226 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000227 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000228 errmsg = "PEReferences forbidden in internal subset\n";
229 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000230 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000231 errmsg = "EntityValue: \" or ' expected\n";
232 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000233 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000234 errmsg = "AttValue: \" or ' expected\n";
235 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000236 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000237 errmsg = "Unescaped '<' not allowed in attributes values\n";
238 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000239 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000240 errmsg = "SystemLiteral \" or ' expected\n";
241 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000242 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000243 errmsg = "Unfinished System or Public ID \" or ' expected\n";
244 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000245 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000246 errmsg = "Sequence ']]>' not allowed in content\n";
247 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000248 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000249 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
250 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000251 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000252 errmsg = "PUBLIC, the Public Identifier is missing\n";
253 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000254 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000255 errmsg = "Comment must not contain '--' (double-hyphen)\n";
256 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000257 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000258 errmsg = "xmlParsePI : no target name\n";
259 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000260 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000261 errmsg = "Invalid PI name\n";
262 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000263 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000264 errmsg = "NOTATION: Name expected here\n";
265 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000266 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000267 errmsg = "'>' required to close NOTATION declaration\n";
268 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000269 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000270 errmsg = "Entity value required\n";
271 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000272 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000273 errmsg = "Fragment not allowed";
274 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000275 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000276 errmsg = "'(' required to start ATTLIST enumeration\n";
277 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000278 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000279 errmsg = "NmToken expected in ATTLIST enumeration\n";
280 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000281 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000282 errmsg = "')' required to finish ATTLIST enumeration\n";
283 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000284 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000285 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
286 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000287 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000288 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
289 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000291 errmsg = "ContentDecl : Name or '(' expected\n";
292 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000293 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000294 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
295 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000297 errmsg =
298 "PEReference: forbidden within markup decl in internal subset\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "expected '>'\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "XML conditional section '[' expected\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "Content error in the external subset\n";
308 break;
309 case XML_ERR_CONDSEC_INVALID_KEYWORD:
310 errmsg =
311 "conditional section INCLUDE or IGNORE keyword expected\n";
312 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000313 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000314 errmsg = "XML conditional section not closed\n";
315 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000316 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000317 errmsg = "Text declaration '<?xml' required\n";
318 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000319 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000320 errmsg = "parsing XML declaration: '?>' expected\n";
321 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000322 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000323 errmsg = "external parsed entities cannot be standalone\n";
324 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000325 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000326 errmsg = "EntityRef: expecting ';'\n";
327 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000328 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000329 errmsg = "DOCTYPE improperly terminated\n";
330 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000331 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000332 errmsg = "EndTag: '</' not found\n";
333 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000334 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000335 errmsg = "expected '='\n";
336 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000337 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000338 errmsg = "String not closed expecting \" or '\n";
339 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000340 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000341 errmsg = "String not started expecting ' or \"\n";
342 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000343 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000344 errmsg = "Invalid XML encoding name\n";
345 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000346 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000347 errmsg = "standalone accepts only 'yes' or 'no'\n";
348 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000349 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000350 errmsg = "Document is empty\n";
351 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000352 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000353 errmsg = "Extra content at the end of the document\n";
354 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000355 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000356 errmsg = "chunk is not well balanced\n";
357 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000358 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000359 errmsg = "extra content at the end of well balanced chunk\n";
360 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000361 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000362 errmsg = "Malformed declaration expecting version\n";
363 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000364#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000365 case:
366 errmsg = "\n";
367 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000368#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000369 default:
370 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000371 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000372 if (ctxt != NULL)
373 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000374 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000375 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
376 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000377 if (ctxt != NULL) {
378 ctxt->wellFormed = 0;
379 if (ctxt->recovery == 0)
380 ctxt->disableSAX = 1;
381 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000382}
383
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000384/**
385 * xmlFatalErrMsg:
386 * @ctxt: an XML parser context
387 * @error: the error number
388 * @msg: the error message
389 *
390 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
391 */
392static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000393xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
394 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000395{
Daniel Veillard157fee02003-10-31 10:36:03 +0000396 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
397 (ctxt->instate == XML_PARSER_EOF))
398 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000399 if (ctxt != NULL)
400 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000401 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000402 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000403 if (ctxt != NULL) {
404 ctxt->wellFormed = 0;
405 if (ctxt->recovery == 0)
406 ctxt->disableSAX = 1;
407 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000408}
409
410/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000411 * xmlWarningMsg:
412 * @ctxt: an XML parser context
413 * @error: the error number
414 * @msg: the error message
415 * @str1: extra data
416 * @str2: extra data
417 *
418 * Handle a warning.
419 */
420static void
421xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
422 const char *msg, const xmlChar *str1, const xmlChar *str2)
423{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000424 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000425
Daniel Veillard157fee02003-10-31 10:36:03 +0000426 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
427 (ctxt->instate == XML_PARSER_EOF))
428 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000429 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
430 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000431 schannel = ctxt->sax->serror;
432 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000433 (ctxt->sax) ? ctxt->sax->warning : NULL,
434 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000435 ctxt, NULL, XML_FROM_PARSER, error,
436 XML_ERR_WARNING, NULL, 0,
437 (const char *) str1, (const char *) str2, NULL, 0, 0,
438 msg, (const char *) str1, (const char *) str2);
439}
440
441/**
442 * xmlValidityError:
443 * @ctxt: an XML parser context
444 * @error: the error number
445 * @msg: the error message
446 * @str1: extra data
447 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000448 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000449 */
450static void
451xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000452 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000453{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000454 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000455
456 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
457 (ctxt->instate == XML_PARSER_EOF))
458 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000459 if (ctxt != NULL) {
460 ctxt->errNo = error;
461 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
462 schannel = ctxt->sax->serror;
463 }
Daniel Veillardc790bf42003-10-11 10:50:10 +0000464 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000465 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000466 ctxt, NULL, XML_FROM_DTD, error,
467 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000468 (const char *) str2, NULL, 0, 0,
469 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000470 if (ctxt != NULL) {
471 ctxt->valid = 0;
472 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000473}
474
475/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000476 * xmlFatalErrMsgInt:
477 * @ctxt: an XML parser context
478 * @error: the error number
479 * @msg: the error message
480 * @val: an integer value
481 *
482 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
483 */
484static void
485xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000486 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000487{
Daniel Veillard157fee02003-10-31 10:36:03 +0000488 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
489 (ctxt->instate == XML_PARSER_EOF))
490 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000491 if (ctxt != NULL)
492 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000493 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000494 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
495 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000496 if (ctxt != NULL) {
497 ctxt->wellFormed = 0;
498 if (ctxt->recovery == 0)
499 ctxt->disableSAX = 1;
500 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000501}
502
503/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000504 * xmlFatalErrMsgStrIntStr:
505 * @ctxt: an XML parser context
506 * @error: the error number
507 * @msg: the error message
508 * @str1: an string info
509 * @val: an integer value
510 * @str2: an string info
511 *
512 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
513 */
514static void
515xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
516 const char *msg, const xmlChar *str1, int val,
517 const xmlChar *str2)
518{
Daniel Veillard157fee02003-10-31 10:36:03 +0000519 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
520 (ctxt->instate == XML_PARSER_EOF))
521 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000522 if (ctxt != NULL)
523 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000524 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000525 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
526 NULL, 0, (const char *) str1, (const char *) str2,
527 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000528 if (ctxt != NULL) {
529 ctxt->wellFormed = 0;
530 if (ctxt->recovery == 0)
531 ctxt->disableSAX = 1;
532 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000533}
534
535/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000536 * xmlFatalErrMsgStr:
537 * @ctxt: an XML parser context
538 * @error: the error number
539 * @msg: the error message
540 * @val: a string value
541 *
542 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
543 */
544static void
545xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000546 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000547{
Daniel Veillard157fee02003-10-31 10:36:03 +0000548 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
549 (ctxt->instate == XML_PARSER_EOF))
550 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000551 if (ctxt != NULL)
552 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000553 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000554 XML_FROM_PARSER, error, XML_ERR_FATAL,
555 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
556 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000557 if (ctxt != NULL) {
558 ctxt->wellFormed = 0;
559 if (ctxt->recovery == 0)
560 ctxt->disableSAX = 1;
561 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000562}
563
564/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000565 * xmlErrMsgStr:
566 * @ctxt: an XML parser context
567 * @error: the error number
568 * @msg: the error message
569 * @val: a string value
570 *
571 * Handle a non fatal parser error
572 */
573static void
574xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
575 const char *msg, const xmlChar * val)
576{
Daniel Veillard157fee02003-10-31 10:36:03 +0000577 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
578 (ctxt->instate == XML_PARSER_EOF))
579 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000580 if (ctxt != NULL)
581 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000582 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000583 XML_FROM_PARSER, error, XML_ERR_ERROR,
584 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
585 val);
586}
587
588/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000589 * xmlNsErr:
590 * @ctxt: an XML parser context
591 * @error: the error number
592 * @msg: the message
593 * @info1: extra information string
594 * @info2: extra information string
595 *
596 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
597 */
598static void
599xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
600 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000601 const xmlChar * info1, const xmlChar * info2,
602 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000603{
Daniel Veillard157fee02003-10-31 10:36:03 +0000604 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
605 (ctxt->instate == XML_PARSER_EOF))
606 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000607 if (ctxt != NULL)
608 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000609 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000610 XML_ERR_ERROR, NULL, 0, (const char *) info1,
611 (const char *) info2, (const char *) info3, 0, 0, msg,
612 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000613 if (ctxt != NULL)
614 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000615}
616
Daniel Veillard37334572008-07-31 08:20:02 +0000617/**
618 * xmlNsWarn
619 * @ctxt: an XML parser context
620 * @error: the error number
621 * @msg: the message
622 * @info1: extra information string
623 * @info2: extra information string
624 *
625 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
626 */
627static void
628xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
629 const char *msg,
630 const xmlChar * info1, const xmlChar * info2,
631 const xmlChar * info3)
632{
633 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
634 (ctxt->instate == XML_PARSER_EOF))
635 return;
636 if (ctxt != NULL)
637 ctxt->errNo = error;
638 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
639 XML_ERR_WARNING, NULL, 0, (const char *) info1,
640 (const char *) info2, (const char *) info3, 0, 0, msg,
641 info1, info2, info3);
642}
643
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000644/************************************************************************
645 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000646 * Library wide options *
647 * *
648 ************************************************************************/
649
650/**
651 * xmlHasFeature:
652 * @feature: the feature to be examined
653 *
654 * Examines if the library has been compiled with a given feature.
655 *
 * Returns a non-zero value if the feature exists, zero (0) if it does
 * not exist or if an unknown feature is requested.
659 */
660int
661xmlHasFeature(xmlFeature feature)
662{
663 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000664 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000665#ifdef LIBXML_THREAD_ENABLED
666 return(1);
667#else
668 return(0);
669#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000670 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000671#ifdef LIBXML_TREE_ENABLED
672 return(1);
673#else
674 return(0);
675#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000676 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000677#ifdef LIBXML_OUTPUT_ENABLED
678 return(1);
679#else
680 return(0);
681#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000682 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000683#ifdef LIBXML_PUSH_ENABLED
684 return(1);
685#else
686 return(0);
687#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000688 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000689#ifdef LIBXML_READER_ENABLED
690 return(1);
691#else
692 return(0);
693#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000694 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000695#ifdef LIBXML_PATTERN_ENABLED
696 return(1);
697#else
698 return(0);
699#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000700 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000701#ifdef LIBXML_WRITER_ENABLED
702 return(1);
703#else
704 return(0);
705#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000706 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000707#ifdef LIBXML_SAX1_ENABLED
708 return(1);
709#else
710 return(0);
711#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000712 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000713#ifdef LIBXML_FTP_ENABLED
714 return(1);
715#else
716 return(0);
717#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000718 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000719#ifdef LIBXML_HTTP_ENABLED
720 return(1);
721#else
722 return(0);
723#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000724 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000725#ifdef LIBXML_VALID_ENABLED
726 return(1);
727#else
728 return(0);
729#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000730 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000731#ifdef LIBXML_HTML_ENABLED
732 return(1);
733#else
734 return(0);
735#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000736 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000737#ifdef LIBXML_LEGACY_ENABLED
738 return(1);
739#else
740 return(0);
741#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000742 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000743#ifdef LIBXML_C14N_ENABLED
744 return(1);
745#else
746 return(0);
747#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000748 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000749#ifdef LIBXML_CATALOG_ENABLED
750 return(1);
751#else
752 return(0);
753#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000754 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000755#ifdef LIBXML_XPATH_ENABLED
756 return(1);
757#else
758 return(0);
759#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000760 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761#ifdef LIBXML_XPTR_ENABLED
762 return(1);
763#else
764 return(0);
765#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000766 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000767#ifdef LIBXML_XINCLUDE_ENABLED
768 return(1);
769#else
770 return(0);
771#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000772 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000773#ifdef LIBXML_ICONV_ENABLED
774 return(1);
775#else
776 return(0);
777#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000778 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000779#ifdef LIBXML_ISO8859X_ENABLED
780 return(1);
781#else
782 return(0);
783#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000784 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000785#ifdef LIBXML_UNICODE_ENABLED
786 return(1);
787#else
788 return(0);
789#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000790 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000791#ifdef LIBXML_REGEXP_ENABLED
792 return(1);
793#else
794 return(0);
795#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000796 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000797#ifdef LIBXML_AUTOMATA_ENABLED
798 return(1);
799#else
800 return(0);
801#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000802 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000803#ifdef LIBXML_EXPR_ENABLED
804 return(1);
805#else
806 return(0);
807#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000808 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000809#ifdef LIBXML_SCHEMAS_ENABLED
810 return(1);
811#else
812 return(0);
813#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000814 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000815#ifdef LIBXML_SCHEMATRON_ENABLED
816 return(1);
817#else
818 return(0);
819#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000820 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000821#ifdef LIBXML_MODULES_ENABLED
822 return(1);
823#else
824 return(0);
825#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000826 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000827#ifdef LIBXML_DEBUG_ENABLED
828 return(1);
829#else
830 return(0);
831#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000832 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000833#ifdef DEBUG_MEMORY_LOCATION
834 return(1);
835#else
836 return(0);
837#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000838 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000839#ifdef LIBXML_DEBUG_RUNTIME
840 return(1);
841#else
842 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000843#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000844 case XML_WITH_ZLIB:
845#ifdef LIBXML_ZLIB_ENABLED
846 return(1);
847#else
848 return(0);
849#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000850 default:
851 break;
852 }
853 return(0);
854}
855
856/************************************************************************
857 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000858 * SAX2 defaulted attributes handling *
859 * *
860 ************************************************************************/
861
862/**
863 * xmlDetectSAX2:
864 * @ctxt: an XML parser context
865 *
866 * Do the SAX2 detection and specific intialization
867 */
868static void
869xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
870 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000871#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000872 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
873 ((ctxt->sax->startElementNs != NULL) ||
874 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +0000875#else
876 ctxt->sax2 = 1;
877#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000878
879 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
880 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
881 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +0000882 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
883 (ctxt->str_xml_ns == NULL)) {
884 xmlErrMemory(ctxt, NULL);
885 }
Daniel Veillarde57ec792003-09-10 10:50:59 +0000886}
887
Daniel Veillarde57ec792003-09-10 10:50:59 +0000888typedef struct _xmlDefAttrs xmlDefAttrs;
889typedef xmlDefAttrs *xmlDefAttrsPtr;
890struct _xmlDefAttrs {
891 int nbAttrs; /* number of defaulted attributes on that element */
892 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +0000893 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +0000894};
Daniel Veillarde57ec792003-09-10 10:50:59 +0000895
896/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000897 * xmlAttrNormalizeSpace:
898 * @src: the source string
899 * @dst: the target string
900 *
901 * Normalize the space in non CDATA attribute values:
902 * If the attribute type is not CDATA, then the XML processor MUST further
903 * process the normalized attribute value by discarding any leading and
904 * trailing space (#x20) characters, and by replacing sequences of space
905 * (#x20) characters by a single space (#x20) character.
906 * Note that the size of dst need to be at least src, and if one doesn't need
907 * to preserve dst (and it doesn't come from a dictionary or read-only) then
908 * passing src as dst is just fine.
909 *
910 * Returns a pointer to the normalized value (dst) or NULL if no conversion
911 * is needed.
912 */
913static xmlChar *
914xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
915{
916 if ((src == NULL) || (dst == NULL))
917 return(NULL);
918
919 while (*src == 0x20) src++;
920 while (*src != 0) {
921 if (*src == 0x20) {
922 while (*src == 0x20) src++;
923 if (*src != 0)
924 *dst++ = 0x20;
925 } else {
926 *dst++ = *src++;
927 }
928 }
929 *dst = 0;
930 if (dst == src)
931 return(NULL);
932 return(dst);
933}
934
935/**
936 * xmlAttrNormalizeSpace2:
937 * @src: the source string
938 *
939 * Normalize the space in non CDATA attribute values, a slightly more complex
940 * front end to avoid allocation problems when running on attribute values
941 * coming from the input.
942 *
943 * Returns a pointer to the normalized value (dst) or NULL if no conversion
944 * is needed.
945 */
946static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +0000947xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000948{
949 int i;
950 int remove_head = 0;
951 int need_realloc = 0;
952 const xmlChar *cur;
953
954 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
955 return(NULL);
956 i = *len;
957 if (i <= 0)
958 return(NULL);
959
960 cur = src;
961 while (*cur == 0x20) {
962 cur++;
963 remove_head++;
964 }
965 while (*cur != 0) {
966 if (*cur == 0x20) {
967 cur++;
968 if ((*cur == 0x20) || (*cur == 0)) {
969 need_realloc = 1;
970 break;
971 }
972 } else
973 cur++;
974 }
975 if (need_realloc) {
976 xmlChar *ret;
977
978 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
979 if (ret == NULL) {
980 xmlErrMemory(ctxt, NULL);
981 return(NULL);
982 }
983 xmlAttrNormalizeSpace(ret, ret);
984 *len = (int) strlen((const char *)ret);
985 return(ret);
986 } else if (remove_head) {
987 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +0000988 memmove(src, src + remove_head, 1 + *len);
989 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +0000990 }
991 return(NULL);
992}
993
994/**
Daniel Veillarde57ec792003-09-10 10:50:59 +0000995 * xmlAddDefAttrs:
996 * @ctxt: an XML parser context
997 * @fullname: the element fullname
998 * @fullattr: the attribute fullname
999 * @value: the attribute value
1000 *
1001 * Add a defaulted attribute for an element
1002 */
1003static void
1004xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1005 const xmlChar *fullname,
1006 const xmlChar *fullattr,
1007 const xmlChar *value) {
1008 xmlDefAttrsPtr defaults;
1009 int len;
1010 const xmlChar *name;
1011 const xmlChar *prefix;
1012
Daniel Veillard6a31b832008-03-26 14:06:44 +00001013 /*
1014 * Allows to detect attribute redefinitions
1015 */
1016 if (ctxt->attsSpecial != NULL) {
1017 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1018 return;
1019 }
1020
Daniel Veillarde57ec792003-09-10 10:50:59 +00001021 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001022 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001023 if (ctxt->attsDefault == NULL)
1024 goto mem_error;
1025 }
1026
1027 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001028 * split the element name into prefix:localname , the string found
1029 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001030 */
1031 name = xmlSplitQName3(fullname, &len);
1032 if (name == NULL) {
1033 name = xmlDictLookup(ctxt->dict, fullname, -1);
1034 prefix = NULL;
1035 } else {
1036 name = xmlDictLookup(ctxt->dict, name, -1);
1037 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1038 }
1039
1040 /*
1041 * make sure there is some storage
1042 */
1043 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1044 if (defaults == NULL) {
1045 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001046 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001047 if (defaults == NULL)
1048 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001049 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001050 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001051 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1052 defaults, NULL) < 0) {
1053 xmlFree(defaults);
1054 goto mem_error;
1055 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001056 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001057 xmlDefAttrsPtr temp;
1058
1059 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001060 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001061 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001062 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001063 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001064 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001065 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1066 defaults, NULL) < 0) {
1067 xmlFree(defaults);
1068 goto mem_error;
1069 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001070 }
1071
1072 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001073 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001074 * are within the DTD and hen not associated to namespace names.
1075 */
1076 name = xmlSplitQName3(fullattr, &len);
1077 if (name == NULL) {
1078 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1079 prefix = NULL;
1080 } else {
1081 name = xmlDictLookup(ctxt->dict, name, -1);
1082 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1083 }
1084
Daniel Veillardae0765b2008-07-31 19:54:59 +00001085 defaults->values[5 * defaults->nbAttrs] = name;
1086 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001087 /* intern the string and precompute the end */
1088 len = xmlStrlen(value);
1089 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001090 defaults->values[5 * defaults->nbAttrs + 2] = value;
1091 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1092 if (ctxt->external)
1093 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1094 else
1095 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001096 defaults->nbAttrs++;
1097
1098 return;
1099
1100mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001101 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001102 return;
1103}
1104
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001105/**
1106 * xmlAddSpecialAttr:
1107 * @ctxt: an XML parser context
1108 * @fullname: the element fullname
1109 * @fullattr: the attribute fullname
1110 * @type: the attribute type
1111 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001112 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001113 */
1114static void
1115xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1116 const xmlChar *fullname,
1117 const xmlChar *fullattr,
1118 int type)
1119{
1120 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001121 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001122 if (ctxt->attsSpecial == NULL)
1123 goto mem_error;
1124 }
1125
Daniel Veillardac4118d2008-01-11 05:27:32 +00001126 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1127 return;
1128
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001129 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1130 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001131 return;
1132
1133mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001134 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001135 return;
1136}
1137
Daniel Veillard4432df22003-09-28 18:58:27 +00001138/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001139 * xmlCleanSpecialAttrCallback:
1140 *
1141 * Removes CDATA attributes from the special attribute table
1142 */
1143static void
1144xmlCleanSpecialAttrCallback(void *payload, void *data,
1145 const xmlChar *fullname, const xmlChar *fullattr,
1146 const xmlChar *unused ATTRIBUTE_UNUSED) {
1147 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1148
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001149 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001150 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1151 }
1152}
1153
1154/**
1155 * xmlCleanSpecialAttr:
1156 * @ctxt: an XML parser context
1157 *
1158 * Trim the list of attributes defined to remove all those of type
1159 * CDATA as they are not special. This call should be done when finishing
1160 * to parse the DTD and before starting to parse the document root.
1161 */
1162static void
1163xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1164{
1165 if (ctxt->attsSpecial == NULL)
1166 return;
1167
1168 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1169
1170 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1171 xmlHashFree(ctxt->attsSpecial, NULL);
1172 ctxt->attsSpecial = NULL;
1173 }
1174 return;
1175}
1176
1177/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001178 * xmlCheckLanguageID:
1179 * @lang: pointer to the string value
1180 *
1181 * Checks that the value conforms to the LanguageID production:
1182 *
1183 * NOTE: this is somewhat deprecated, those productions were removed from
1184 * the XML Second edition.
1185 *
1186 * [33] LanguageID ::= Langcode ('-' Subcode)*
1187 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1188 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1189 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1190 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1191 * [38] Subcode ::= ([a-z] | [A-Z])+
1192 *
1193 * Returns 1 if correct 0 otherwise
1194 **/
1195int
1196xmlCheckLanguageID(const xmlChar * lang)
1197{
1198 const xmlChar *cur = lang;
1199
1200 if (cur == NULL)
1201 return (0);
1202 if (((cur[0] == 'i') && (cur[1] == '-')) ||
1203 ((cur[0] == 'I') && (cur[1] == '-'))) {
1204 /*
1205 * IANA code
1206 */
1207 cur += 2;
1208 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1209 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1210 cur++;
1211 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
1212 ((cur[0] == 'X') && (cur[1] == '-'))) {
1213 /*
1214 * User code
1215 */
1216 cur += 2;
1217 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1218 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1219 cur++;
1220 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1221 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
1222 /*
1223 * ISO639
1224 */
1225 cur++;
1226 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1227 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1228 cur++;
1229 else
1230 return (0);
1231 } else
1232 return (0);
1233 while (cur[0] != 0) { /* non input consuming */
1234 if (cur[0] != '-')
1235 return (0);
1236 cur++;
1237 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
1238 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1239 cur++;
1240 else
1241 return (0);
1242 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
1243 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1244 cur++;
1245 }
1246 return (1);
1247}
1248
Owen Taylor3473f882001-02-23 17:55:21 +00001249/************************************************************************
1250 * *
1251 * Parser stacks related functions and macros *
1252 * *
1253 ************************************************************************/
1254
1255xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1256 const xmlChar ** str);
1257
Daniel Veillard0fb18932003-09-07 09:14:37 +00001258#ifdef SAX2
1259/**
1260 * nsPush:
1261 * @ctxt: an XML parser context
1262 * @prefix: the namespace prefix or NULL
1263 * @URL: the namespace name
1264 *
1265 * Pushes a new parser namespace on top of the ns stack
1266 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001267 * Returns -1 in case of error, -2 if the namespace should be discarded
1268 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001269 */
1270static int
1271nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1272{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001273 if (ctxt->options & XML_PARSE_NSCLEAN) {
1274 int i;
1275 for (i = 0;i < ctxt->nsNr;i += 2) {
1276 if (ctxt->nsTab[i] == prefix) {
1277 /* in scope */
1278 if (ctxt->nsTab[i + 1] == URL)
1279 return(-2);
1280 /* out of scope keep it */
1281 break;
1282 }
1283 }
1284 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001285 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1286 ctxt->nsMax = 10;
1287 ctxt->nsNr = 0;
1288 ctxt->nsTab = (const xmlChar **)
1289 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1290 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001291 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001292 ctxt->nsMax = 0;
1293 return (-1);
1294 }
1295 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001296 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001297 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001298 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1299 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1300 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001301 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001302 ctxt->nsMax /= 2;
1303 return (-1);
1304 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001305 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001306 }
1307 ctxt->nsTab[ctxt->nsNr++] = prefix;
1308 ctxt->nsTab[ctxt->nsNr++] = URL;
1309 return (ctxt->nsNr);
1310}
1311/**
1312 * nsPop:
1313 * @ctxt: an XML parser context
1314 * @nr: the number to pop
1315 *
1316 * Pops the top @nr parser prefix/namespace from the ns stack
1317 *
1318 * Returns the number of namespaces removed
1319 */
1320static int
1321nsPop(xmlParserCtxtPtr ctxt, int nr)
1322{
1323 int i;
1324
1325 if (ctxt->nsTab == NULL) return(0);
1326 if (ctxt->nsNr < nr) {
1327 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1328 nr = ctxt->nsNr;
1329 }
1330 if (ctxt->nsNr <= 0)
1331 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001332
Daniel Veillard0fb18932003-09-07 09:14:37 +00001333 for (i = 0;i < nr;i++) {
1334 ctxt->nsNr--;
1335 ctxt->nsTab[ctxt->nsNr] = NULL;
1336 }
1337 return(nr);
1338}
1339#endif
1340
1341static int
1342xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1343 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001344 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001345 int maxatts;
1346
1347 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001348 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001349 atts = (const xmlChar **)
1350 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001351 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001352 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001353 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1354 if (attallocs == NULL) goto mem_error;
1355 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001356 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001357 } else if (nr + 5 > ctxt->maxatts) {
1358 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001359 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1360 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001361 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001362 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001363 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1364 (maxatts / 5) * sizeof(int));
1365 if (attallocs == NULL) goto mem_error;
1366 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001367 ctxt->maxatts = maxatts;
1368 }
1369 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001370mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001371 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001372 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001373}
1374
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001375/**
1376 * inputPush:
1377 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001378 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001379 *
1380 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001381 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001382 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001383 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001384int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001385inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1386{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001387 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001388 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001389 if (ctxt->inputNr >= ctxt->inputMax) {
1390 ctxt->inputMax *= 2;
1391 ctxt->inputTab =
1392 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1393 ctxt->inputMax *
1394 sizeof(ctxt->inputTab[0]));
1395 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001396 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001397 xmlFreeInputStream(value);
1398 ctxt->inputMax /= 2;
1399 value = NULL;
1400 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001401 }
1402 }
1403 ctxt->inputTab[ctxt->inputNr] = value;
1404 ctxt->input = value;
1405 return (ctxt->inputNr++);
1406}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001407/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001408 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001409 * @ctxt: an XML parser context
1410 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001411 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001412 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001413 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001414 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001415xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001416inputPop(xmlParserCtxtPtr ctxt)
1417{
1418 xmlParserInputPtr ret;
1419
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001420 if (ctxt == NULL)
1421 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001422 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001423 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001424 ctxt->inputNr--;
1425 if (ctxt->inputNr > 0)
1426 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1427 else
1428 ctxt->input = NULL;
1429 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001430 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001431 return (ret);
1432}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001433/**
1434 * nodePush:
1435 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001436 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001437 *
1438 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001439 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001440 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001441 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001442int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001443nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1444{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001445 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001446 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001447 xmlNodePtr *tmp;
1448
1449 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1450 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001451 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001452 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001453 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001454 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001455 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001456 ctxt->nodeTab = tmp;
1457 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001458 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001459 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1460 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001461 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001462 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001463 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001464 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001465 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001466 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001467 ctxt->nodeTab[ctxt->nodeNr] = value;
1468 ctxt->node = value;
1469 return (ctxt->nodeNr++);
1470}
Daniel Veillard8915c152008-08-26 13:05:34 +00001471
Daniel Veillard1c732d22002-11-30 11:22:59 +00001472/**
1473 * nodePop:
1474 * @ctxt: an XML parser context
1475 *
1476 * Pops the top element node from the node stack
1477 *
1478 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001479 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001480xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001481nodePop(xmlParserCtxtPtr ctxt)
1482{
1483 xmlNodePtr ret;
1484
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001485 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001486 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001487 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001488 ctxt->nodeNr--;
1489 if (ctxt->nodeNr > 0)
1490 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1491 else
1492 ctxt->node = NULL;
1493 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001494 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001495 return (ret);
1496}
Daniel Veillarda2351322004-06-27 12:08:10 +00001497
1498#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001499/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001500 * nameNsPush:
1501 * @ctxt: an XML parser context
1502 * @value: the element name
1503 * @prefix: the element prefix
1504 * @URI: the element namespace name
1505 *
1506 * Pushes a new element name/prefix/URL on top of the name stack
1507 *
1508 * Returns -1 in case of error, the index in the stack otherwise
1509 */
1510static int
1511nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1512 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1513{
1514 if (ctxt->nameNr >= ctxt->nameMax) {
1515 const xmlChar * *tmp;
1516 void **tmp2;
1517 ctxt->nameMax *= 2;
1518 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1519 ctxt->nameMax *
1520 sizeof(ctxt->nameTab[0]));
1521 if (tmp == NULL) {
1522 ctxt->nameMax /= 2;
1523 goto mem_error;
1524 }
1525 ctxt->nameTab = tmp;
1526 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1527 ctxt->nameMax * 3 *
1528 sizeof(ctxt->pushTab[0]));
1529 if (tmp2 == NULL) {
1530 ctxt->nameMax /= 2;
1531 goto mem_error;
1532 }
1533 ctxt->pushTab = tmp2;
1534 }
1535 ctxt->nameTab[ctxt->nameNr] = value;
1536 ctxt->name = value;
1537 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1538 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001539 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001540 return (ctxt->nameNr++);
1541mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001542 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001543 return (-1);
1544}
1545/**
1546 * nameNsPop:
1547 * @ctxt: an XML parser context
1548 *
1549 * Pops the top element/prefix/URI name from the name stack
1550 *
1551 * Returns the name just removed
1552 */
1553static const xmlChar *
1554nameNsPop(xmlParserCtxtPtr ctxt)
1555{
1556 const xmlChar *ret;
1557
1558 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001559 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001560 ctxt->nameNr--;
1561 if (ctxt->nameNr > 0)
1562 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1563 else
1564 ctxt->name = NULL;
1565 ret = ctxt->nameTab[ctxt->nameNr];
1566 ctxt->nameTab[ctxt->nameNr] = NULL;
1567 return (ret);
1568}
Daniel Veillarda2351322004-06-27 12:08:10 +00001569#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001570
1571/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001572 * namePush:
1573 * @ctxt: an XML parser context
1574 * @value: the element name
1575 *
1576 * Pushes a new element name on top of the name stack
1577 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001578 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001579 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001580int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001581namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001582{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001583 if (ctxt == NULL) return (-1);
1584
Daniel Veillard1c732d22002-11-30 11:22:59 +00001585 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001586 const xmlChar * *tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001587 ctxt->nameMax *= 2;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001588 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Daniel Veillard1c732d22002-11-30 11:22:59 +00001589 ctxt->nameMax *
1590 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001591 if (tmp == NULL) {
1592 ctxt->nameMax /= 2;
1593 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001594 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001595 ctxt->nameTab = tmp;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001596 }
1597 ctxt->nameTab[ctxt->nameNr] = value;
1598 ctxt->name = value;
1599 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001600mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001601 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001602 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001603}
1604/**
1605 * namePop:
1606 * @ctxt: an XML parser context
1607 *
1608 * Pops the top element name from the name stack
1609 *
1610 * Returns the name just removed
1611 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001612const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001613namePop(xmlParserCtxtPtr ctxt)
1614{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001615 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001616
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001617 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1618 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001619 ctxt->nameNr--;
1620 if (ctxt->nameNr > 0)
1621 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1622 else
1623 ctxt->name = NULL;
1624 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001625 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001626 return (ret);
1627}
Owen Taylor3473f882001-02-23 17:55:21 +00001628
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001629static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001630 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001631 int *tmp;
1632
Owen Taylor3473f882001-02-23 17:55:21 +00001633 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001634 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1635 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1636 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001637 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001638 ctxt->spaceMax /=2;
1639 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001640 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001641 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001642 }
1643 ctxt->spaceTab[ctxt->spaceNr] = val;
1644 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1645 return(ctxt->spaceNr++);
1646}
1647
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001648static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001649 int ret;
1650 if (ctxt->spaceNr <= 0) return(0);
1651 ctxt->spaceNr--;
1652 if (ctxt->spaceNr > 0)
1653 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1654 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001655 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001656 ret = ctxt->spaceTab[ctxt->spaceNr];
1657 ctxt->spaceTab[ctxt->spaceNr] = -1;
1658 return(ret);
1659}
1660
1661/*
1662 * Macros for accessing the content. Those should be used only by the parser,
1663 * and not exported.
1664 *
 * Dirty macros, i.e. one often needs to make assumptions about the context
 * to use them
1667 *
1668 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1669 * To be used with extreme caution since operations consuming
1670 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
1675 * RAW same as CUR but in the input buffer, bypass any token
1676 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
1679 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001680 * strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 *           defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001683 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1684 *
1685 * NEXT Skip to the next character, this does the proper decoding
1686 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001687 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001688 * CUR_CHAR(l) returns the current unicode character (int), set l
1689 * to the number of xmlChars used for the encoding [0-5].
1690 * CUR_SCHAR same but operate on a string instead of the context
1691 * COPY_BUF copy the current unicode char to the target buffer, increment
1692 * the index
1693 * GROW, SHRINK handling of input buffers
1694 */
1695
/*
 * Raw accessors on the current input. They all expand to expressions
 * referring to a variable named ctxt in the enclosing scope.
 * CUR_PTR is deliberately left as a plain member expression.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur

/*
 * CMPn(s, c1, ..., cn) is non-zero when the first n bytes found at s are
 * exactly c1 ... cn. Bytes are compared as unsigned char and comparison
 * stops at the first mismatch, so a NUL-terminated string shorter than n
 * is never read past its terminator. The argument s is evaluated several
 * times: do not pass an expression with side effects.
 * Every macro parameter is parenthesized and the cast keeps the pointee
 * const so that const xmlChar pointers can be passed without dropping
 * the qualifier.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1718
/*
 * SKIP(val): advance the current input by val xmlChars, adding val to
 * the character and column counters (no line accounting is done). Then
 * hand over to xmlParserHandlePEReference() if a '%' is reached, and try
 * to refill or pop the input if its terminating 0 is reached.
 * val is evaluated three times.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    ctxt->input->col += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)
1726
/*
 * SKIPL(val): advance the current input by val xmlChars one at a time,
 * keeping the line/column counters accurate when newlines are crossed.
 * Afterwards behaves like SKIP: hands over to
 * xmlParserHandlePEReference() on '%' and tries to refill or pop the
 * input when its terminating 0 is reached.
 * val is parenthesized so that an expression argument keeps its meaning
 * inside the loop condition; it is still re-evaluated on each iteration.
 */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	    ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->nbChars++;						\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)
1741
Daniel Veillarda880b122003-04-21 21:36:41 +00001742#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001743 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1744 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001745 xmlSHRINK (ctxt);
1746
1747static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1748 xmlParserInputShrink(ctxt->input);
1749 if ((*ctxt->input->cur == 0) &&
1750 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1751 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00001752 }
Owen Taylor3473f882001-02-23 17:55:21 +00001753
Daniel Veillarda880b122003-04-21 21:36:41 +00001754#define GROW if ((ctxt->progressive == 0) && \
1755 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001756 xmlGROW (ctxt);
1757
1758static void xmlGROW (xmlParserCtxtPtr ctxt) {
1759 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1760 if ((*ctxt->input->cur == 0) &&
1761 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1762 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00001763}
Owen Taylor3473f882001-02-23 17:55:21 +00001764
/* Skip blanks at the current position, see xmlSkipBlankChars() */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* Move to the next character through xmlNextChar() */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: advance by exactly one xmlChar, bumping the column and character
 * counters, and grow the input if its terminating 0 is reached.
 * NOTE: expands to a brace block, not a do/while(0) statement.
 */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL(l): skip the current character, which is l xmlChars long,
 * updating line/col according to whether it is a newline, then hand
 * over to xmlParserHandlePEReference() if a '%' is reached.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* Decode the current character; l must be an lvalue receiving its length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i; a character of length l == 1 is stored directly, anything
 * else goes through xmlCopyCharMultiByte().
 * NOTE: expands to a bare if/else statement; the caller supplies the
 * final ';'.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +00001791
1792/**
1793 * xmlSkipBlankChars:
1794 * @ctxt: the XML parser context
1795 *
1796 * skip all blanks character found at that point in the input streams.
1797 * It pops up finished entities in the process if allowable at that point.
1798 *
1799 * Returns the number of space chars skipped
1800 */
1801
1802int
1803xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001804 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001805
1806 /*
1807 * It's Okay to use CUR/NEXT here since all the blanks are on
1808 * the ASCII range.
1809 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001810 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
1811 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00001812 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00001813 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00001814 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001815 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00001816 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00001817 if (*cur == '\n') {
1818 ctxt->input->line++; ctxt->input->col = 1;
1819 }
1820 cur++;
1821 res++;
1822 if (*cur == 0) {
1823 ctxt->input->cur = cur;
1824 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
1825 cur = ctxt->input->cur;
1826 }
1827 }
1828 ctxt->input->cur = cur;
1829 } else {
1830 int cur;
1831 do {
1832 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00001833 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00001834 NEXT;
1835 cur = CUR;
1836 res++;
1837 }
1838 while ((cur == 0) && (ctxt->inputNr > 1) &&
1839 (ctxt->instate != XML_PARSER_COMMENT)) {
1840 xmlPopInput(ctxt);
1841 cur = CUR;
1842 }
1843 /*
1844 * Need to handle support of entities branching here
1845 */
1846 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1847 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
1848 }
Owen Taylor3473f882001-02-23 17:55:21 +00001849 return(res);
1850}
1851
1852/************************************************************************
1853 * *
1854 * Commodity functions to handle entities *
1855 * *
1856 ************************************************************************/
1857
1858/**
1859 * xmlPopInput:
1860 * @ctxt: an XML parser context
1861 *
1862 * xmlPopInput: the current input pointed by ctxt->input came to an end
1863 * pop it and return the next char.
1864 *
1865 * Returns the current xmlChar in the parser context
1866 */
1867xmlChar
1868xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00001869 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001870 if (xmlParserDebugEntities)
1871 xmlGenericError(xmlGenericErrorContext,
1872 "Popping input %d\n", ctxt->inputNr);
1873 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00001874 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00001875 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1876 return(xmlPopInput(ctxt));
1877 return(CUR);
1878}
1879
1880/**
1881 * xmlPushInput:
1882 * @ctxt: an XML parser context
1883 * @input: an XML parser input fragment (entity, XML fragment ...).
1884 *
1885 * xmlPushInput: switch to a new input stream which is stacked on top
1886 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001887 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00001888 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001889int
Owen Taylor3473f882001-02-23 17:55:21 +00001890xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001891 int ret;
1892 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001893
1894 if (xmlParserDebugEntities) {
1895 if ((ctxt->input != NULL) && (ctxt->input->filename))
1896 xmlGenericError(xmlGenericErrorContext,
1897 "%s(%d): ", ctxt->input->filename,
1898 ctxt->input->line);
1899 xmlGenericError(xmlGenericErrorContext,
1900 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1901 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001902 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00001903 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001904 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00001905}
1906
1907/**
1908 * xmlParseCharRef:
1909 * @ctxt: an XML parser context
1910 *
1911 * parse Reference declarations
1912 *
1913 * [66] CharRef ::= '&#' [0-9]+ ';' |
1914 * '&#x' [0-9a-fA-F]+ ';'
1915 *
1916 * [ WFC: Legal Character ]
1917 * Characters referred to using character references must match the
1918 * production for Char.
1919 *
1920 * Returns the value parsed (as an int), 0 in case of error
1921 */
1922int
1923xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00001924 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001925 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00001926 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00001927
Owen Taylor3473f882001-02-23 17:55:21 +00001928 /*
1929 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1930 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001931 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00001932 (NXT(2) == 'x')) {
1933 SKIP(3);
1934 GROW;
1935 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001936 if (count++ > 20) {
1937 count = 0;
1938 GROW;
1939 }
1940 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001941 val = val * 16 + (CUR - '0');
1942 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
1943 val = val * 16 + (CUR - 'a') + 10;
1944 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
1945 val = val * 16 + (CUR - 'A') + 10;
1946 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001947 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001948 val = 0;
1949 break;
1950 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001951 if (val > 0x10FFFF)
1952 outofrange = val;
1953
Owen Taylor3473f882001-02-23 17:55:21 +00001954 NEXT;
1955 count++;
1956 }
1957 if (RAW == ';') {
1958 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001959 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001960 ctxt->nbChars ++;
1961 ctxt->input->cur++;
1962 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00001963 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00001964 SKIP(2);
1965 GROW;
1966 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00001967 if (count++ > 20) {
1968 count = 0;
1969 GROW;
1970 }
1971 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00001972 val = val * 10 + (CUR - '0');
1973 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001974 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001975 val = 0;
1976 break;
1977 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00001978 if (val > 0x10FFFF)
1979 outofrange = val;
1980
Owen Taylor3473f882001-02-23 17:55:21 +00001981 NEXT;
1982 count++;
1983 }
1984 if (RAW == ';') {
1985 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00001986 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00001987 ctxt->nbChars ++;
1988 ctxt->input->cur++;
1989 }
1990 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001991 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00001992 }
1993
1994 /*
1995 * [ WFC: Legal Character ]
1996 * Characters referred to using character references must match the
1997 * production for Char.
1998 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00001999 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002000 return(val);
2001 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002002 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2003 "xmlParseCharRef: invalid xmlChar value %d\n",
2004 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002005 }
2006 return(0);
2007}
2008
2009/**
2010 * xmlParseStringCharRef:
2011 * @ctxt: an XML parser context
2012 * @str: a pointer to an index in the string
2013 *
2014 * parse Reference declarations, variant parsing from a string rather
2015 * than an an input flow.
2016 *
2017 * [66] CharRef ::= '&#' [0-9]+ ';' |
2018 * '&#x' [0-9a-fA-F]+ ';'
2019 *
2020 * [ WFC: Legal Character ]
2021 * Characters referred to using character references must match the
2022 * production for Char.
2023 *
2024 * Returns the value parsed (as an int), 0 in case of error, str will be
2025 * updated to the current value of the index
2026 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002027static int
Owen Taylor3473f882001-02-23 17:55:21 +00002028xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2029 const xmlChar *ptr;
2030 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002031 unsigned int val = 0;
2032 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002033
2034 if ((str == NULL) || (*str == NULL)) return(0);
2035 ptr = *str;
2036 cur = *ptr;
2037 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2038 ptr += 3;
2039 cur = *ptr;
2040 while (cur != ';') { /* Non input consuming loop */
2041 if ((cur >= '0') && (cur <= '9'))
2042 val = val * 16 + (cur - '0');
2043 else if ((cur >= 'a') && (cur <= 'f'))
2044 val = val * 16 + (cur - 'a') + 10;
2045 else if ((cur >= 'A') && (cur <= 'F'))
2046 val = val * 16 + (cur - 'A') + 10;
2047 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002048 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002049 val = 0;
2050 break;
2051 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002052 if (val > 0x10FFFF)
2053 outofrange = val;
2054
Owen Taylor3473f882001-02-23 17:55:21 +00002055 ptr++;
2056 cur = *ptr;
2057 }
2058 if (cur == ';')
2059 ptr++;
2060 } else if ((cur == '&') && (ptr[1] == '#')){
2061 ptr += 2;
2062 cur = *ptr;
2063 while (cur != ';') { /* Non input consuming loops */
2064 if ((cur >= '0') && (cur <= '9'))
2065 val = val * 10 + (cur - '0');
2066 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002067 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002068 val = 0;
2069 break;
2070 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002071 if (val > 0x10FFFF)
2072 outofrange = val;
2073
Owen Taylor3473f882001-02-23 17:55:21 +00002074 ptr++;
2075 cur = *ptr;
2076 }
2077 if (cur == ';')
2078 ptr++;
2079 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002080 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002081 return(0);
2082 }
2083 *str = ptr;
2084
2085 /*
2086 * [ WFC: Legal Character ]
2087 * Characters referred to using character references must match the
2088 * production for Char.
2089 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002090 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002091 return(val);
2092 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002093 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2094 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2095 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002096 }
2097 return(0);
2098}
2099
2100/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002101 * xmlNewBlanksWrapperInputStream:
2102 * @ctxt: an XML parser context
2103 * @entity: an Entity pointer
2104 *
2105 * Create a new input stream for wrapping
2106 * blanks around a PEReference
2107 *
2108 * Returns the new input stream or NULL
2109 */
2110
2111static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2112
Daniel Veillardf4862f02002-09-10 11:13:43 +00002113static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002114xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2115 xmlParserInputPtr input;
2116 xmlChar *buffer;
2117 size_t length;
2118 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002119 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2120 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002121 return(NULL);
2122 }
2123 if (xmlParserDebugEntities)
2124 xmlGenericError(xmlGenericErrorContext,
2125 "new blanks wrapper for entity: %s\n", entity->name);
2126 input = xmlNewInputStream(ctxt);
2127 if (input == NULL) {
2128 return(NULL);
2129 }
2130 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002131 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002132 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002133 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002134 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002135 return(NULL);
2136 }
2137 buffer [0] = ' ';
2138 buffer [1] = '%';
2139 buffer [length-3] = ';';
2140 buffer [length-2] = ' ';
2141 buffer [length-1] = 0;
2142 memcpy(buffer + 2, entity->name, length - 5);
2143 input->free = deallocblankswrapper;
2144 input->base = buffer;
2145 input->cur = buffer;
2146 input->length = length;
2147 input->end = &buffer[length];
2148 return(input);
2149}
2150
2151/**
Owen Taylor3473f882001-02-23 17:55:21 +00002152 * xmlParserHandlePEReference:
2153 * @ctxt: the parser context
2154 *
2155 * [69] PEReference ::= '%' Name ';'
2156 *
2157 * [ WFC: No Recursion ]
2158 * A parsed entity must not contain a recursive
2159 * reference to itself, either directly or indirectly.
2160 *
2161 * [ WFC: Entity Declared ]
2162 * In a document without any DTD, a document with only an internal DTD
2163 * subset which contains no parameter entity references, or a document
2164 * with "standalone='yes'", ... ... The declaration of a parameter
2165 * entity must precede any reference to it...
2166 *
2167 * [ VC: Entity Declared ]
2168 * In a document with an external subset or external parameter entities
2169 * with "standalone='no'", ... ... The declaration of a parameter entity
2170 * must precede any reference to it...
2171 *
2172 * [ WFC: In DTD ]
2173 * Parameter-entity references may only appear in the DTD.
2174 * NOTE: misleading but this is handled.
2175 *
2176 * A PEReference may have been detected in the current input stream
2177 * the handling is done accordingly to
2178 * http://www.w3.org/TR/REC-xml#entproc
2179 * i.e.
2180 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002181 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002182 */
2183void
2184xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002185 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002186 xmlEntityPtr entity = NULL;
2187 xmlParserInputPtr input;
2188
Owen Taylor3473f882001-02-23 17:55:21 +00002189 if (RAW != '%') return;
2190 switch(ctxt->instate) {
2191 case XML_PARSER_CDATA_SECTION:
2192 return;
2193 case XML_PARSER_COMMENT:
2194 return;
2195 case XML_PARSER_START_TAG:
2196 return;
2197 case XML_PARSER_END_TAG:
2198 return;
2199 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002200 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002201 return;
2202 case XML_PARSER_PROLOG:
2203 case XML_PARSER_START:
2204 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002205 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002206 return;
2207 case XML_PARSER_ENTITY_DECL:
2208 case XML_PARSER_CONTENT:
2209 case XML_PARSER_ATTRIBUTE_VALUE:
2210 case XML_PARSER_PI:
2211 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002212 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002213 /* we just ignore it there */
2214 return;
2215 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002216 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002217 return;
2218 case XML_PARSER_ENTITY_VALUE:
2219 /*
2220 * NOTE: in the case of entity values, we don't do the
2221 * substitution here since we need the literal
2222 * entity value to be able to save the internal
2223 * subset of the document.
2224 * This will be handled by xmlStringDecodeEntities
2225 */
2226 return;
2227 case XML_PARSER_DTD:
2228 /*
2229 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2230 * In the internal DTD subset, parameter-entity references
2231 * can occur only where markup declarations can occur, not
2232 * within markup declarations.
2233 * In that case this is handled in xmlParseMarkupDecl
2234 */
2235 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2236 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002237 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002238 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002239 break;
2240 case XML_PARSER_IGNORE:
2241 return;
2242 }
2243
2244 NEXT;
2245 name = xmlParseName(ctxt);
2246 if (xmlParserDebugEntities)
2247 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002248 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002249 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002250 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002251 } else {
2252 if (RAW == ';') {
2253 NEXT;
2254 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2255 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2256 if (entity == NULL) {
2257
2258 /*
2259 * [ WFC: Entity Declared ]
2260 * In a document without any DTD, a document with only an
2261 * internal DTD subset which contains no parameter entity
2262 * references, or a document with "standalone='yes'", ...
2263 * ... The declaration of a parameter entity must precede
2264 * any reference to it...
2265 */
2266 if ((ctxt->standalone == 1) ||
2267 ((ctxt->hasExternalSubset == 0) &&
2268 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002269 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002270 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002271 } else {
2272 /*
2273 * [ VC: Entity Declared ]
2274 * In a document with an external subset or external
2275 * parameter entities with "standalone='no'", ...
2276 * ... The declaration of a parameter entity must precede
2277 * any reference to it...
2278 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002279 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2280 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2281 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002282 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002283 } else
2284 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2285 "PEReference: %%%s; not found\n",
2286 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002287 ctxt->valid = 0;
2288 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002289 } else if (ctxt->input->free != deallocblankswrapper) {
2290 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002291 if (xmlPushInput(ctxt, input) < 0)
2292 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002293 } else {
2294 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2295 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002296 xmlChar start[4];
2297 xmlCharEncoding enc;
2298
Owen Taylor3473f882001-02-23 17:55:21 +00002299 /*
2300 * handle the extra spaces added before and after
2301 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002302 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002303 */
2304 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002305 if (xmlPushInput(ctxt, input) < 0)
2306 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002307
2308 /*
2309 * Get the 4 first bytes and decode the charset
2310 * if enc != XML_CHAR_ENCODING_NONE
2311 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002312 * Note that, since we may have some non-UTF8
2313 * encoding (like UTF16, bug 135229), the 'length'
2314 * is not known, but we can calculate based upon
2315 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002316 */
2317 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002318 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002319 start[0] = RAW;
2320 start[1] = NXT(1);
2321 start[2] = NXT(2);
2322 start[3] = NXT(3);
2323 enc = xmlDetectCharEncoding(start, 4);
2324 if (enc != XML_CHAR_ENCODING_NONE) {
2325 xmlSwitchEncoding(ctxt, enc);
2326 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002327 }
2328
Owen Taylor3473f882001-02-23 17:55:21 +00002329 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002330 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2331 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002332 xmlParseTextDecl(ctxt);
2333 }
Owen Taylor3473f882001-02-23 17:55:21 +00002334 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002335 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2336 "PEReference: %s is not a parameter entity\n",
2337 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002338 }
2339 }
2340 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002341 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002342 }
Owen Taylor3473f882001-02-23 17:55:21 +00002343 }
2344}
2345
/*
 * growBuffer:
 * @buffer:  name of the xmlChar * variable holding the buffer to enlarge
 *
 * Doubles the capacity of @buffer. The expansion site must provide a
 * variable named <buffer>_size holding the current capacity and a
 * mem_error label, which is jumped to when the reallocation fails.
 * On failure @buffer still points to the old allocation, so the error
 * path can release it.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *grown;                                                     \
    buffer##_size *= 2;                                                 \
    grown = (xmlChar *)                                                 \
        xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));            \
    if (grown == NULL) goto mem_error;                                  \
    buffer = grown;                                                     \
}
2357
2358/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002359 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002360 * @ctxt: the parser context
2361 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002362 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002363 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2364 * @end: an end marker xmlChar, 0 if none
2365 * @end2: an end marker xmlChar, 0 if none
2366 * @end3: an end marker xmlChar, 0 if none
2367 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002368 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002369 *
2370 * [67] Reference ::= EntityRef | CharRef
2371 *
2372 * [69] PEReference ::= '%' Name ';'
2373 *
2374 * Returns A newly allocated string with the substitution done. The caller
2375 * must deallocate it !
2376 */
2377xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002378xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2379 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002380 xmlChar *buffer = NULL;
2381 int buffer_size = 0;
2382
2383 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002384 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002385 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002386 xmlEntityPtr ent;
2387 int c,l;
2388 int nbchars = 0;
2389
Daniel Veillarda82b1822004-11-08 16:24:57 +00002390 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002391 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002392 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002393
Daniel Veillard8915c152008-08-26 13:05:34 +00002394 if (((ctxt->depth > 20) || (ctxt->nbentities >= 100000)) &&
2395 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002396 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002397 return(NULL);
2398 }
2399
2400 /*
2401 * allocate a translation buffer.
2402 */
2403 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002404 buffer = (xmlChar *) xmlMallocAtomic(buffer_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002405 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002406
2407 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002408 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002409 * we are operating on already parsed values.
2410 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002411 if (str < last)
2412 c = CUR_SCHAR(str, l);
2413 else
2414 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002415 while ((c != 0) && (c != end) && /* non input consuming loop */
2416 (c != end2) && (c != end3)) {
2417
2418 if (c == 0) break;
2419 if ((c == '&') && (str[1] == '#')) {
2420 int val = xmlParseStringCharRef(ctxt, &str);
2421 if (val != 0) {
2422 COPY_BUF(0,buffer,nbchars,val);
2423 }
Daniel Veillardbedc9772005-09-28 21:42:15 +00002424 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2425 growBuffer(buffer);
2426 }
Owen Taylor3473f882001-02-23 17:55:21 +00002427 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2428 if (xmlParserDebugEntities)
2429 xmlGenericError(xmlGenericErrorContext,
2430 "String decoding Entity Reference: %.30s\n",
2431 str);
2432 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002433 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2434 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002435 goto int_error;
2436 ctxt->nbentities++;
2437 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002438 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002439 if ((ent != NULL) &&
2440 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2441 if (ent->content != NULL) {
2442 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002443 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2444 growBuffer(buffer);
2445 }
Owen Taylor3473f882001-02-23 17:55:21 +00002446 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002447 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2448 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002449 }
2450 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002451 ctxt->depth++;
2452 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2453 0, 0, 0);
2454 ctxt->depth--;
2455 if (rep != NULL) {
2456 current = rep;
2457 while (*current != 0) { /* non input consuming loop */
2458 buffer[nbchars++] = *current++;
2459 if (nbchars >
2460 buffer_size - XML_PARSER_BUFFER_SIZE) {
2461 growBuffer(buffer);
2462 }
2463 }
2464 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002465 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002466 }
2467 } else if (ent != NULL) {
2468 int i = xmlStrlen(ent->name);
2469 const xmlChar *cur = ent->name;
2470
2471 buffer[nbchars++] = '&';
2472 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
2473 growBuffer(buffer);
2474 }
2475 for (;i > 0;i--)
2476 buffer[nbchars++] = *cur++;
2477 buffer[nbchars++] = ';';
2478 }
2479 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2480 if (xmlParserDebugEntities)
2481 xmlGenericError(xmlGenericErrorContext,
2482 "String decoding PE Reference: %.30s\n", str);
2483 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002484 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2485 goto int_error;
2486 ctxt->nbentities++;
2487 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002488 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002489 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002490 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002491 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002492 }
Owen Taylor3473f882001-02-23 17:55:21 +00002493 ctxt->depth++;
2494 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2495 0, 0, 0);
2496 ctxt->depth--;
2497 if (rep != NULL) {
2498 current = rep;
2499 while (*current != 0) { /* non input consuming loop */
2500 buffer[nbchars++] = *current++;
2501 if (nbchars >
2502 buffer_size - XML_PARSER_BUFFER_SIZE) {
2503 growBuffer(buffer);
2504 }
2505 }
2506 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002507 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002508 }
2509 }
2510 } else {
2511 COPY_BUF(l,buffer,nbchars,c);
2512 str += l;
2513 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
2514 growBuffer(buffer);
2515 }
2516 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002517 if (str < last)
2518 c = CUR_SCHAR(str, l);
2519 else
2520 c = 0;
Daniel Veillard8915c152008-08-26 13:05:34 +00002521 if ((nbchars > 100000) &&
2522 (ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
2523 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
2524 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
2525 "Excessive lenght of attribute: %d use XML_PARSE_HUGE option\n",
2526 nbchars);
2527 goto int_error;
2528 }
Owen Taylor3473f882001-02-23 17:55:21 +00002529 }
2530 buffer[nbchars++] = 0;
2531 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002532
2533mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002534 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002535int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002536 if (rep != NULL)
2537 xmlFree(rep);
2538 if (buffer != NULL)
2539 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002540 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002541}
2542
Daniel Veillarde57ec792003-09-10 10:50:59 +00002543/**
2544 * xmlStringDecodeEntities:
2545 * @ctxt: the parser context
2546 * @str: the input string
2547 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2548 * @end: an end marker xmlChar, 0 if none
2549 * @end2: an end marker xmlChar, 0 if none
2550 * @end3: an end marker xmlChar, 0 if none
2551 *
2552 * Takes a entity string content and process to do the adequate substitutions.
2553 *
2554 * [67] Reference ::= EntityRef | CharRef
2555 *
2556 * [69] PEReference ::= '%' Name ';'
2557 *
2558 * Returns A newly allocated string with the substitution done. The caller
2559 * must deallocate it !
2560 */
2561xmlChar *
2562xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2563 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002564 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002565 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2566 end, end2, end3));
2567}
Owen Taylor3473f882001-02-23 17:55:21 +00002568
2569/************************************************************************
2570 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002571 * Commodity functions, cleanup needed ? *
2572 * *
2573 ************************************************************************/
2574
2575/**
2576 * areBlanks:
2577 * @ctxt: an XML parser context
2578 * @str: a xmlChar *
2579 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002580 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002581 *
2582 * Is this a sequence of blank chars that one can ignore ?
2583 *
2584 * Returns 1 if ignorable 0 otherwise.
2585 */
2586
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002587static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2588 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002589 int i, ret;
2590 xmlNodePtr lastChild;
2591
Daniel Veillard05c13a22001-09-09 08:38:09 +00002592 /*
2593 * Don't spend time trying to differentiate them, the same callback is
2594 * used !
2595 */
2596 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002597 return(0);
2598
Owen Taylor3473f882001-02-23 17:55:21 +00002599 /*
2600 * Check for xml:space value.
2601 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002602 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2603 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002604 return(0);
2605
2606 /*
2607 * Check that the string is made of blanks
2608 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002609 if (blank_chars == 0) {
2610 for (i = 0;i < len;i++)
2611 if (!(IS_BLANK_CH(str[i]))) return(0);
2612 }
Owen Taylor3473f882001-02-23 17:55:21 +00002613
2614 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002615 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002616 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002617 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002618 if (ctxt->myDoc != NULL) {
2619 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2620 if (ret == 0) return(1);
2621 if (ret == 1) return(0);
2622 }
2623
2624 /*
2625 * Otherwise, heuristic :-\
2626 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002627 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002628 if ((ctxt->node->children == NULL) &&
2629 (RAW == '<') && (NXT(1) == '/')) return(0);
2630
2631 lastChild = xmlGetLastChild(ctxt->node);
2632 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002633 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2634 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002635 } else if (xmlNodeIsText(lastChild))
2636 return(0);
2637 else if ((ctxt->node->children != NULL) &&
2638 (xmlNodeIsText(ctxt->node->children)))
2639 return(0);
2640 return(1);
2641}
2642
Owen Taylor3473f882001-02-23 17:55:21 +00002643/************************************************************************
2644 * *
2645 * Extra stuff for namespace support *
2646 * Relates to http://www.w3.org/TR/WD-xml-names *
2647 * *
2648 ************************************************************************/
2649
2650/**
2651 * xmlSplitQName:
2652 * @ctxt: an XML parser context
2653 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002654 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002655 *
2656 * parse an UTF8 encoded XML qualified name string
2657 *
2658 * [NS 5] QName ::= (Prefix ':')? LocalPart
2659 *
2660 * [NS 6] Prefix ::= NCName
2661 *
2662 * [NS 7] LocalPart ::= NCName
2663 *
2664 * Returns the local part, and prefix is updated
2665 * to get the Prefix if any.
2666 */
2667
2668xmlChar *
2669xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2670 xmlChar buf[XML_MAX_NAMELEN + 5];
2671 xmlChar *buffer = NULL;
2672 int len = 0;
2673 int max = XML_MAX_NAMELEN;
2674 xmlChar *ret = NULL;
2675 const xmlChar *cur = name;
2676 int c;
2677
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002678 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002679 *prefix = NULL;
2680
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002681 if (cur == NULL) return(NULL);
2682
Owen Taylor3473f882001-02-23 17:55:21 +00002683#ifndef XML_XML_NAMESPACE
2684 /* xml: prefix is not really a namespace */
2685 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2686 (cur[2] == 'l') && (cur[3] == ':'))
2687 return(xmlStrdup(name));
2688#endif
2689
Daniel Veillard597bc482003-07-24 16:08:28 +00002690 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002691 if (cur[0] == ':')
2692 return(xmlStrdup(name));
2693
2694 c = *cur++;
2695 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2696 buf[len++] = c;
2697 c = *cur++;
2698 }
2699 if (len >= max) {
2700 /*
2701 * Okay someone managed to make a huge name, so he's ready to pay
2702 * for the processing speed.
2703 */
2704 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002705
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002706 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002707 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002708 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002709 return(NULL);
2710 }
2711 memcpy(buffer, buf, len);
2712 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2713 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002714 xmlChar *tmp;
2715
Owen Taylor3473f882001-02-23 17:55:21 +00002716 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002717 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002718 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002719 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002720 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002721 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002722 return(NULL);
2723 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002724 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002725 }
2726 buffer[len++] = c;
2727 c = *cur++;
2728 }
2729 buffer[len] = 0;
2730 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002731
Daniel Veillard597bc482003-07-24 16:08:28 +00002732 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002733 if (buffer != NULL)
2734 xmlFree(buffer);
2735 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002736 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002737 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002738
Owen Taylor3473f882001-02-23 17:55:21 +00002739 if (buffer == NULL)
2740 ret = xmlStrndup(buf, len);
2741 else {
2742 ret = buffer;
2743 buffer = NULL;
2744 max = XML_MAX_NAMELEN;
2745 }
2746
2747
2748 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002749 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002750 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002751 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002752 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002753 }
Owen Taylor3473f882001-02-23 17:55:21 +00002754 len = 0;
2755
Daniel Veillardbb284f42002-10-16 18:02:47 +00002756 /*
2757 * Check that the first character is proper to start
2758 * a new name
2759 */
2760 if (!(((c >= 0x61) && (c <= 0x7A)) ||
2761 ((c >= 0x41) && (c <= 0x5A)) ||
2762 (c == '_') || (c == ':'))) {
2763 int l;
2764 int first = CUR_SCHAR(cur, l);
2765
2766 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002767 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00002768 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002769 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00002770 }
2771 }
2772 cur++;
2773
Owen Taylor3473f882001-02-23 17:55:21 +00002774 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
2775 buf[len++] = c;
2776 c = *cur++;
2777 }
2778 if (len >= max) {
2779 /*
2780 * Okay someone managed to make a huge name, so he's ready to pay
2781 * for the processing speed.
2782 */
2783 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002784
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002785 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002786 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002787 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002788 return(NULL);
2789 }
2790 memcpy(buffer, buf, len);
2791 while (c != 0) { /* tested bigname2.xml */
2792 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002793 xmlChar *tmp;
2794
Owen Taylor3473f882001-02-23 17:55:21 +00002795 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002796 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002797 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002798 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002799 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00002800 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00002801 return(NULL);
2802 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002803 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002804 }
2805 buffer[len++] = c;
2806 c = *cur++;
2807 }
2808 buffer[len] = 0;
2809 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00002810
Owen Taylor3473f882001-02-23 17:55:21 +00002811 if (buffer == NULL)
2812 ret = xmlStrndup(buf, len);
2813 else {
2814 ret = buffer;
2815 }
2816 }
2817
2818 return(ret);
2819}
2820
2821/************************************************************************
2822 * *
2823 * The parser itself *
2824 * Relates to http://www.w3.org/TR/REC-xml *
2825 * *
2826 ************************************************************************/
2827
Daniel Veillard34e3f642008-07-29 09:02:27 +00002828/************************************************************************
2829 * *
2830 * Routines to parse Name, NCName and NmToken *
2831 * *
2832 ************************************************************************/
/*
 * Call counters for the name parsing routines. Each routine increments
 * the counter carrying its name on entry (see xmlParseName,
 * xmlParseNameComplex and xmlParseNCNameComplex below; the remaining
 * counters are presumably maintained the same way by their routines).
 */
unsigned long nbParseName = 0;
unsigned long nbParseNmToken = 0;
unsigned long nbParseNCName = 0;
unsigned long nbParseNCNameComplex = 0;
unsigned long nbParseNameComplex = 0;
unsigned long nbParseStringName = 0;
2839/*
2840 * The two following functions are related to the change of accepted
2841 * characters for Name and NmToken in the Revision 5 of XML-1.0
2842 * They correspond to the modified production [4] and the new production [4a]
2843 * changes in that revision. Also note that the macros used for the
2844 * productions Letter, Digit, CombiningChar and Extender are not needed
2845 * anymore.
2846 * We still keep compatibility to pre-revision5 parsing semantic if the
2847 * new XML_PARSE_OLD10 option is given to the parser.
2848 */
2849static int
2850xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
2851 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2852 /*
2853 * Use the new checks of production [4] [4a] amd [5] of the
2854 * Update 5 of XML-1.0
2855 */
2856 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2857 (((c >= 'a') && (c <= 'z')) ||
2858 ((c >= 'A') && (c <= 'Z')) ||
2859 (c == '_') || (c == ':') ||
2860 ((c >= 0xC0) && (c <= 0xD6)) ||
2861 ((c >= 0xD8) && (c <= 0xF6)) ||
2862 ((c >= 0xF8) && (c <= 0x2FF)) ||
2863 ((c >= 0x370) && (c <= 0x37D)) ||
2864 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2865 ((c >= 0x200C) && (c <= 0x200D)) ||
2866 ((c >= 0x2070) && (c <= 0x218F)) ||
2867 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2868 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2869 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2870 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2871 ((c >= 0x10000) && (c <= 0xEFFFF))))
2872 return(1);
2873 } else {
2874 if (IS_LETTER(c) || (c == '_') || (c == ':'))
2875 return(1);
2876 }
2877 return(0);
2878}
2879
2880static int
2881xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
2882 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2883 /*
2884 * Use the new checks of production [4] [4a] amd [5] of the
2885 * Update 5 of XML-1.0
2886 */
2887 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2888 (((c >= 'a') && (c <= 'z')) ||
2889 ((c >= 'A') && (c <= 'Z')) ||
2890 ((c >= '0') && (c <= '9')) || /* !start */
2891 (c == '_') || (c == ':') ||
2892 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2893 ((c >= 0xC0) && (c <= 0xD6)) ||
2894 ((c >= 0xD8) && (c <= 0xF6)) ||
2895 ((c >= 0xF8) && (c <= 0x2FF)) ||
2896 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2897 ((c >= 0x370) && (c <= 0x37D)) ||
2898 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2899 ((c >= 0x200C) && (c <= 0x200D)) ||
2900 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2901 ((c >= 0x2070) && (c <= 0x218F)) ||
2902 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2903 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2904 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2905 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2906 ((c >= 0x10000) && (c <= 0xEFFFF))))
2907 return(1);
2908 } else {
2909 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
2910 (c == '.') || (c == '-') ||
2911 (c == '_') || (c == ':') ||
2912 (IS_COMBINING(c)) ||
2913 (IS_EXTENDER(c)))
2914 return(1);
2915 }
2916 return(0);
2917}
2918
Daniel Veillarde57ec792003-09-10 10:50:59 +00002919static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002920 int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00002921
Daniel Veillard34e3f642008-07-29 09:02:27 +00002922static const xmlChar *
2923xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
2924 int len = 0, l;
2925 int c;
2926 int count = 0;
2927
2928 nbParseNameComplex++;
2929
2930 /*
2931 * Handler for more complex cases
2932 */
2933 GROW;
2934 c = CUR_CHAR(l);
2935 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
2936 /*
2937 * Use the new checks of production [4] [4a] amd [5] of the
2938 * Update 5 of XML-1.0
2939 */
2940 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2941 (!(((c >= 'a') && (c <= 'z')) ||
2942 ((c >= 'A') && (c <= 'Z')) ||
2943 (c == '_') || (c == ':') ||
2944 ((c >= 0xC0) && (c <= 0xD6)) ||
2945 ((c >= 0xD8) && (c <= 0xF6)) ||
2946 ((c >= 0xF8) && (c <= 0x2FF)) ||
2947 ((c >= 0x370) && (c <= 0x37D)) ||
2948 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2949 ((c >= 0x200C) && (c <= 0x200D)) ||
2950 ((c >= 0x2070) && (c <= 0x218F)) ||
2951 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2952 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2953 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2954 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2955 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
2956 return(NULL);
2957 }
2958 len += l;
2959 NEXTL(l);
2960 c = CUR_CHAR(l);
2961 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
2962 (((c >= 'a') && (c <= 'z')) ||
2963 ((c >= 'A') && (c <= 'Z')) ||
2964 ((c >= '0') && (c <= '9')) || /* !start */
2965 (c == '_') || (c == ':') ||
2966 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
2967 ((c >= 0xC0) && (c <= 0xD6)) ||
2968 ((c >= 0xD8) && (c <= 0xF6)) ||
2969 ((c >= 0xF8) && (c <= 0x2FF)) ||
2970 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
2971 ((c >= 0x370) && (c <= 0x37D)) ||
2972 ((c >= 0x37F) && (c <= 0x1FFF)) ||
2973 ((c >= 0x200C) && (c <= 0x200D)) ||
2974 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
2975 ((c >= 0x2070) && (c <= 0x218F)) ||
2976 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
2977 ((c >= 0x3001) && (c <= 0xD7FF)) ||
2978 ((c >= 0xF900) && (c <= 0xFDCF)) ||
2979 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
2980 ((c >= 0x10000) && (c <= 0xEFFFF))
2981 )) {
2982 if (count++ > 100) {
2983 count = 0;
2984 GROW;
2985 }
2986 len += l;
2987 NEXTL(l);
2988 c = CUR_CHAR(l);
2989 }
2990 } else {
2991 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
2992 (!IS_LETTER(c) && (c != '_') &&
2993 (c != ':'))) {
2994 return(NULL);
2995 }
2996 len += l;
2997 NEXTL(l);
2998 c = CUR_CHAR(l);
2999
3000 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3001 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3002 (c == '.') || (c == '-') ||
3003 (c == '_') || (c == ':') ||
3004 (IS_COMBINING(c)) ||
3005 (IS_EXTENDER(c)))) {
3006 if (count++ > 100) {
3007 count = 0;
3008 GROW;
3009 }
3010 len += l;
3011 NEXTL(l);
3012 c = CUR_CHAR(l);
3013 }
3014 }
3015 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3016 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3017 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3018}
3019
Owen Taylor3473f882001-02-23 17:55:21 +00003020/**
3021 * xmlParseName:
3022 * @ctxt: an XML parser context
3023 *
3024 * parse an XML name.
3025 *
3026 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3027 * CombiningChar | Extender
3028 *
3029 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3030 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003031 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003032 *
3033 * Returns the Name parsed or NULL
3034 */
3035
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003036const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003037xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003038 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003039 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003040 int count = 0;
3041
3042 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003043
Daniel Veillard34e3f642008-07-29 09:02:27 +00003044 nbParseName++;
3045
Daniel Veillard48b2f892001-02-25 16:11:03 +00003046 /*
3047 * Accelerator for simple ASCII names
3048 */
3049 in = ctxt->input->cur;
3050 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3051 ((*in >= 0x41) && (*in <= 0x5A)) ||
3052 (*in == '_') || (*in == ':')) {
3053 in++;
3054 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3055 ((*in >= 0x41) && (*in <= 0x5A)) ||
3056 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003057 (*in == '_') || (*in == '-') ||
3058 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003059 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003060 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003061 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003062 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003063 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003064 ctxt->nbChars += count;
3065 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003066 if (ret == NULL)
3067 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003068 return(ret);
3069 }
3070 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003071 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003072 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003073}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003074
Daniel Veillard34e3f642008-07-29 09:02:27 +00003075static const xmlChar *
3076xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3077 int len = 0, l;
3078 int c;
3079 int count = 0;
3080
3081 nbParseNCNameComplex++;
3082
3083 /*
3084 * Handler for more complex cases
3085 */
3086 GROW;
3087 c = CUR_CHAR(l);
3088 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3089 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3090 return(NULL);
3091 }
3092
3093 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3094 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3095 if (count++ > 100) {
3096 count = 0;
3097 GROW;
3098 }
3099 len += l;
3100 NEXTL(l);
3101 c = CUR_CHAR(l);
3102 }
3103 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3104}
3105
3106/**
3107 * xmlParseNCName:
3108 * @ctxt: an XML parser context
3109 * @len: lenght of the string parsed
3110 *
3111 * parse an XML name.
3112 *
3113 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3114 * CombiningChar | Extender
3115 *
3116 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3117 *
3118 * Returns the Name parsed or NULL
3119 */
3120
3121static const xmlChar *
3122xmlParseNCName(xmlParserCtxtPtr ctxt) {
3123 const xmlChar *in;
3124 const xmlChar *ret;
3125 int count = 0;
3126
3127 nbParseNCName++;
3128
3129 /*
3130 * Accelerator for simple ASCII names
3131 */
3132 in = ctxt->input->cur;
3133 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3134 ((*in >= 0x41) && (*in <= 0x5A)) ||
3135 (*in == '_')) {
3136 in++;
3137 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3138 ((*in >= 0x41) && (*in <= 0x5A)) ||
3139 ((*in >= 0x30) && (*in <= 0x39)) ||
3140 (*in == '_') || (*in == '-') ||
3141 (*in == '.'))
3142 in++;
3143 if ((*in > 0) && (*in < 0x80)) {
3144 count = in - ctxt->input->cur;
3145 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3146 ctxt->input->cur = in;
3147 ctxt->nbChars += count;
3148 ctxt->input->col += count;
3149 if (ret == NULL) {
3150 xmlErrMemory(ctxt, NULL);
3151 }
3152 return(ret);
3153 }
3154 }
3155 return(xmlParseNCNameComplex(ctxt));
3156}
3157
Daniel Veillard46de64e2002-05-29 08:21:33 +00003158/**
3159 * xmlParseNameAndCompare:
3160 * @ctxt: an XML parser context
3161 *
3162 * parse an XML name and compares for match
3163 * (specialized for endtag parsing)
3164 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003165 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3166 * and the name for mismatch
3167 */
3168
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003169static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003170xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003171 register const xmlChar *cmp = other;
3172 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003173 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003174
3175 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003176
Daniel Veillard46de64e2002-05-29 08:21:33 +00003177 in = ctxt->input->cur;
3178 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003179 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003180 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003181 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003182 }
William M. Brack76e95df2003-10-18 16:20:14 +00003183 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003184 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003185 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003186 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003187 }
3188 /* failure (or end of input buffer), check with full function */
3189 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003190 /* strings coming from the dictionnary direct compare possible */
3191 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003192 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003193 }
3194 return ret;
3195}
3196
Owen Taylor3473f882001-02-23 17:55:21 +00003197/**
3198 * xmlParseStringName:
3199 * @ctxt: an XML parser context
3200 * @str: a pointer to the string pointer (IN/OUT)
3201 *
3202 * parse an XML name.
3203 *
3204 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3205 * CombiningChar | Extender
3206 *
3207 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3208 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003209 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003210 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003211 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003212 * is updated to the current location in the string.
3213 */
3214
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003215static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003216xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3217 xmlChar buf[XML_MAX_NAMELEN + 5];
3218 const xmlChar *cur = *str;
3219 int len = 0, l;
3220 int c;
3221
Daniel Veillard34e3f642008-07-29 09:02:27 +00003222 nbParseStringName++;
3223
Owen Taylor3473f882001-02-23 17:55:21 +00003224 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003225 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003226 return(NULL);
3227 }
3228
Daniel Veillard34e3f642008-07-29 09:02:27 +00003229 COPY_BUF(l,buf,len,c);
3230 cur += l;
3231 c = CUR_SCHAR(cur, l);
3232 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003233 COPY_BUF(l,buf,len,c);
3234 cur += l;
3235 c = CUR_SCHAR(cur, l);
3236 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3237 /*
3238 * Okay someone managed to make a huge name, so he's ready to pay
3239 * for the processing speed.
3240 */
3241 xmlChar *buffer;
3242 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003243
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003244 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003245 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003246 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003247 return(NULL);
3248 }
3249 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003250 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003251 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003252 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003253 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003254 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003255 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003256 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003257 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003258 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003259 return(NULL);
3260 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003261 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003262 }
3263 COPY_BUF(l,buffer,len,c);
3264 cur += l;
3265 c = CUR_SCHAR(cur, l);
3266 }
3267 buffer[len] = 0;
3268 *str = cur;
3269 return(buffer);
3270 }
3271 }
3272 *str = cur;
3273 return(xmlStrndup(buf, len));
3274}
3275
3276/**
3277 * xmlParseNmtoken:
3278 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003279 *
Owen Taylor3473f882001-02-23 17:55:21 +00003280 * parse an XML Nmtoken.
3281 *
3282 * [7] Nmtoken ::= (NameChar)+
3283 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003284 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003285 *
3286 * Returns the Nmtoken parsed or NULL
3287 */
3288
3289xmlChar *
3290xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3291 xmlChar buf[XML_MAX_NAMELEN + 5];
3292 int len = 0, l;
3293 int c;
3294 int count = 0;
3295
Daniel Veillard34e3f642008-07-29 09:02:27 +00003296 nbParseNmToken++;
3297
Owen Taylor3473f882001-02-23 17:55:21 +00003298 GROW;
3299 c = CUR_CHAR(l);
3300
Daniel Veillard34e3f642008-07-29 09:02:27 +00003301 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003302 if (count++ > 100) {
3303 count = 0;
3304 GROW;
3305 }
3306 COPY_BUF(l,buf,len,c);
3307 NEXTL(l);
3308 c = CUR_CHAR(l);
3309 if (len >= XML_MAX_NAMELEN) {
3310 /*
3311 * Okay someone managed to make a huge token, so he's ready to pay
3312 * for the processing speed.
3313 */
3314 xmlChar *buffer;
3315 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003316
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003317 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003318 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003319 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003320 return(NULL);
3321 }
3322 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003323 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003324 if (count++ > 100) {
3325 count = 0;
3326 GROW;
3327 }
3328 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003329 xmlChar *tmp;
3330
Owen Taylor3473f882001-02-23 17:55:21 +00003331 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003332 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003333 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003334 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003335 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003336 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003337 return(NULL);
3338 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003339 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003340 }
3341 COPY_BUF(l,buffer,len,c);
3342 NEXTL(l);
3343 c = CUR_CHAR(l);
3344 }
3345 buffer[len] = 0;
3346 return(buffer);
3347 }
3348 }
3349 if (len == 0)
3350 return(NULL);
3351 return(xmlStrndup(buf, len));
3352}
3353
3354/**
3355 * xmlParseEntityValue:
3356 * @ctxt: an XML parser context
3357 * @orig: if non-NULL store a copy of the original entity value
3358 *
3359 * parse a value for ENTITY declarations
3360 *
3361 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3362 * "'" ([^%&'] | PEReference | Reference)* "'"
3363 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003364 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003365 */
3366
3367xmlChar *
3368xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3369 xmlChar *buf = NULL;
3370 int len = 0;
3371 int size = XML_PARSER_BUFFER_SIZE;
3372 int c, l;
3373 xmlChar stop;
3374 xmlChar *ret = NULL;
3375 const xmlChar *cur = NULL;
3376 xmlParserInputPtr input;
3377
3378 if (RAW == '"') stop = '"';
3379 else if (RAW == '\'') stop = '\'';
3380 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003381 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003382 return(NULL);
3383 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003384 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003385 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003386 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003387 return(NULL);
3388 }
3389
3390 /*
3391 * The content of the entity definition is copied in a buffer.
3392 */
3393
3394 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3395 input = ctxt->input;
3396 GROW;
3397 NEXT;
3398 c = CUR_CHAR(l);
3399 /*
3400 * NOTE: 4.4.5 Included in Literal
3401 * When a parameter entity reference appears in a literal entity
3402 * value, ... a single or double quote character in the replacement
3403 * text is always treated as a normal data character and will not
3404 * terminate the literal.
3405 * In practice it means we stop the loop only when back at parsing
3406 * the initial entity and the quote is found
3407 */
William M. Brack871611b2003-10-18 04:53:14 +00003408 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003409 (ctxt->input != input))) {
3410 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003411 xmlChar *tmp;
3412
Owen Taylor3473f882001-02-23 17:55:21 +00003413 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003414 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3415 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003416 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003417 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003418 return(NULL);
3419 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003420 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003421 }
3422 COPY_BUF(l,buf,len,c);
3423 NEXTL(l);
3424 /*
3425 * Pop-up of finished entities.
3426 */
3427 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3428 xmlPopInput(ctxt);
3429
3430 GROW;
3431 c = CUR_CHAR(l);
3432 if (c == 0) {
3433 GROW;
3434 c = CUR_CHAR(l);
3435 }
3436 }
3437 buf[len] = 0;
3438
3439 /*
3440 * Raise problem w.r.t. '&' and '%' being used in non-entities
3441 * reference constructs. Note Charref will be handled in
3442 * xmlStringDecodeEntities()
3443 */
3444 cur = buf;
3445 while (*cur != 0) { /* non input consuming */
3446 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3447 xmlChar *name;
3448 xmlChar tmp = *cur;
3449
3450 cur++;
3451 name = xmlParseStringName(ctxt, &cur);
3452 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003453 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003454 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003455 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003456 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003457 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3458 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003459 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003460 }
3461 if (name != NULL)
3462 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003463 if (*cur == 0)
3464 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003465 }
3466 cur++;
3467 }
3468
3469 /*
3470 * Then PEReference entities are substituted.
3471 */
3472 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003473 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003474 xmlFree(buf);
3475 } else {
3476 NEXT;
3477 /*
3478 * NOTE: 4.4.7 Bypassed
3479 * When a general entity reference appears in the EntityValue in
3480 * an entity declaration, it is bypassed and left as is.
3481 * so XML_SUBSTITUTE_REF is not set here.
3482 */
3483 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3484 0, 0, 0);
3485 if (orig != NULL)
3486 *orig = buf;
3487 else
3488 xmlFree(buf);
3489 }
3490
3491 return(ret);
3492}
3493
3494/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003495 * xmlParseAttValueComplex:
3496 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00003497 * @len: the resulting attribute len
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003498 * @normalize: wether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003499 *
3500 * parse a value for an attribute, this is the fallback function
3501 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003502 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003503 *
3504 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3505 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003506static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003507xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003508 xmlChar limit = 0;
3509 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003510 xmlChar *rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003511 int len = 0;
3512 int buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003513 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003514 xmlChar *current = NULL;
3515 xmlEntityPtr ent;
3516
Owen Taylor3473f882001-02-23 17:55:21 +00003517 if (NXT(0) == '"') {
3518 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3519 limit = '"';
3520 NEXT;
3521 } else if (NXT(0) == '\'') {
3522 limit = '\'';
3523 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3524 NEXT;
3525 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003526 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003527 return(NULL);
3528 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003529
Owen Taylor3473f882001-02-23 17:55:21 +00003530 /*
3531 * allocate a translation buffer.
3532 */
3533 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003534 buf = (xmlChar *) xmlMallocAtomic(buf_size * sizeof(xmlChar));
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003535 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003536
3537 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003538 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003539 */
3540 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003541 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003542 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003543 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003544 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003545 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003546 if (NXT(1) == '#') {
3547 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003548
Owen Taylor3473f882001-02-23 17:55:21 +00003549 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003550 if (ctxt->replaceEntities) {
3551 if (len > buf_size - 10) {
3552 growBuffer(buf);
3553 }
3554 buf[len++] = '&';
3555 } else {
3556 /*
3557 * The reparsing will be done in xmlStringGetNodeList()
3558 * called by the attribute() function in SAX.c
3559 */
Daniel Veillard319a7422001-09-11 09:27:09 +00003560 if (len > buf_size - 10) {
3561 growBuffer(buf);
3562 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003563 buf[len++] = '&';
3564 buf[len++] = '#';
3565 buf[len++] = '3';
3566 buf[len++] = '8';
3567 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003568 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003569 } else if (val != 0) {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003570 if (len > buf_size - 10) {
3571 growBuffer(buf);
3572 }
Owen Taylor3473f882001-02-23 17:55:21 +00003573 len += xmlCopyChar(0, &buf[len], val);
3574 }
3575 } else {
3576 ent = xmlParseEntityRef(ctxt);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00003577 ctxt->nbentities++;
3578 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00003579 ctxt->nbentities += ent->checked;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003580 if ((ent != NULL) &&
3581 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
3582 if (len > buf_size - 10) {
3583 growBuffer(buf);
3584 }
3585 if ((ctxt->replaceEntities == 0) &&
3586 (ent->content[0] == '&')) {
3587 buf[len++] = '&';
3588 buf[len++] = '#';
3589 buf[len++] = '3';
3590 buf[len++] = '8';
3591 buf[len++] = ';';
3592 } else {
3593 buf[len++] = ent->content[0];
3594 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003595 } else if ((ent != NULL) &&
3596 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003597 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3598 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003599 XML_SUBSTITUTE_REF,
3600 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003601 if (rep != NULL) {
3602 current = rep;
3603 while (*current != 0) { /* non input consuming */
3604 buf[len++] = *current++;
3605 if (len > buf_size - 10) {
3606 growBuffer(buf);
3607 }
3608 }
3609 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003610 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003611 }
3612 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003613 if (len > buf_size - 10) {
3614 growBuffer(buf);
3615 }
Owen Taylor3473f882001-02-23 17:55:21 +00003616 if (ent->content != NULL)
3617 buf[len++] = ent->content[0];
3618 }
3619 } else if (ent != NULL) {
3620 int i = xmlStrlen(ent->name);
3621 const xmlChar *cur = ent->name;
3622
3623 /*
3624 * This may look absurd but is needed to detect
3625 * entities problems
3626 */
3627 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3628 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003629 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003630 XML_SUBSTITUTE_REF, 0, 0, 0);
3631 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003632 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003633 rep = NULL;
3634 }
Owen Taylor3473f882001-02-23 17:55:21 +00003635 }
3636
3637 /*
3638 * Just output the reference
3639 */
3640 buf[len++] = '&';
3641 if (len > buf_size - i - 10) {
3642 growBuffer(buf);
3643 }
3644 for (;i > 0;i--)
3645 buf[len++] = *cur++;
3646 buf[len++] = ';';
3647 }
3648 }
3649 } else {
3650 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003651 if ((len != 0) || (!normalize)) {
3652 if ((!normalize) || (!in_space)) {
3653 COPY_BUF(l,buf,len,0x20);
3654 if (len > buf_size - 10) {
3655 growBuffer(buf);
3656 }
3657 }
3658 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003659 }
3660 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003661 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003662 COPY_BUF(l,buf,len,c);
3663 if (len > buf_size - 10) {
3664 growBuffer(buf);
3665 }
3666 }
3667 NEXTL(l);
3668 }
3669 GROW;
3670 c = CUR_CHAR(l);
Daniel Veillard8915c152008-08-26 13:05:34 +00003671 if ((len > 100000) &&
3672 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
3673 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
3674 "Excessive lenght of attribute: %d use XML_PARSE_HUGE option\n",
3675 len);
3676 goto int_error;
3677 }
Owen Taylor3473f882001-02-23 17:55:21 +00003678 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003679 if ((in_space) && (normalize)) {
3680 while (buf[len - 1] == 0x20) len--;
3681 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003682 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003683 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003684 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003685 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003686 if ((c != 0) && (!IS_CHAR(c))) {
3687 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3688 "invalid character in attribute value\n");
3689 } else {
3690 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3691 "AttValue: ' expected\n");
3692 }
Owen Taylor3473f882001-02-23 17:55:21 +00003693 } else
3694 NEXT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00003695 if (attlen != NULL) *attlen = len;
Owen Taylor3473f882001-02-23 17:55:21 +00003696 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003697
3698mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003699 xmlErrMemory(ctxt, NULL);
Daniel Veillard8915c152008-08-26 13:05:34 +00003700int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00003701 if (buf != NULL)
3702 xmlFree(buf);
3703 if (rep != NULL)
3704 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003705 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003706}
3707
3708/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003709 * xmlParseAttValue:
3710 * @ctxt: an XML parser context
3711 *
3712 * parse a value for an attribute
3713 * Note: the parser won't do substitution of entities here, this
3714 * will be handled later in xmlStringGetNodeList
3715 *
3716 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3717 * "'" ([^<&'] | Reference)* "'"
3718 *
3719 * 3.3.3 Attribute-Value Normalization:
3720 * Before the value of an attribute is passed to the application or
3721 * checked for validity, the XML processor must normalize it as follows:
3722 * - a character reference is processed by appending the referenced
3723 * character to the attribute value
3724 * - an entity reference is processed by recursively processing the
3725 * replacement text of the entity
3726 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3727 * appending #x20 to the normalized value, except that only a single
3728 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3729 * parsed entity or the literal entity value of an internal parsed entity
3730 * - other characters are processed by appending them to the normalized value
3731 * If the declared value is not CDATA, then the XML processor must further
3732 * process the normalized attribute value by discarding any leading and
3733 * trailing space (#x20) characters, and by replacing sequences of space
3734 * (#x20) characters by a single space (#x20) character.
3735 * All attributes for which no declaration has been read should be treated
3736 * by a non-validating parser as if declared CDATA.
3737 *
3738 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3739 */
3740
3741
3742xmlChar *
3743xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00003744 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003745 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00003746}
3747
3748/**
Owen Taylor3473f882001-02-23 17:55:21 +00003749 * xmlParseSystemLiteral:
3750 * @ctxt: an XML parser context
3751 *
3752 * parse an XML Literal
3753 *
3754 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
3755 *
3756 * Returns the SystemLiteral parsed or NULL
3757 */
3758
3759xmlChar *
3760xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
3761 xmlChar *buf = NULL;
3762 int len = 0;
3763 int size = XML_PARSER_BUFFER_SIZE;
3764 int cur, l;
3765 xmlChar stop;
3766 int state = ctxt->instate;
3767 int count = 0;
3768
3769 SHRINK;
3770 if (RAW == '"') {
3771 NEXT;
3772 stop = '"';
3773 } else if (RAW == '\'') {
3774 NEXT;
3775 stop = '\'';
3776 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003777 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003778 return(NULL);
3779 }
3780
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003781 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003782 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003783 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003784 return(NULL);
3785 }
3786 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
3787 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00003788 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003789 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003790 xmlChar *tmp;
3791
Owen Taylor3473f882001-02-23 17:55:21 +00003792 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003793 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3794 if (tmp == NULL) {
3795 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003796 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003797 ctxt->instate = (xmlParserInputState) state;
3798 return(NULL);
3799 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003800 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003801 }
3802 count++;
3803 if (count > 50) {
3804 GROW;
3805 count = 0;
3806 }
3807 COPY_BUF(l,buf,len,cur);
3808 NEXTL(l);
3809 cur = CUR_CHAR(l);
3810 if (cur == 0) {
3811 GROW;
3812 SHRINK;
3813 cur = CUR_CHAR(l);
3814 }
3815 }
3816 buf[len] = 0;
3817 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00003818 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003819 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003820 } else {
3821 NEXT;
3822 }
3823 return(buf);
3824}
3825
3826/**
3827 * xmlParsePubidLiteral:
3828 * @ctxt: an XML parser context
3829 *
3830 * parse an XML public literal
3831 *
3832 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
3833 *
3834 * Returns the PubidLiteral parsed or NULL.
3835 */
3836
3837xmlChar *
3838xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
3839 xmlChar *buf = NULL;
3840 int len = 0;
3841 int size = XML_PARSER_BUFFER_SIZE;
3842 xmlChar cur;
3843 xmlChar stop;
3844 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003845 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00003846
3847 SHRINK;
3848 if (RAW == '"') {
3849 NEXT;
3850 stop = '"';
3851 } else if (RAW == '\'') {
3852 NEXT;
3853 stop = '\'';
3854 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003855 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003856 return(NULL);
3857 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003858 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003859 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003860 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003861 return(NULL);
3862 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003863 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00003864 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00003865 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003866 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003867 xmlChar *tmp;
3868
Owen Taylor3473f882001-02-23 17:55:21 +00003869 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003870 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3871 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003872 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003873 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003874 return(NULL);
3875 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003876 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003877 }
3878 buf[len++] = cur;
3879 count++;
3880 if (count > 50) {
3881 GROW;
3882 count = 0;
3883 }
3884 NEXT;
3885 cur = CUR;
3886 if (cur == 0) {
3887 GROW;
3888 SHRINK;
3889 cur = CUR;
3890 }
3891 }
3892 buf[len] = 0;
3893 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003894 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003895 } else {
3896 NEXT;
3897 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003898 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00003899 return(buf);
3900}
3901
Daniel Veillard48b2f892001-02-25 16:11:03 +00003902void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00003903
/*
 * Lookup table used for the test in the inner loop of the char data
 * scanning: a non-zero entry means the byte is skipped over as plain
 * character data, a zero entry stops the scan.
 * Zero entries: all bytes below 0x20 except 0x09 (so 0x0A and 0x0D stop
 * the scan), '&' (0x26), '<' (0x3C), ']' (0x5D) and every byte >= 0x80.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x1F: only TAB (0x09) is accepted */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x3F: '&' and '<' are rejected */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    /* 0x40 - 0x5F: ']' is rejected */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    /* 0x60 - 0x7F: all accepted */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    /* 0x80 - 0xFF: non-ascii, all rejected */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
3941
Owen Taylor3473f882001-02-23 17:55:21 +00003942/**
3943 * xmlParseCharData:
3944 * @ctxt: an XML parser context
3945 * @cdata: int indicating whether we are within a CDATA section
3946 *
3947 * parse a CharData section.
3948 * if we are within a CDATA section ']]>' marks an end of section.
3949 *
3950 * The right angle bracket (>) may be represented using the string "&gt;",
3951 * and must, for compatibility, be escaped using "&gt;" or a character
3952 * reference when it appears in the string "]]>" in content, when that
3953 * string is not marking the end of a CDATA section.
3954 *
3955 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
3956 */
3957
3958void
3959xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00003960 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003961 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00003962 int line = ctxt->input->line;
3963 int col = ctxt->input->col;
Daniel Veillard0714c5b2005-01-23 00:01:01 +00003964 int ccol;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003965
3966 SHRINK;
3967 GROW;
3968 /*
3969 * Accelerated common case where input don't need to be
3970 * modified before passing it to the handler.
3971 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00003972 if (!cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003973 in = ctxt->input->cur;
3974 do {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003975get_more_space:
Daniel Veillard9e264ad2008-01-11 06:10:16 +00003976 while (*in == 0x20) { in++; ctxt->input->col++; }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003977 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003978 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003979 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003980 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00003981 } while (*in == 0xA);
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003982 goto get_more_space;
3983 }
3984 if (*in == '<') {
3985 nbchar = in - ctxt->input->cur;
3986 if (nbchar > 0) {
3987 const xmlChar *tmp = ctxt->input->cur;
3988 ctxt->input->cur = in;
3989
Daniel Veillard34099b42004-11-04 17:34:35 +00003990 if ((ctxt->sax != NULL) &&
3991 (ctxt->sax->ignorableWhitespace !=
3992 ctxt->sax->characters)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003993 if (areBlanks(ctxt, tmp, nbchar, 1)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00003994 if (ctxt->sax->ignorableWhitespace != NULL)
3995 ctxt->sax->ignorableWhitespace(ctxt->userData,
3996 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00003997 } else {
3998 if (ctxt->sax->characters != NULL)
3999 ctxt->sax->characters(ctxt->userData,
4000 tmp, nbchar);
4001 if (*ctxt->space == -1)
4002 *ctxt->space = -2;
4003 }
Daniel Veillard34099b42004-11-04 17:34:35 +00004004 } else if ((ctxt->sax != NULL) &&
4005 (ctxt->sax->characters != NULL)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004006 ctxt->sax->characters(ctxt->userData,
4007 tmp, nbchar);
4008 }
4009 }
4010 return;
4011 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004012
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004013get_more:
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004014 ccol = ctxt->input->col;
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004015 while (test_char_data[*in]) {
4016 in++;
4017 ccol++;
4018 }
Daniel Veillard0714c5b2005-01-23 00:01:01 +00004019 ctxt->input->col = ccol;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004020 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004021 do {
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004022 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004023 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004024 } while (*in == 0xA);
Daniel Veillard3ed155f2001-04-29 19:56:59 +00004025 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004026 }
4027 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004028 if ((in[1] == ']') && (in[2] == '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004029 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004030 ctxt->input->cur = in;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004031 return;
4032 }
4033 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004034 ctxt->input->col++;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004035 goto get_more;
4036 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004037 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00004038 if (nbchar > 0) {
Daniel Veillard34099b42004-11-04 17:34:35 +00004039 if ((ctxt->sax != NULL) &&
4040 (ctxt->sax->ignorableWhitespace !=
Daniel Veillard40412cd2003-09-03 13:28:32 +00004041 ctxt->sax->characters) &&
William M. Brack76e95df2003-10-18 16:20:14 +00004042 (IS_BLANK_CH(*ctxt->input->cur))) {
Daniel Veillarda7374592001-05-10 14:17:55 +00004043 const xmlChar *tmp = ctxt->input->cur;
4044 ctxt->input->cur = in;
Daniel Veillard40412cd2003-09-03 13:28:32 +00004045
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004046 if (areBlanks(ctxt, tmp, nbchar, 0)) {
Daniel Veillard32acf0c2005-03-31 14:12:37 +00004047 if (ctxt->sax->ignorableWhitespace != NULL)
4048 ctxt->sax->ignorableWhitespace(ctxt->userData,
4049 tmp, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004050 } else {
4051 if (ctxt->sax->characters != NULL)
4052 ctxt->sax->characters(ctxt->userData,
4053 tmp, nbchar);
4054 if (*ctxt->space == -1)
4055 *ctxt->space = -2;
4056 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004057 line = ctxt->input->line;
4058 col = ctxt->input->col;
Daniel Veillard34099b42004-11-04 17:34:35 +00004059 } else if (ctxt->sax != NULL) {
Daniel Veillard80f32572001-03-07 19:45:40 +00004060 if (ctxt->sax->characters != NULL)
4061 ctxt->sax->characters(ctxt->userData,
4062 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00004063 line = ctxt->input->line;
4064 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00004065 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00004066 }
4067 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004068 if (*in == 0xD) {
4069 in++;
4070 if (*in == 0xA) {
4071 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004072 in++;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00004073 ctxt->input->line++; ctxt->input->col = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00004074 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004075 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00004076 in--;
4077 }
4078 if (*in == '<') {
4079 return;
4080 }
4081 if (*in == '&') {
4082 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004083 }
4084 SHRINK;
4085 GROW;
4086 in = ctxt->input->cur;
William M. Brackc07329e2003-09-08 01:57:30 +00004087 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
Daniel Veillard48b2f892001-02-25 16:11:03 +00004088 nbchar = 0;
4089 }
Daniel Veillard50582112001-03-26 22:52:16 +00004090 ctxt->input->line = line;
4091 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00004092 xmlParseCharDataComplex(ctxt, cdata);
4093}
4094
Daniel Veillard01c13b52002-12-10 15:19:08 +00004095/**
4096 * xmlParseCharDataComplex:
4097 * @ctxt: an XML parser context
4098 * @cdata: int indicating whether we are within a CDATA section
4099 *
4100 * parse a CharData section.this is the fallback function
4101 * of xmlParseCharData() when the parsing requires handling
4102 * of non-ASCII characters.
4103 */
Daniel Veillard48b2f892001-02-25 16:11:03 +00004104void
4105xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004106 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4107 int nbchar = 0;
4108 int cur, l;
4109 int count = 0;
4110
4111 SHRINK;
4112 GROW;
4113 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004114 while ((cur != '<') && /* checked */
4115 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004116 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004117 if ((cur == ']') && (NXT(1) == ']') &&
4118 (NXT(2) == '>')) {
4119 if (cdata) break;
4120 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004121 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004122 }
4123 }
4124 COPY_BUF(l,buf,nbchar,cur);
4125 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004126 buf[nbchar] = 0;
4127
Owen Taylor3473f882001-02-23 17:55:21 +00004128 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004129 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004130 */
4131 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004132 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004133 if (ctxt->sax->ignorableWhitespace != NULL)
4134 ctxt->sax->ignorableWhitespace(ctxt->userData,
4135 buf, nbchar);
4136 } else {
4137 if (ctxt->sax->characters != NULL)
4138 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004139 if ((ctxt->sax->characters !=
4140 ctxt->sax->ignorableWhitespace) &&
4141 (*ctxt->space == -1))
4142 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004143 }
4144 }
4145 nbchar = 0;
4146 }
4147 count++;
4148 if (count > 50) {
4149 GROW;
4150 count = 0;
4151 }
4152 NEXTL(l);
4153 cur = CUR_CHAR(l);
4154 }
4155 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004156 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004157 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004158 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004159 */
4160 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004161 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004162 if (ctxt->sax->ignorableWhitespace != NULL)
4163 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4164 } else {
4165 if (ctxt->sax->characters != NULL)
4166 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004167 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4168 (*ctxt->space == -1))
4169 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004170 }
4171 }
4172 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004173 if ((cur != 0) && (!IS_CHAR(cur))) {
4174 /* Generate the error and skip the offending character */
4175 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4176 "PCDATA invalid Char value %d\n",
4177 cur);
4178 NEXTL(l);
4179 }
Owen Taylor3473f882001-02-23 17:55:21 +00004180}
4181
4182/**
4183 * xmlParseExternalID:
4184 * @ctxt: an XML parser context
4185 * @publicID: a xmlChar** receiving PubidLiteral
4186 * @strict: indicate whether we should restrict parsing to only
4187 * production [75], see NOTE below
4188 *
4189 * Parse an External ID or a Public ID
4190 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004191 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004192 * 'PUBLIC' S PubidLiteral S SystemLiteral
4193 *
4194 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4195 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4196 *
4197 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4198 *
4199 * Returns the function returns SystemLiteral and in the second
4200 * case publicID receives PubidLiteral, is strict is off
4201 * it is possible to return NULL and have publicID set.
4202 */
4203
4204xmlChar *
4205xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4206 xmlChar *URI = NULL;
4207
4208 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004209
4210 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004211 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004212 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004213 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004214 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4215 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004216 }
4217 SKIP_BLANKS;
4218 URI = xmlParseSystemLiteral(ctxt);
4219 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004220 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004221 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004222 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004223 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004224 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004225 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004226 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004227 }
4228 SKIP_BLANKS;
4229 *publicID = xmlParsePubidLiteral(ctxt);
4230 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004231 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004232 }
4233 if (strict) {
4234 /*
4235 * We don't handle [83] so "S SystemLiteral" is required.
4236 */
William M. Brack76e95df2003-10-18 16:20:14 +00004237 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004238 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004239 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004240 }
4241 } else {
4242 /*
4243 * We handle [83] so we return immediately, if
4244 * "S SystemLiteral" is not detected. From a purely parsing
4245 * point of view that's a nice mess.
4246 */
4247 const xmlChar *ptr;
4248 GROW;
4249
4250 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004251 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004252
William M. Brack76e95df2003-10-18 16:20:14 +00004253 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004254 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4255 }
4256 SKIP_BLANKS;
4257 URI = xmlParseSystemLiteral(ctxt);
4258 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004259 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004260 }
4261 }
4262 return(URI);
4263}
4264
4265/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004266 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004267 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004268 * @buf: the already parsed part of the buffer
4269 * @len: number of bytes filles in the buffer
4270 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004271 *
4272 * Skip an XML (SGML) comment <!-- .... -->
4273 * The spec says that "For compatibility, the string "--" (double-hyphen)
4274 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004275 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004276 *
4277 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4278 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004279static void
4280xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004281 int q, ql;
4282 int r, rl;
4283 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004284 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004285 int inputid;
4286
4287 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004288
Owen Taylor3473f882001-02-23 17:55:21 +00004289 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004290 len = 0;
4291 size = XML_PARSER_BUFFER_SIZE;
4292 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4293 if (buf == NULL) {
4294 xmlErrMemory(ctxt, NULL);
4295 return;
4296 }
Owen Taylor3473f882001-02-23 17:55:21 +00004297 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004298 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004299 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004300 if (q == 0)
4301 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004302 if (!IS_CHAR(q)) {
4303 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4304 "xmlParseComment: invalid xmlChar value %d\n",
4305 q);
4306 xmlFree (buf);
4307 return;
4308 }
Owen Taylor3473f882001-02-23 17:55:21 +00004309 NEXTL(ql);
4310 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004311 if (r == 0)
4312 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004313 if (!IS_CHAR(r)) {
4314 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4315 "xmlParseComment: invalid xmlChar value %d\n",
4316 q);
4317 xmlFree (buf);
4318 return;
4319 }
Owen Taylor3473f882001-02-23 17:55:21 +00004320 NEXTL(rl);
4321 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004322 if (cur == 0)
4323 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004324 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004325 ((cur != '>') ||
4326 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004327 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004328 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004329 }
4330 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004331 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004332 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004333 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4334 if (new_buf == NULL) {
4335 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004336 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004337 return;
4338 }
William M. Bracka3215c72004-07-31 16:24:01 +00004339 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004340 }
4341 COPY_BUF(ql,buf,len,q);
4342 q = r;
4343 ql = rl;
4344 r = cur;
4345 rl = l;
4346
4347 count++;
4348 if (count > 50) {
4349 GROW;
4350 count = 0;
4351 }
4352 NEXTL(l);
4353 cur = CUR_CHAR(l);
4354 if (cur == 0) {
4355 SHRINK;
4356 GROW;
4357 cur = CUR_CHAR(l);
4358 }
4359 }
4360 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004361 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004362 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004363 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004364 } else if (!IS_CHAR(cur)) {
4365 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4366 "xmlParseComment: invalid xmlChar value %d\n",
4367 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004368 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004369 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004370 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4371 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004372 }
4373 NEXT;
4374 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4375 (!ctxt->disableSAX))
4376 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004377 }
Daniel Veillardda629342007-08-01 07:49:06 +00004378 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004379 return;
4380not_terminated:
4381 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4382 "Comment not terminated\n", NULL);
4383 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004384 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004385}
Daniel Veillardda629342007-08-01 07:49:06 +00004386
Daniel Veillard4c778d82005-01-23 17:37:44 +00004387/**
4388 * xmlParseComment:
4389 * @ctxt: an XML parser context
4390 *
4391 * Skip an XML (SGML) comment <!-- .... -->
4392 * The spec says that "For compatibility, the string "--" (double-hyphen)
4393 * must not occur within comments. "
4394 *
4395 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4396 */
4397void
4398xmlParseComment(xmlParserCtxtPtr ctxt) {
4399 xmlChar *buf = NULL;
4400 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004401 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004402 xmlParserInputState state;
4403 const xmlChar *in;
4404 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004405 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004406
4407 /*
4408 * Check that there is a comment right here.
4409 */
4410 if ((RAW != '<') || (NXT(1) != '!') ||
4411 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004412 state = ctxt->instate;
4413 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004414 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004415 SKIP(4);
4416 SHRINK;
4417 GROW;
4418
4419 /*
4420 * Accelerated common case where input don't need to be
4421 * modified before passing it to the handler.
4422 */
4423 in = ctxt->input->cur;
4424 do {
4425 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004426 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004427 ctxt->input->line++; ctxt->input->col = 1;
4428 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004429 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004430 }
4431get_more:
4432 ccol = ctxt->input->col;
4433 while (((*in > '-') && (*in <= 0x7F)) ||
4434 ((*in >= 0x20) && (*in < '-')) ||
4435 (*in == 0x09)) {
4436 in++;
4437 ccol++;
4438 }
4439 ctxt->input->col = ccol;
4440 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004441 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004442 ctxt->input->line++; ctxt->input->col = 1;
4443 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004444 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004445 goto get_more;
4446 }
4447 nbchar = in - ctxt->input->cur;
4448 /*
4449 * save current set of data
4450 */
4451 if (nbchar > 0) {
4452 if ((ctxt->sax != NULL) &&
4453 (ctxt->sax->comment != NULL)) {
4454 if (buf == NULL) {
4455 if ((*in == '-') && (in[1] == '-'))
4456 size = nbchar + 1;
4457 else
4458 size = XML_PARSER_BUFFER_SIZE + nbchar;
4459 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4460 if (buf == NULL) {
4461 xmlErrMemory(ctxt, NULL);
4462 ctxt->instate = state;
4463 return;
4464 }
4465 len = 0;
4466 } else if (len + nbchar + 1 >= size) {
4467 xmlChar *new_buf;
4468 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4469 new_buf = (xmlChar *) xmlRealloc(buf,
4470 size * sizeof(xmlChar));
4471 if (new_buf == NULL) {
4472 xmlFree (buf);
4473 xmlErrMemory(ctxt, NULL);
4474 ctxt->instate = state;
4475 return;
4476 }
4477 buf = new_buf;
4478 }
4479 memcpy(&buf[len], ctxt->input->cur, nbchar);
4480 len += nbchar;
4481 buf[len] = 0;
4482 }
4483 }
4484 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004485 if (*in == 0xA) {
4486 in++;
4487 ctxt->input->line++; ctxt->input->col = 1;
4488 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004489 if (*in == 0xD) {
4490 in++;
4491 if (*in == 0xA) {
4492 ctxt->input->cur = in;
4493 in++;
4494 ctxt->input->line++; ctxt->input->col = 1;
4495 continue; /* while */
4496 }
4497 in--;
4498 }
4499 SHRINK;
4500 GROW;
4501 in = ctxt->input->cur;
4502 if (*in == '-') {
4503 if (in[1] == '-') {
4504 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004505 if (ctxt->input->id != inputid) {
4506 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4507 "comment doesn't start and stop in the same entity\n");
4508 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004509 SKIP(3);
4510 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4511 (!ctxt->disableSAX)) {
4512 if (buf != NULL)
4513 ctxt->sax->comment(ctxt->userData, buf);
4514 else
4515 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4516 }
4517 if (buf != NULL)
4518 xmlFree(buf);
4519 ctxt->instate = state;
4520 return;
4521 }
4522 if (buf != NULL)
4523 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4524 "Comment not terminated \n<!--%.50s\n",
4525 buf);
4526 else
4527 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4528 "Comment not terminated \n", NULL);
4529 in++;
4530 ctxt->input->col++;
4531 }
4532 in++;
4533 ctxt->input->col++;
4534 goto get_more;
4535 }
4536 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4537 xmlParseCommentComplex(ctxt, buf, len, size);
4538 ctxt->instate = state;
4539 return;
4540}
4541
Owen Taylor3473f882001-02-23 17:55:21 +00004542
4543/**
4544 * xmlParsePITarget:
4545 * @ctxt: an XML parser context
4546 *
4547 * parse the name of a PI
4548 *
4549 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4550 *
4551 * Returns the PITarget name or NULL
4552 */
4553
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004554const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004555xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004556 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004557
4558 name = xmlParseName(ctxt);
4559 if ((name != NULL) &&
4560 ((name[0] == 'x') || (name[0] == 'X')) &&
4561 ((name[1] == 'm') || (name[1] == 'M')) &&
4562 ((name[2] == 'l') || (name[2] == 'L'))) {
4563 int i;
4564 if ((name[0] == 'x') && (name[1] == 'm') &&
4565 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004566 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004567 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004568 return(name);
4569 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004570 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004571 return(name);
4572 }
4573 for (i = 0;;i++) {
4574 if (xmlW3CPIs[i] == NULL) break;
4575 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4576 return(name);
4577 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004578 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4579 "xmlParsePITarget: invalid name prefix 'xml'\n",
4580 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004581 }
Daniel Veillard37334572008-07-31 08:20:02 +00004582 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4583 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4584 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4585 }
Owen Taylor3473f882001-02-23 17:55:21 +00004586 return(name);
4587}
4588
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * A value that lacks the '=' after the "catalog" keyword is ignored
 * silently; every other syntax problem raises an XML_WAR_CATALOG_PI
 * warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *pos = catalog;
    const xmlChar *valueStart;
    xmlChar quote;

    /* keyword */
    while (IS_BLANK_CH(*pos))
        pos++;
    if (xmlStrncmp(pos, BAD_CAST"catalog", 7) != 0)
        goto error;
    pos += 7;

    /* '=' sign */
    while (IS_BLANK_CH(*pos))
        pos++;
    if (*pos != '=')
        return;
    pos++;

    /* quoted value */
    while (IS_BLANK_CH(*pos))
        pos++;
    quote = *pos;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    pos++;
    for (valueStart = pos; (*pos != 0) && (*pos != quote); pos++)
        ;
    if (*pos == 0)
        goto error;
    uri = xmlStrndup(valueStart, pos - valueStart);
    pos++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*pos))
        pos++;
    if (*pos != 0)
        goto error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
4650
Owen Taylor3473f882001-02-23 17:55:21 +00004651/**
4652 * xmlParsePI:
4653 * @ctxt: an XML parser context
4654 *
4655 * parse an XML Processing Instruction.
4656 *
4657 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4658 *
4659 * The processing is transfered to SAX once parsed.
4660 */
4661
4662void
4663xmlParsePI(xmlParserCtxtPtr ctxt) {
4664 xmlChar *buf = NULL;
4665 int len = 0;
4666 int size = XML_PARSER_BUFFER_SIZE;
4667 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004668 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004669 xmlParserInputState state;
4670 int count = 0;
4671
4672 if ((RAW == '<') && (NXT(1) == '?')) {
4673 xmlParserInputPtr input = ctxt->input;
4674 state = ctxt->instate;
4675 ctxt->instate = XML_PARSER_PI;
4676 /*
4677 * this is a Processing Instruction.
4678 */
4679 SKIP(2);
4680 SHRINK;
4681
4682 /*
4683 * Parse the target name and check for special support like
4684 * namespace.
4685 */
4686 target = xmlParsePITarget(ctxt);
4687 if (target != NULL) {
4688 if ((RAW == '?') && (NXT(1) == '>')) {
4689 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004690 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4691 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004692 }
4693 SKIP(2);
4694
4695 /*
4696 * SAX: PI detected.
4697 */
4698 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4699 (ctxt->sax->processingInstruction != NULL))
4700 ctxt->sax->processingInstruction(ctxt->userData,
4701 target, NULL);
4702 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004703 return;
4704 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004705 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004706 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004707 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004708 ctxt->instate = state;
4709 return;
4710 }
4711 cur = CUR;
4712 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004713 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4714 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004715 }
4716 SKIP_BLANKS;
4717 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004718 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004719 ((cur != '?') || (NXT(1) != '>'))) {
4720 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004721 xmlChar *tmp;
4722
Owen Taylor3473f882001-02-23 17:55:21 +00004723 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004724 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4725 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004726 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004727 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004728 ctxt->instate = state;
4729 return;
4730 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004731 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004732 }
4733 count++;
4734 if (count > 50) {
4735 GROW;
4736 count = 0;
4737 }
4738 COPY_BUF(l,buf,len,cur);
4739 NEXTL(l);
4740 cur = CUR_CHAR(l);
4741 if (cur == 0) {
4742 SHRINK;
4743 GROW;
4744 cur = CUR_CHAR(l);
4745 }
4746 }
4747 buf[len] = 0;
4748 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004749 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
4750 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004751 } else {
4752 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004753 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4754 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004755 }
4756 SKIP(2);
4757
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00004758#ifdef LIBXML_CATALOG_ENABLED
4759 if (((state == XML_PARSER_MISC) ||
4760 (state == XML_PARSER_START)) &&
4761 (xmlStrEqual(target, XML_CATALOG_PI))) {
4762 xmlCatalogAllow allow = xmlCatalogGetDefaults();
4763 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
4764 (allow == XML_CATA_ALLOW_ALL))
4765 xmlParseCatalogPI(ctxt, buf);
4766 }
4767#endif
4768
4769
Owen Taylor3473f882001-02-23 17:55:21 +00004770 /*
4771 * SAX: PI detected.
4772 */
4773 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4774 (ctxt->sax->processingInstruction != NULL))
4775 ctxt->sax->processingInstruction(ctxt->userData,
4776 target, buf);
4777 }
4778 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004779 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004780 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004781 }
4782 ctxt->instate = state;
4783 }
4784}
4785
4786/**
4787 * xmlParseNotationDecl:
4788 * @ctxt: an XML parser context
4789 *
4790 * parse a notation declaration
4791 *
4792 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
4793 *
4794 * Hence there is actually 3 choices:
4795 * 'PUBLIC' S PubidLiteral
4796 * 'PUBLIC' S PubidLiteral S SystemLiteral
4797 * and 'SYSTEM' S SystemLiteral
4798 *
4799 * See the NOTE on xmlParseExternalID().
4800 */
4801
4802void
4803xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004804 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004805 xmlChar *Pubid;
4806 xmlChar *Systemid;
4807
Daniel Veillarda07050d2003-10-19 14:46:32 +00004808 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004809 xmlParserInputPtr input = ctxt->input;
4810 SHRINK;
4811 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00004812 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004813 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4814 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004815 return;
4816 }
4817 SKIP_BLANKS;
4818
Daniel Veillard76d66f42001-05-16 21:05:17 +00004819 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004820 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004821 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004822 return;
4823 }
William M. Brack76e95df2003-10-18 16:20:14 +00004824 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004825 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004826 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004827 return;
4828 }
Daniel Veillard37334572008-07-31 08:20:02 +00004829 if (xmlStrchr(name, ':') != NULL) {
4830 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4831 "colon are forbidden from notation names '%s'\n",
4832 name, NULL, NULL);
4833 }
Owen Taylor3473f882001-02-23 17:55:21 +00004834 SKIP_BLANKS;
4835
4836 /*
4837 * Parse the IDs.
4838 */
4839 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
4840 SKIP_BLANKS;
4841
4842 if (RAW == '>') {
4843 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004844 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4845 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004846 }
4847 NEXT;
4848 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4849 (ctxt->sax->notationDecl != NULL))
4850 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
4851 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004852 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004853 }
Owen Taylor3473f882001-02-23 17:55:21 +00004854 if (Systemid != NULL) xmlFree(Systemid);
4855 if (Pubid != NULL) xmlFree(Pubid);
4856 }
4857}
4858
4859/**
4860 * xmlParseEntityDecl:
4861 * @ctxt: an XML parser context
4862 *
4863 * parse <!ENTITY declarations
4864 *
4865 * [70] EntityDecl ::= GEDecl | PEDecl
4866 *
4867 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
4868 *
4869 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
4870 *
4871 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
4872 *
4873 * [74] PEDef ::= EntityValue | ExternalID
4874 *
4875 * [76] NDataDecl ::= S 'NDATA' S Name
4876 *
4877 * [ VC: Notation Declared ]
4878 * The Name must match the declared name of a notation.
4879 */
4880
4881void
4882xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004883 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004884 xmlChar *value = NULL;
4885 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004886 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00004887 int isParameter = 0;
4888 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004889 int skipped;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00004890 unsigned long oldnbent = ctxt->nbentities;
Owen Taylor3473f882001-02-23 17:55:21 +00004891
Daniel Veillard4c778d82005-01-23 17:37:44 +00004892 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00004893 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004894 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004895 SHRINK;
4896 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00004897 skipped = SKIP_BLANKS;
4898 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004899 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4900 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004901 }
Owen Taylor3473f882001-02-23 17:55:21 +00004902
4903 if (RAW == '%') {
4904 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00004905 skipped = SKIP_BLANKS;
4906 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004907 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4908 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004909 }
Owen Taylor3473f882001-02-23 17:55:21 +00004910 isParameter = 1;
4911 }
4912
Daniel Veillard76d66f42001-05-16 21:05:17 +00004913 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004914 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004915 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
4916 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004917 return;
4918 }
Daniel Veillard37334572008-07-31 08:20:02 +00004919 if (xmlStrchr(name, ':') != NULL) {
4920 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4921 "colon are forbidden from entities names '%s'\n",
4922 name, NULL, NULL);
4923 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00004924 skipped = SKIP_BLANKS;
4925 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004926 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4927 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004928 }
Owen Taylor3473f882001-02-23 17:55:21 +00004929
Daniel Veillardf5582f12002-06-11 10:08:16 +00004930 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00004931 /*
4932 * handle the various case of definitions...
4933 */
4934 if (isParameter) {
4935 if ((RAW == '"') || (RAW == '\'')) {
4936 value = xmlParseEntityValue(ctxt, &orig);
4937 if (value) {
4938 if ((ctxt->sax != NULL) &&
4939 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4940 ctxt->sax->entityDecl(ctxt->userData, name,
4941 XML_INTERNAL_PARAMETER_ENTITY,
4942 NULL, NULL, value);
4943 }
4944 } else {
4945 URI = xmlParseExternalID(ctxt, &literal, 1);
4946 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004947 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004948 }
4949 if (URI) {
4950 xmlURIPtr uri;
4951
4952 uri = xmlParseURI((const char *) URI);
4953 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00004954 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
4955 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004956 /*
4957 * This really ought to be a well formedness error
4958 * but the XML Core WG decided otherwise c.f. issue
4959 * E26 of the XML erratas.
4960 */
Owen Taylor3473f882001-02-23 17:55:21 +00004961 } else {
4962 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004963 /*
4964 * Okay this is foolish to block those but not
4965 * invalid URIs.
4966 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004967 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004968 } else {
4969 if ((ctxt->sax != NULL) &&
4970 (!ctxt->disableSAX) &&
4971 (ctxt->sax->entityDecl != NULL))
4972 ctxt->sax->entityDecl(ctxt->userData, name,
4973 XML_EXTERNAL_PARAMETER_ENTITY,
4974 literal, URI, NULL);
4975 }
4976 xmlFreeURI(uri);
4977 }
4978 }
4979 }
4980 } else {
4981 if ((RAW == '"') || (RAW == '\'')) {
4982 value = xmlParseEntityValue(ctxt, &orig);
4983 if ((ctxt->sax != NULL) &&
4984 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
4985 ctxt->sax->entityDecl(ctxt->userData, name,
4986 XML_INTERNAL_GENERAL_ENTITY,
4987 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00004988 /*
4989 * For expat compatibility in SAX mode.
4990 */
4991 if ((ctxt->myDoc == NULL) ||
4992 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
4993 if (ctxt->myDoc == NULL) {
4994 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00004995 if (ctxt->myDoc == NULL) {
4996 xmlErrMemory(ctxt, "New Doc failed");
4997 return;
4998 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00004999 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005000 }
5001 if (ctxt->myDoc->intSubset == NULL)
5002 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5003 BAD_CAST "fake", NULL, NULL);
5004
Daniel Veillard1af9a412003-08-20 22:54:39 +00005005 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5006 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005007 }
Owen Taylor3473f882001-02-23 17:55:21 +00005008 } else {
5009 URI = xmlParseExternalID(ctxt, &literal, 1);
5010 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005011 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005012 }
5013 if (URI) {
5014 xmlURIPtr uri;
5015
5016 uri = xmlParseURI((const char *)URI);
5017 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005018 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5019 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005020 /*
5021 * This really ought to be a well formedness error
5022 * but the XML Core WG decided otherwise c.f. issue
5023 * E26 of the XML erratas.
5024 */
Owen Taylor3473f882001-02-23 17:55:21 +00005025 } else {
5026 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005027 /*
5028 * Okay this is foolish to block those but not
5029 * invalid URIs.
5030 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005031 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005032 }
5033 xmlFreeURI(uri);
5034 }
5035 }
William M. Brack76e95df2003-10-18 16:20:14 +00005036 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005037 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5038 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005039 }
5040 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005041 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005042 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005043 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005044 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5045 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005046 }
5047 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005048 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005049 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5050 (ctxt->sax->unparsedEntityDecl != NULL))
5051 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5052 literal, URI, ndata);
5053 } else {
5054 if ((ctxt->sax != NULL) &&
5055 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5056 ctxt->sax->entityDecl(ctxt->userData, name,
5057 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5058 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005059 /*
5060 * For expat compatibility in SAX mode.
5061 * assuming the entity repalcement was asked for
5062 */
5063 if ((ctxt->replaceEntities != 0) &&
5064 ((ctxt->myDoc == NULL) ||
5065 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5066 if (ctxt->myDoc == NULL) {
5067 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005068 if (ctxt->myDoc == NULL) {
5069 xmlErrMemory(ctxt, "New Doc failed");
5070 return;
5071 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005072 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005073 }
5074
5075 if (ctxt->myDoc->intSubset == NULL)
5076 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5077 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005078 xmlSAX2EntityDecl(ctxt, name,
5079 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5080 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005081 }
Owen Taylor3473f882001-02-23 17:55:21 +00005082 }
5083 }
5084 }
5085 SKIP_BLANKS;
5086 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005087 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005088 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005089 } else {
5090 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005091 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5092 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005093 }
5094 NEXT;
5095 }
5096 if (orig != NULL) {
5097 /*
5098 * Ugly mechanism to save the raw entity value.
5099 */
5100 xmlEntityPtr cur = NULL;
5101
5102 if (isParameter) {
5103 if ((ctxt->sax != NULL) &&
5104 (ctxt->sax->getParameterEntity != NULL))
5105 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5106 } else {
5107 if ((ctxt->sax != NULL) &&
5108 (ctxt->sax->getEntity != NULL))
5109 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005110 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005111 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005112 }
Owen Taylor3473f882001-02-23 17:55:21 +00005113 }
5114 if (cur != NULL) {
Daniel Veillardf4f4e482008-08-25 08:57:48 +00005115 cur->checked = ctxt->nbentities - oldnbent;
Owen Taylor3473f882001-02-23 17:55:21 +00005116 if (cur->orig != NULL)
5117 xmlFree(orig);
5118 else
5119 cur->orig = orig;
5120 } else
5121 xmlFree(orig);
5122 }
Owen Taylor3473f882001-02-23 17:55:21 +00005123 if (value != NULL) xmlFree(value);
5124 if (URI != NULL) xmlFree(URI);
5125 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005126 }
5127}
5128
5129/**
5130 * xmlParseDefaultDecl:
5131 * @ctxt: an XML parser context
5132 * @value: Receive a possible fixed default value for the attribute
5133 *
5134 * Parse an attribute default declaration
5135 *
5136 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5137 *
5138 * [ VC: Required Attribute ]
5139 * if the default declaration is the keyword #REQUIRED, then the
5140 * attribute must be specified for all elements of the type in the
5141 * attribute-list declaration.
5142 *
5143 * [ VC: Attribute Default Legal ]
5144 * The declared default value must meet the lexical constraints of
5145 * the declared attribute type c.f. xmlValidateAttributeDecl()
5146 *
5147 * [ VC: Fixed Attribute Default ]
5148 * if an attribute has a default value declared with the #FIXED
5149 * keyword, instances of that attribute must match the default value.
5150 *
5151 * [ WFC: No < in Attribute Values ]
5152 * handled in xmlParseAttValue()
5153 *
5154 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5155 * or XML_ATTRIBUTE_FIXED.
5156 */
5157
5158int
5159xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5160 int val;
5161 xmlChar *ret;
5162
5163 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005164 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005165 SKIP(9);
5166 return(XML_ATTRIBUTE_REQUIRED);
5167 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005168 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005169 SKIP(8);
5170 return(XML_ATTRIBUTE_IMPLIED);
5171 }
5172 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005173 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005174 SKIP(6);
5175 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005176 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005177 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5178 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005179 }
5180 SKIP_BLANKS;
5181 }
5182 ret = xmlParseAttValue(ctxt);
5183 ctxt->instate = XML_PARSER_DTD;
5184 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005185 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005186 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005187 } else
5188 *value = ret;
5189 return(val);
5190}
5191
5192/**
5193 * xmlParseNotationType:
5194 * @ctxt: an XML parser context
5195 *
5196 * parse an Notation attribute type.
5197 *
5198 * Note: the leading 'NOTATION' S part has already being parsed...
5199 *
5200 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5201 *
5202 * [ VC: Notation Attributes ]
5203 * Values of this type must match one of the notation names included
5204 * in the declaration; all notation names in the declaration must be declared.
5205 *
5206 * Returns: the notation attribute tree built while parsing
5207 */
5208
5209xmlEnumerationPtr
5210xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005211 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005212 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005213
5214 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005215 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005216 return(NULL);
5217 }
5218 SHRINK;
5219 do {
5220 NEXT;
5221 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005222 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005223 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005224 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5225 "Name expected in NOTATION declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005226 return(ret);
5227 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005228 tmp = ret;
5229 while (tmp != NULL) {
5230 if (xmlStrEqual(name, tmp->name)) {
5231 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5232 "standalone: attribute notation value token %s duplicated\n",
5233 name, NULL);
5234 if (!xmlDictOwns(ctxt->dict, name))
5235 xmlFree((xmlChar *) name);
5236 break;
5237 }
5238 tmp = tmp->next;
5239 }
5240 if (tmp == NULL) {
5241 cur = xmlCreateEnumeration(name);
5242 if (cur == NULL) return(ret);
5243 if (last == NULL) ret = last = cur;
5244 else {
5245 last->next = cur;
5246 last = cur;
5247 }
Owen Taylor3473f882001-02-23 17:55:21 +00005248 }
5249 SKIP_BLANKS;
5250 } while (RAW == '|');
5251 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005252 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005253 if ((last != NULL) && (last != ret))
5254 xmlFreeEnumeration(last);
5255 return(ret);
5256 }
5257 NEXT;
5258 return(ret);
5259}
5260
5261/**
5262 * xmlParseEnumerationType:
5263 * @ctxt: an XML parser context
5264 *
5265 * parse an Enumeration attribute type.
5266 *
5267 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5268 *
5269 * [ VC: Enumeration ]
5270 * Values of this type must match one of the Nmtoken tokens in
5271 * the declaration
5272 *
5273 * Returns: the enumeration attribute tree built while parsing
5274 */
5275
5276xmlEnumerationPtr
5277xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5278 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005279 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005280
5281 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005282 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005283 return(NULL);
5284 }
5285 SHRINK;
5286 do {
5287 NEXT;
5288 SKIP_BLANKS;
5289 name = xmlParseNmtoken(ctxt);
5290 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005291 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005292 return(ret);
5293 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005294 tmp = ret;
5295 while (tmp != NULL) {
5296 if (xmlStrEqual(name, tmp->name)) {
5297 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5298 "standalone: attribute enumeration value token %s duplicated\n",
5299 name, NULL);
5300 if (!xmlDictOwns(ctxt->dict, name))
5301 xmlFree(name);
5302 break;
5303 }
5304 tmp = tmp->next;
5305 }
5306 if (tmp == NULL) {
5307 cur = xmlCreateEnumeration(name);
5308 if (!xmlDictOwns(ctxt->dict, name))
5309 xmlFree(name);
5310 if (cur == NULL) return(ret);
5311 if (last == NULL) ret = last = cur;
5312 else {
5313 last->next = cur;
5314 last = cur;
5315 }
Owen Taylor3473f882001-02-23 17:55:21 +00005316 }
5317 SKIP_BLANKS;
5318 } while (RAW == '|');
5319 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005320 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005321 return(ret);
5322 }
5323 NEXT;
5324 return(ret);
5325}
5326
5327/**
5328 * xmlParseEnumeratedType:
5329 * @ctxt: an XML parser context
5330 * @tree: the enumeration tree built while parsing
5331 *
5332 * parse an Enumerated attribute type.
5333 *
5334 * [57] EnumeratedType ::= NotationType | Enumeration
5335 *
5336 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5337 *
5338 *
5339 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5340 */
5341
5342int
5343xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005344 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005345 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005346 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005347 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5348 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005349 return(0);
5350 }
5351 SKIP_BLANKS;
5352 *tree = xmlParseNotationType(ctxt);
5353 if (*tree == NULL) return(0);
5354 return(XML_ATTRIBUTE_NOTATION);
5355 }
5356 *tree = xmlParseEnumerationType(ctxt);
5357 if (*tree == NULL) return(0);
5358 return(XML_ATTRIBUTE_ENUMERATION);
5359}
5360
5361/**
5362 * xmlParseAttributeType:
5363 * @ctxt: an XML parser context
5364 * @tree: the enumeration tree built while parsing
5365 *
5366 * parse the Attribute list def for an element
5367 *
5368 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5369 *
5370 * [55] StringType ::= 'CDATA'
5371 *
5372 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5373 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5374 *
5375 * Validity constraints for attribute values syntax are checked in
5376 * xmlValidateAttributeValue()
5377 *
5378 * [ VC: ID ]
5379 * Values of type ID must match the Name production. A name must not
5380 * appear more than once in an XML document as a value of this type;
5381 * i.e., ID values must uniquely identify the elements which bear them.
5382 *
5383 * [ VC: One ID per Element Type ]
5384 * No element type may have more than one ID attribute specified.
5385 *
5386 * [ VC: ID Attribute Default ]
5387 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5388 *
5389 * [ VC: IDREF ]
5390 * Values of type IDREF must match the Name production, and values
5391 * of type IDREFS must match Names; each IDREF Name must match the value
5392 * of an ID attribute on some element in the XML document; i.e. IDREF
5393 * values must match the value of some ID attribute.
5394 *
5395 * [ VC: Entity Name ]
5396 * Values of type ENTITY must match the Name production, values
5397 * of type ENTITIES must match Names; each Entity Name must match the
5398 * name of an unparsed entity declared in the DTD.
5399 *
5400 * [ VC: Name Token ]
5401 * Values of type NMTOKEN must match the Nmtoken production; values
5402 * of type NMTOKENS must match Nmtokens.
5403 *
5404 * Returns the attribute type
5405 */
5406int
5407xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5408 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005409 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005410 SKIP(5);
5411 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005412 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005413 SKIP(6);
5414 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005415 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005416 SKIP(5);
5417 return(XML_ATTRIBUTE_IDREF);
5418 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5419 SKIP(2);
5420 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005421 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005422 SKIP(6);
5423 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005424 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005425 SKIP(8);
5426 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005427 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005428 SKIP(8);
5429 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005430 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005431 SKIP(7);
5432 return(XML_ATTRIBUTE_NMTOKEN);
5433 }
5434 return(xmlParseEnumeratedType(ctxt, tree));
5435}
5436
5437/**
5438 * xmlParseAttributeListDecl:
5439 * @ctxt: an XML parser context
5440 *
5441 * : parse the Attribute list def for an element
5442 *
5443 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5444 *
5445 * [53] AttDef ::= S Name S AttType S DefaultDecl
5446 *
5447 */
5448void
5449xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005450 const xmlChar *elemName;
5451 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005452 xmlEnumerationPtr tree;
5453
Daniel Veillarda07050d2003-10-19 14:46:32 +00005454 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005455 xmlParserInputPtr input = ctxt->input;
5456
5457 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005458 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005459 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005460 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005461 }
5462 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005463 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005464 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005465 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5466 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005467 return;
5468 }
5469 SKIP_BLANKS;
5470 GROW;
5471 while (RAW != '>') {
5472 const xmlChar *check = CUR_PTR;
5473 int type;
5474 int def;
5475 xmlChar *defaultValue = NULL;
5476
5477 GROW;
5478 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005479 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005480 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005481 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5482 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005483 break;
5484 }
5485 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005486 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005487 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005488 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005489 break;
5490 }
5491 SKIP_BLANKS;
5492
5493 type = xmlParseAttributeType(ctxt, &tree);
5494 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005495 break;
5496 }
5497
5498 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005499 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005500 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5501 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005502 if (tree != NULL)
5503 xmlFreeEnumeration(tree);
5504 break;
5505 }
5506 SKIP_BLANKS;
5507
5508 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5509 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005510 if (defaultValue != NULL)
5511 xmlFree(defaultValue);
5512 if (tree != NULL)
5513 xmlFreeEnumeration(tree);
5514 break;
5515 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005516 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5517 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005518
5519 GROW;
5520 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005521 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005522 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005523 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005524 if (defaultValue != NULL)
5525 xmlFree(defaultValue);
5526 if (tree != NULL)
5527 xmlFreeEnumeration(tree);
5528 break;
5529 }
5530 SKIP_BLANKS;
5531 }
5532 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005533 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5534 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005535 if (defaultValue != NULL)
5536 xmlFree(defaultValue);
5537 if (tree != NULL)
5538 xmlFreeEnumeration(tree);
5539 break;
5540 }
5541 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5542 (ctxt->sax->attributeDecl != NULL))
5543 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5544 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005545 else if (tree != NULL)
5546 xmlFreeEnumeration(tree);
5547
5548 if ((ctxt->sax2) && (defaultValue != NULL) &&
5549 (def != XML_ATTRIBUTE_IMPLIED) &&
5550 (def != XML_ATTRIBUTE_REQUIRED)) {
5551 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5552 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005553 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005554 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5555 }
Owen Taylor3473f882001-02-23 17:55:21 +00005556 if (defaultValue != NULL)
5557 xmlFree(defaultValue);
5558 GROW;
5559 }
5560 if (RAW == '>') {
5561 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005562 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5563 "Attribute list declaration doesn't start and stop in the same entity\n",
5564 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005565 }
5566 NEXT;
5567 }
Owen Taylor3473f882001-02-23 17:55:21 +00005568 }
5569}
5570
5571/**
5572 * xmlParseElementMixedContentDecl:
5573 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005574 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005575 *
5576 * parse the declaration for a Mixed Element content
5577 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5578 *
5579 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5580 * '(' S? '#PCDATA' S? ')'
5581 *
5582 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5583 *
5584 * [ VC: No Duplicate Types ]
5585 * The same name must not appear more than once in a single
5586 * mixed-content declaration.
5587 *
5588 * returns: the list of the xmlElementContentPtr describing the element choices
5589 */
5590xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005591xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005592 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005593 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005594
5595 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005596 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005597 SKIP(7);
5598 SKIP_BLANKS;
5599 SHRINK;
5600 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005601 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005602 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5603"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005604 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005605 }
Owen Taylor3473f882001-02-23 17:55:21 +00005606 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005607 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005608 if (ret == NULL)
5609 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005610 if (RAW == '*') {
5611 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5612 NEXT;
5613 }
5614 return(ret);
5615 }
5616 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005617 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005618 if (ret == NULL) return(NULL);
5619 }
5620 while (RAW == '|') {
5621 NEXT;
5622 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005623 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005624 if (ret == NULL) return(NULL);
5625 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005626 if (cur != NULL)
5627 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005628 cur = ret;
5629 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005630 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005631 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005632 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005633 if (n->c1 != NULL)
5634 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005635 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005636 if (n != NULL)
5637 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005638 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005639 }
5640 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005641 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005642 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005643 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005644 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005645 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005646 return(NULL);
5647 }
5648 SKIP_BLANKS;
5649 GROW;
5650 }
5651 if ((RAW == ')') && (NXT(1) == '*')) {
5652 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005653 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005654 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005655 if (cur->c2 != NULL)
5656 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005657 }
5658 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005659 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005660 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5661"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005662 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005663 }
Owen Taylor3473f882001-02-23 17:55:21 +00005664 SKIP(2);
5665 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005666 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005667 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005668 return(NULL);
5669 }
5670
5671 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005672 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005673 }
5674 return(ret);
5675}
5676
5677/**
5678 * xmlParseElementChildrenContentDecl:
5679 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005680 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005681 *
5682 * parse the declaration for a Mixed Element content
5683 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5684 *
5685 *
5686 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5687 *
5688 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5689 *
5690 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5691 *
5692 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5693 *
5694 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5695 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005696 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005697 * opening or closing parentheses in a choice, seq, or Mixed
5698 * construct is contained in the replacement text for a parameter
5699 * entity, both must be contained in the same replacement text. For
5700 * interoperability, if a parameter-entity reference appears in a
5701 * choice, seq, or Mixed construct, its replacement text should not
5702 * be empty, and neither the first nor last non-blank character of
5703 * the replacement text should be a connector (| or ,).
5704 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005705 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005706 * hierarchy.
5707 */
5708xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005709xmlParseElementChildrenContentDecl (xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005710 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005711 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005712 xmlChar type = 0;
5713
5714 SKIP_BLANKS;
5715 GROW;
5716 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005717 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005718
Owen Taylor3473f882001-02-23 17:55:21 +00005719 /* Recurse on first child */
5720 NEXT;
5721 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005722 cur = ret = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005723 SKIP_BLANKS;
5724 GROW;
5725 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005726 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005727 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005728 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005729 return(NULL);
5730 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005731 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005732 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005733 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00005734 return(NULL);
5735 }
Owen Taylor3473f882001-02-23 17:55:21 +00005736 GROW;
5737 if (RAW == '?') {
5738 cur->ocur = XML_ELEMENT_CONTENT_OPT;
5739 NEXT;
5740 } else if (RAW == '*') {
5741 cur->ocur = XML_ELEMENT_CONTENT_MULT;
5742 NEXT;
5743 } else if (RAW == '+') {
5744 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
5745 NEXT;
5746 } else {
5747 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
5748 }
Owen Taylor3473f882001-02-23 17:55:21 +00005749 GROW;
5750 }
5751 SKIP_BLANKS;
5752 SHRINK;
5753 while (RAW != ')') {
5754 /*
5755 * Each loop we parse one separator and one element.
5756 */
5757 if (RAW == ',') {
5758 if (type == 0) type = CUR;
5759
5760 /*
5761 * Detect "Name | Name , Name" error
5762 */
5763 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005764 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005765 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005766 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005767 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005768 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005769 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005770 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005771 return(NULL);
5772 }
5773 NEXT;
5774
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005775 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00005776 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005777 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005778 xmlFreeDocElementContent(ctxt->myDoc, last);
5779 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005780 return(NULL);
5781 }
5782 if (last == NULL) {
5783 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005784 if (ret != NULL)
5785 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005786 ret = cur = op;
5787 } else {
5788 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005789 if (op != NULL)
5790 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005791 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005792 if (last != NULL)
5793 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005794 cur =op;
5795 last = NULL;
5796 }
5797 } else if (RAW == '|') {
5798 if (type == 0) type = CUR;
5799
5800 /*
5801 * Detect "Name , Name | Name" error
5802 */
5803 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005804 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005805 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005806 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005807 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005808 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005809 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005810 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005811 return(NULL);
5812 }
5813 NEXT;
5814
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005815 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005816 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00005817 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005818 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005819 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005820 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005821 return(NULL);
5822 }
5823 if (last == NULL) {
5824 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005825 if (ret != NULL)
5826 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005827 ret = cur = op;
5828 } else {
5829 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005830 if (op != NULL)
5831 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005832 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005833 if (last != NULL)
5834 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00005835 cur =op;
5836 last = NULL;
5837 }
5838 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005839 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00005840 if ((last != NULL) && (last != ret))
5841 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00005842 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005843 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005844 return(NULL);
5845 }
5846 GROW;
5847 SKIP_BLANKS;
5848 GROW;
5849 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005850 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005851 /* Recurse on second child */
5852 NEXT;
5853 SKIP_BLANKS;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005854 last = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005855 SKIP_BLANKS;
5856 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005857 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005858 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005859 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005860 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005861 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00005862 return(NULL);
5863 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005864 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005865 if (last == NULL) {
5866 if (ret != NULL)
5867 xmlFreeDocElementContent(ctxt->myDoc, ret);
5868 return(NULL);
5869 }
Owen Taylor3473f882001-02-23 17:55:21 +00005870 if (RAW == '?') {
5871 last->ocur = XML_ELEMENT_CONTENT_OPT;
5872 NEXT;
5873 } else if (RAW == '*') {
5874 last->ocur = XML_ELEMENT_CONTENT_MULT;
5875 NEXT;
5876 } else if (RAW == '+') {
5877 last->ocur = XML_ELEMENT_CONTENT_PLUS;
5878 NEXT;
5879 } else {
5880 last->ocur = XML_ELEMENT_CONTENT_ONCE;
5881 }
5882 }
5883 SKIP_BLANKS;
5884 GROW;
5885 }
5886 if ((cur != NULL) && (last != NULL)) {
5887 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005888 if (last != NULL)
5889 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005890 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005891 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005892 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5893"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005894 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005895 }
Owen Taylor3473f882001-02-23 17:55:21 +00005896 NEXT;
5897 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005898 if (ret != NULL) {
5899 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
5900 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5901 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5902 else
5903 ret->ocur = XML_ELEMENT_CONTENT_OPT;
5904 }
Owen Taylor3473f882001-02-23 17:55:21 +00005905 NEXT;
5906 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005907 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00005908 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005909 cur = ret;
5910 /*
5911 * Some normalization:
5912 * (a | b* | c?)* == (a | b | c)*
5913 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005914 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005915 if ((cur->c1 != NULL) &&
5916 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5917 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
5918 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5919 if ((cur->c2 != NULL) &&
5920 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5921 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
5922 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5923 cur = cur->c2;
5924 }
5925 }
Owen Taylor3473f882001-02-23 17:55:21 +00005926 NEXT;
5927 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005928 if (ret != NULL) {
5929 int found = 0;
5930
William M. Brackf8f2e8f2004-05-14 04:37:41 +00005931 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
5932 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
5933 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00005934 else
5935 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005936 /*
5937 * Some normalization:
5938 * (a | b*)+ == (a | b)*
5939 * (a | b?)+ == (a | b)*
5940 */
Daniel Veillard30e76072006-03-09 14:13:55 +00005941 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00005942 if ((cur->c1 != NULL) &&
5943 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
5944 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
5945 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
5946 found = 1;
5947 }
5948 if ((cur->c2 != NULL) &&
5949 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
5950 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
5951 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
5952 found = 1;
5953 }
5954 cur = cur->c2;
5955 }
5956 if (found)
5957 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5958 }
Owen Taylor3473f882001-02-23 17:55:21 +00005959 NEXT;
5960 }
5961 return(ret);
5962}
5963
5964/**
5965 * xmlParseElementContentDecl:
5966 * @ctxt: an XML parser context
5967 * @name: the name of the element being defined.
5968 * @result: the Element Content pointer will be stored here if any
5969 *
5970 * parse the declaration for an Element content either Mixed or Children,
5971 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
5972 *
5973 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
5974 *
5975 * returns: the type of element content XML_ELEMENT_TYPE_xxx
5976 */
5977
5978int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005979xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00005980 xmlElementContentPtr *result) {
5981
5982 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005983 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00005984 int res;
5985
5986 *result = NULL;
5987
5988 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005989 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005990 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005991 return(-1);
5992 }
5993 NEXT;
5994 GROW;
5995 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005996 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005997 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00005998 res = XML_ELEMENT_TYPE_MIXED;
5999 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006000 tree = xmlParseElementChildrenContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006001 res = XML_ELEMENT_TYPE_ELEMENT;
6002 }
Owen Taylor3473f882001-02-23 17:55:21 +00006003 SKIP_BLANKS;
6004 *result = tree;
6005 return(res);
6006}
6007
6008/**
6009 * xmlParseElementDecl:
6010 * @ctxt: an XML parser context
6011 *
6012 * parse an Element declaration.
6013 *
6014 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6015 *
6016 * [ VC: Unique Element Type Declaration ]
6017 * No element type may be declared more than once
6018 *
6019 * Returns the type of the element, or -1 in case of error
6020 */
6021int
6022xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006023 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006024 int ret = -1;
6025 xmlElementContentPtr content = NULL;
6026
Daniel Veillard4c778d82005-01-23 17:37:44 +00006027 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006028 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006029 xmlParserInputPtr input = ctxt->input;
6030
6031 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006032 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006033 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6034 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006035 }
6036 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006037 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006038 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006039 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6040 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006041 return(-1);
6042 }
6043 while ((RAW == 0) && (ctxt->inputNr > 1))
6044 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006045 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006046 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6047 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006048 }
6049 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006050 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006051 SKIP(5);
6052 /*
6053 * Element must always be empty.
6054 */
6055 ret = XML_ELEMENT_TYPE_EMPTY;
6056 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6057 (NXT(2) == 'Y')) {
6058 SKIP(3);
6059 /*
6060 * Element is a generic container.
6061 */
6062 ret = XML_ELEMENT_TYPE_ANY;
6063 } else if (RAW == '(') {
6064 ret = xmlParseElementContentDecl(ctxt, name, &content);
6065 } else {
6066 /*
6067 * [ WFC: PEs in Internal Subset ] error handling.
6068 */
6069 if ((RAW == '%') && (ctxt->external == 0) &&
6070 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006071 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006072 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006073 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006074 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006075 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6076 }
Owen Taylor3473f882001-02-23 17:55:21 +00006077 return(-1);
6078 }
6079
6080 SKIP_BLANKS;
6081 /*
6082 * Pop-up of finished entities.
6083 */
6084 while ((RAW == 0) && (ctxt->inputNr > 1))
6085 xmlPopInput(ctxt);
6086 SKIP_BLANKS;
6087
6088 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006089 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006090 if (content != NULL) {
6091 xmlFreeDocElementContent(ctxt->myDoc, content);
6092 }
Owen Taylor3473f882001-02-23 17:55:21 +00006093 } else {
6094 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006095 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6096 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006097 }
6098
6099 NEXT;
6100 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006101 (ctxt->sax->elementDecl != NULL)) {
6102 if (content != NULL)
6103 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006104 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6105 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006106 if ((content != NULL) && (content->parent == NULL)) {
6107 /*
6108 * this is a trick: if xmlAddElementDecl is called,
6109 * instead of copying the full tree it is plugged directly
6110 * if called from the parser. Avoid duplicating the
6111 * interfaces or change the API/ABI
6112 */
6113 xmlFreeDocElementContent(ctxt->myDoc, content);
6114 }
6115 } else if (content != NULL) {
6116 xmlFreeDocElementContent(ctxt->myDoc, content);
6117 }
Owen Taylor3473f882001-02-23 17:55:21 +00006118 }
Owen Taylor3473f882001-02-23 17:55:21 +00006119 }
6120 return(ret);
6121}
6122
6123/**
Owen Taylor3473f882001-02-23 17:55:21 +00006124 * xmlParseConditionalSections
6125 * @ctxt: an XML parser context
6126 *
6127 * [61] conditionalSect ::= includeSect | ignoreSect
6128 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6129 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6130 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6131 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6132 */
6133
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006134static void
Owen Taylor3473f882001-02-23 17:55:21 +00006135xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006136 int id = ctxt->input->id;
6137
Owen Taylor3473f882001-02-23 17:55:21 +00006138 SKIP(3);
6139 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006140 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006141 SKIP(7);
6142 SKIP_BLANKS;
6143 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006144 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006145 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006146 if (ctxt->input->id != id) {
6147 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6148 "All markup of the conditional section is not in the same entity\n",
6149 NULL, NULL);
6150 }
Owen Taylor3473f882001-02-23 17:55:21 +00006151 NEXT;
6152 }
6153 if (xmlParserDebugEntities) {
6154 if ((ctxt->input != NULL) && (ctxt->input->filename))
6155 xmlGenericError(xmlGenericErrorContext,
6156 "%s(%d): ", ctxt->input->filename,
6157 ctxt->input->line);
6158 xmlGenericError(xmlGenericErrorContext,
6159 "Entering INCLUDE Conditional Section\n");
6160 }
6161
6162 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6163 (NXT(2) != '>'))) {
6164 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006165 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006166
6167 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6168 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006169 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006170 NEXT;
6171 } else if (RAW == '%') {
6172 xmlParsePEReference(ctxt);
6173 } else
6174 xmlParseMarkupDecl(ctxt);
6175
6176 /*
6177 * Pop-up of finished entities.
6178 */
6179 while ((RAW == 0) && (ctxt->inputNr > 1))
6180 xmlPopInput(ctxt);
6181
Daniel Veillardfdc91562002-07-01 21:52:03 +00006182 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006183 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006184 break;
6185 }
6186 }
6187 if (xmlParserDebugEntities) {
6188 if ((ctxt->input != NULL) && (ctxt->input->filename))
6189 xmlGenericError(xmlGenericErrorContext,
6190 "%s(%d): ", ctxt->input->filename,
6191 ctxt->input->line);
6192 xmlGenericError(xmlGenericErrorContext,
6193 "Leaving INCLUDE Conditional Section\n");
6194 }
6195
Daniel Veillarda07050d2003-10-19 14:46:32 +00006196 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006197 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006198 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006199 int depth = 0;
6200
6201 SKIP(6);
6202 SKIP_BLANKS;
6203 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006204 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006205 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006206 if (ctxt->input->id != id) {
6207 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6208 "All markup of the conditional section is not in the same entity\n",
6209 NULL, NULL);
6210 }
Owen Taylor3473f882001-02-23 17:55:21 +00006211 NEXT;
6212 }
6213 if (xmlParserDebugEntities) {
6214 if ((ctxt->input != NULL) && (ctxt->input->filename))
6215 xmlGenericError(xmlGenericErrorContext,
6216 "%s(%d): ", ctxt->input->filename,
6217 ctxt->input->line);
6218 xmlGenericError(xmlGenericErrorContext,
6219 "Entering IGNORE Conditional Section\n");
6220 }
6221
6222 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006223 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006224 * But disable SAX event generating DTD building in the meantime
6225 */
6226 state = ctxt->disableSAX;
6227 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006228 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006229 ctxt->instate = XML_PARSER_IGNORE;
6230
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006231 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006232 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6233 depth++;
6234 SKIP(3);
6235 continue;
6236 }
6237 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6238 if (--depth >= 0) SKIP(3);
6239 continue;
6240 }
6241 NEXT;
6242 continue;
6243 }
6244
6245 ctxt->disableSAX = state;
6246 ctxt->instate = instate;
6247
6248 if (xmlParserDebugEntities) {
6249 if ((ctxt->input != NULL) && (ctxt->input->filename))
6250 xmlGenericError(xmlGenericErrorContext,
6251 "%s(%d): ", ctxt->input->filename,
6252 ctxt->input->line);
6253 xmlGenericError(xmlGenericErrorContext,
6254 "Leaving IGNORE Conditional Section\n");
6255 }
6256
6257 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006258 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006259 }
6260
6261 if (RAW == 0)
6262 SHRINK;
6263
6264 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006265 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006266 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006267 if (ctxt->input->id != id) {
6268 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6269 "All markup of the conditional section is not in the same entity\n",
6270 NULL, NULL);
6271 }
Owen Taylor3473f882001-02-23 17:55:21 +00006272 SKIP(3);
6273 }
6274}
6275
6276/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006277 * xmlParseMarkupDecl:
6278 * @ctxt: an XML parser context
6279 *
6280 * parse Markup declarations
6281 *
6282 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6283 * NotationDecl | PI | Comment
6284 *
6285 * [ VC: Proper Declaration/PE Nesting ]
6286 * Parameter-entity replacement text must be properly nested with
6287 * markup declarations. That is to say, if either the first character
6288 * or the last character of a markup declaration (markupdecl above) is
6289 * contained in the replacement text for a parameter-entity reference,
6290 * both must be contained in the same replacement text.
6291 *
6292 * [ WFC: PEs in Internal Subset ]
6293 * In the internal DTD subset, parameter-entity references can occur
6294 * only where markup declarations can occur, not within markup declarations.
6295 * (This does not apply to references that occur in external parameter
6296 * entities or to the external subset.)
6297 */
6298void
6299xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6300 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006301 if (CUR == '<') {
6302 if (NXT(1) == '!') {
6303 switch (NXT(2)) {
6304 case 'E':
6305 if (NXT(3) == 'L')
6306 xmlParseElementDecl(ctxt);
6307 else if (NXT(3) == 'N')
6308 xmlParseEntityDecl(ctxt);
6309 break;
6310 case 'A':
6311 xmlParseAttributeListDecl(ctxt);
6312 break;
6313 case 'N':
6314 xmlParseNotationDecl(ctxt);
6315 break;
6316 case '-':
6317 xmlParseComment(ctxt);
6318 break;
6319 default:
6320 /* there is an error but it will be detected later */
6321 break;
6322 }
6323 } else if (NXT(1) == '?') {
6324 xmlParsePI(ctxt);
6325 }
6326 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006327 /*
6328 * This is only for internal subset. On external entities,
6329 * the replacement is done before parsing stage
6330 */
6331 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6332 xmlParsePEReference(ctxt);
6333
6334 /*
6335 * Conditional sections are allowed from entities included
6336 * by PE References in the internal subset.
6337 */
6338 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6339 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6340 xmlParseConditionalSections(ctxt);
6341 }
6342 }
6343
6344 ctxt->instate = XML_PARSER_DTD;
6345}
6346
6347/**
6348 * xmlParseTextDecl:
6349 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006350 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006351 * parse an XML declaration header for external entities
6352 *
6353 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006354 */
6355
6356void
6357xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6358 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006359 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006360
6361 /*
6362 * We know that '<?xml' is here.
6363 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006364 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006365 SKIP(5);
6366 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006367 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006368 return;
6369 }
6370
William M. Brack76e95df2003-10-18 16:20:14 +00006371 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006372 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6373 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006374 }
6375 SKIP_BLANKS;
6376
6377 /*
6378 * We may have the VersionInfo here.
6379 */
6380 version = xmlParseVersionInfo(ctxt);
6381 if (version == NULL)
6382 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006383 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006384 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006385 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6386 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006387 }
6388 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006389 ctxt->input->version = version;
6390
6391 /*
6392 * We must have the encoding declaration
6393 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006394 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006395 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6396 /*
6397 * The XML REC instructs us to stop parsing right here
6398 */
6399 return;
6400 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006401 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6402 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6403 "Missing encoding in text declaration\n");
6404 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006405
6406 SKIP_BLANKS;
6407 if ((RAW == '?') && (NXT(1) == '>')) {
6408 SKIP(2);
6409 } else if (RAW == '>') {
6410 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006411 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006412 NEXT;
6413 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006414 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006415 MOVETO_ENDTAG(CUR_PTR);
6416 NEXT;
6417 }
6418}
6419
6420/**
Owen Taylor3473f882001-02-23 17:55:21 +00006421 * xmlParseExternalSubset:
6422 * @ctxt: an XML parser context
6423 * @ExternalID: the external identifier
6424 * @SystemID: the system identifier (or URL)
6425 *
6426 * parse Markup declarations from an external subset
6427 *
6428 * [30] extSubset ::= textDecl? extSubsetDecl
6429 *
6430 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6431 */
6432void
6433xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6434 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006435 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006436 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006437
6438 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
6439 (ctxt->input->end - ctxt->input->cur >= 4)) {
6440 xmlChar start[4];
6441 xmlCharEncoding enc;
6442
6443 start[0] = RAW;
6444 start[1] = NXT(1);
6445 start[2] = NXT(2);
6446 start[3] = NXT(3);
6447 enc = xmlDetectCharEncoding(start, 4);
6448 if (enc != XML_CHAR_ENCODING_NONE)
6449 xmlSwitchEncoding(ctxt, enc);
6450 }
6451
Daniel Veillarda07050d2003-10-19 14:46:32 +00006452 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006453 xmlParseTextDecl(ctxt);
6454 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6455 /*
6456 * The XML REC instructs us to stop parsing right here
6457 */
6458 ctxt->instate = XML_PARSER_EOF;
6459 return;
6460 }
6461 }
6462 if (ctxt->myDoc == NULL) {
6463 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006464 if (ctxt->myDoc == NULL) {
6465 xmlErrMemory(ctxt, "New Doc failed");
6466 return;
6467 }
6468 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006469 }
6470 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6471 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6472
6473 ctxt->instate = XML_PARSER_DTD;
6474 ctxt->external = 1;
6475 while (((RAW == '<') && (NXT(1) == '?')) ||
6476 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006477 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006478 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006479 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006480
6481 GROW;
6482 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6483 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006484 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006485 NEXT;
6486 } else if (RAW == '%') {
6487 xmlParsePEReference(ctxt);
6488 } else
6489 xmlParseMarkupDecl(ctxt);
6490
6491 /*
6492 * Pop-up of finished entities.
6493 */
6494 while ((RAW == 0) && (ctxt->inputNr > 1))
6495 xmlPopInput(ctxt);
6496
Daniel Veillardfdc91562002-07-01 21:52:03 +00006497 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006498 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006499 break;
6500 }
6501 }
6502
6503 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006504 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006505 }
6506
6507}
6508
6509/**
6510 * xmlParseReference:
6511 * @ctxt: an XML parser context
6512 *
6513 * parse and handle entity references in content, depending on the SAX
6514 * interface, this may end-up in a call to character() if this is a
6515 * CharRef, a predefined entity, if there is no reference() callback.
6516 * or if the parser was asked to switch to that mode.
6517 *
6518 * [67] Reference ::= EntityRef | CharRef
6519 */
6520void
6521xmlParseReference(xmlParserCtxtPtr ctxt) {
6522 xmlEntityPtr ent;
6523 xmlChar *val;
6524 if (RAW != '&') return;
6525
6526 if (NXT(1) == '#') {
6527 int i = 0;
6528 xmlChar out[10];
6529 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006530 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006531
Daniel Veillarddc171602008-03-26 17:41:38 +00006532 if (value == 0)
6533 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006534 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6535 /*
6536 * So we are using non-UTF-8 buffers
6537 * Check that the char fit on 8bits, if not
6538 * generate a CharRef.
6539 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006540 if (value <= 0xFF) {
6541 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006542 out[1] = 0;
6543 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6544 (!ctxt->disableSAX))
6545 ctxt->sax->characters(ctxt->userData, out, 1);
6546 } else {
6547 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006548 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006549 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006550 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006551 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6552 (!ctxt->disableSAX))
6553 ctxt->sax->reference(ctxt->userData, out);
6554 }
6555 } else {
6556 /*
6557 * Just encode the value in UTF-8
6558 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006559 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006560 out[i] = 0;
6561 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6562 (!ctxt->disableSAX))
6563 ctxt->sax->characters(ctxt->userData, out, i);
6564 }
6565 } else {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006566 int was_checked;
6567
Owen Taylor3473f882001-02-23 17:55:21 +00006568 ent = xmlParseEntityRef(ctxt);
6569 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006570 if (!ctxt->wellFormed)
6571 return;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006572 ctxt->nbentities++;
Daniel Veillard8915c152008-08-26 13:05:34 +00006573 if ((ctxt->nbentities >= 100000) &&
6574 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006575 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6576 return;
6577 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006578 was_checked = ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00006579 if ((ent->name != NULL) &&
6580 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
6581 xmlNodePtr list = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +00006582 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006583
6584
6585 /*
6586 * The first reference to the entity trigger a parsing phase
6587 * where the ent->children is filled with the result from
6588 * the parsing.
6589 */
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006590 if (ent->checked == 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00006591 xmlChar *value;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006592
Owen Taylor3473f882001-02-23 17:55:21 +00006593 value = ent->content;
6594
6595 /*
6596 * Check that this entity is well formed
6597 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +00006598 if ((value != NULL) && (value[0] != 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006599 (value[1] == 0) && (value[0] == '<') &&
6600 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
6601 /*
6602 * DONE: get definite answer on this !!!
6603 * Lots of entity decls are used to declare a single
6604 * char
6605 * <!ENTITY lt "<">
6606 * Which seems to be valid since
6607 * 2.4: The ampersand character (&) and the left angle
6608 * bracket (<) may appear in their literal form only
6609 * when used ... They are also legal within the literal
6610 * entity value of an internal entity declaration;i
6611 * see "4.3.2 Well-Formed Parsed Entities".
6612 * IMHO 2.4 and 4.3.2 are directly in contradiction.
6613 * Looking at the OASIS test suite and James Clark
6614 * tests, this is broken. However the XML REC uses
6615 * it. Is the XML REC not well-formed ????
6616 * This is a hack to avoid this problem
6617 *
6618 * ANSWER: since lt gt amp .. are already defined,
6619 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006620 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00006621 * is lousy but acceptable.
6622 */
6623 list = xmlNewDocText(ctxt->myDoc, value);
6624 if (list != NULL) {
6625 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
6626 (ent->children == NULL)) {
6627 ent->children = list;
6628 ent->last = list;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006629 ent->owner = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006630 list->parent = (xmlNodePtr) ent;
6631 } else {
6632 xmlFreeNodeList(list);
6633 }
6634 } else if (list != NULL) {
6635 xmlFreeNodeList(list);
6636 }
6637 } else {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006638 unsigned long oldnbent = ctxt->nbentities;
Owen Taylor3473f882001-02-23 17:55:21 +00006639 /*
6640 * 4.3.2: An internal general parsed entity is well-formed
6641 * if its replacement text matches the production labeled
6642 * content.
6643 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006644
6645 void *user_data;
6646 /*
6647 * This is a bit hackish but this seems the best
6648 * way to make sure both SAX and DOM entity support
6649 * behaves okay.
6650 */
6651 if (ctxt->userData == ctxt)
6652 user_data = NULL;
6653 else
6654 user_data = ctxt->userData;
6655
Owen Taylor3473f882001-02-23 17:55:21 +00006656 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6657 ctxt->depth++;
Daniel Veillard328f48c2002-11-15 15:24:34 +00006658 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6659 value, user_data, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006660 ctxt->depth--;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006661
Owen Taylor3473f882001-02-23 17:55:21 +00006662 } else if (ent->etype ==
6663 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6664 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006665 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00006666 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00006667 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00006668 ctxt->depth--;
6669 } else {
Daniel Veillard7d515752003-09-26 19:12:37 +00006670 ret = XML_ERR_ENTITY_PE_INTERNAL;
Daniel Veillardf403d292003-10-05 13:51:35 +00006671 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6672 "invalid entity type found\n", NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006673 }
Daniel Veillardf4f4e482008-08-25 08:57:48 +00006674 ent->checked = ctxt->nbentities - oldnbent;
Owen Taylor3473f882001-02-23 17:55:21 +00006675 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006676 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006677 return;
Daniel Veillard7d515752003-09-26 19:12:37 +00006678 } else if ((ret == XML_ERR_OK) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006679 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6680 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00006681 (ent->children == NULL)) {
6682 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006683 if (ctxt->replaceEntities) {
6684 /*
6685 * Prune it directly in the generated document
6686 * except for single text nodes.
6687 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006688 if (((list->type == XML_TEXT_NODE) &&
6689 (list->next == NULL)) ||
6690 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillard62f313b2001-07-04 19:49:14 +00006691 list->parent = (xmlNodePtr) ent;
6692 list = NULL;
Daniel Veillard2d84a892002-12-30 00:01:08 +00006693 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006694 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006695 ent->owner = 0;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006696 while (list != NULL) {
6697 list->parent = (xmlNodePtr) ctxt->node;
Daniel Veillard68e9e742002-11-16 15:35:11 +00006698 list->doc = ctxt->myDoc;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006699 if (list->next == NULL)
6700 ent->last = list;
6701 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00006702 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006703 list = ent->children;
Daniel Veillard81273902003-09-30 00:43:48 +00006704#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006705 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6706 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard81273902003-09-30 00:43:48 +00006707#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006708 }
6709 } else {
Daniel Veillard2d84a892002-12-30 00:01:08 +00006710 ent->owner = 1;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006711 while (list != NULL) {
6712 list->parent = (xmlNodePtr) ent;
6713 if (list->next == NULL)
6714 ent->last = list;
6715 list = list->next;
6716 }
Owen Taylor3473f882001-02-23 17:55:21 +00006717 }
6718 } else {
6719 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006720 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006721 }
William M. Brackb670e2e2003-09-27 01:05:55 +00006722 } else if ((ret != XML_ERR_OK) &&
6723 (ret != XML_WAR_UNDECLARED_ENTITY)) {
Daniel Veillard1ca1be22007-05-02 16:50:03 +00006724 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
6725 "Entity '%s' failed to parse\n", ent->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006726 } else if (list != NULL) {
6727 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006728 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006729 }
6730 }
Daniel Veillardf4f4e482008-08-25 08:57:48 +00006731 if (ent->checked == 0)
6732 ent->checked = 1;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006733 }
Daniel Veillardf4f4e482008-08-25 08:57:48 +00006734 ctxt->nbentities += ent->checked;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006735
6736 if (ent->children == NULL) {
6737 /*
6738 * Probably running in SAX mode and the callbacks don't
6739 * build the entity content. So unless we already went
6740 * though parsing for first checking go though the entity
6741 * content to generate callbacks associated to the entity
6742 */
Daniel Veillardf4f4e482008-08-25 08:57:48 +00006743 if (was_checked != 0) {
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006744 void *user_data;
6745 /*
6746 * This is a bit hackish but this seems the best
6747 * way to make sure both SAX and DOM entity support
6748 * behaves okay.
6749 */
6750 if (ctxt->userData == ctxt)
6751 user_data = NULL;
6752 else
6753 user_data = ctxt->userData;
6754
6755 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6756 ctxt->depth++;
6757 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
6758 ent->content, user_data, NULL);
6759 ctxt->depth--;
6760 } else if (ent->etype ==
6761 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6762 ctxt->depth++;
6763 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
6764 ctxt->sax, user_data, ctxt->depth,
6765 ent->URI, ent->ExternalID, NULL);
6766 ctxt->depth--;
6767 } else {
6768 ret = XML_ERR_ENTITY_PE_INTERNAL;
6769 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6770 "invalid entity type found\n", NULL);
6771 }
6772 if (ret == XML_ERR_ENTITY_LOOP) {
6773 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
6774 return;
6775 }
6776 }
6777 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6778 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
6779 /*
6780 * Entity reference callback comes second, it's somewhat
6781 * superfluous but a compatibility to historical behaviour
6782 */
6783 ctxt->sax->reference(ctxt->userData, ent->name);
6784 }
6785 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006786 }
6787 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006788 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006789 /*
6790 * Create a node.
6791 */
6792 ctxt->sax->reference(ctxt->userData, ent->name);
6793 return;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006794 }
6795 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
William M. Brack1227fb32004-10-25 23:17:53 +00006796 /*
6797 * There is a problem on the handling of _private for entities
6798 * (bug 155816): Should we copy the content of the field from
6799 * the entity (possibly overwriting some value set by the user
6800 * when a copy is created), should we leave it alone, or should
6801 * we try to take care of different situations? The problem
6802 * is exacerbated by the usage of this field by the xmlReader.
6803 * To fix this bug, we look at _private on the created node
6804 * and, if it's NULL, we copy in whatever was in the entity.
6805 * If it's not NULL we leave it alone. This is somewhat of a
6806 * hack - maybe we should have further tests to determine
6807 * what to do.
6808 */
Owen Taylor3473f882001-02-23 17:55:21 +00006809 if ((ctxt->node != NULL) && (ent->children != NULL)) {
6810 /*
6811 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00006812 * a simple tree copy for all references except the first
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006813 * In the first occurrence list contains the replacement.
6814 * progressive == 2 means we are operating on the Reader
6815 * and since nodes are discarded we must copy all the time.
Owen Taylor3473f882001-02-23 17:55:21 +00006816 */
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006817 if (((list == NULL) && (ent->owner == 0)) ||
6818 (ctxt->parseMode == XML_PARSE_READER)) {
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006819 xmlNodePtr nw = NULL, cur, firstChild = NULL;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006820
6821 /*
6822 * when operating on a reader, the entities definitions
6823 * are always owning the entities subtree.
6824 if (ctxt->parseMode == XML_PARSE_READER)
6825 ent->owner = 1;
6826 */
6827
Daniel Veillard62f313b2001-07-04 19:49:14 +00006828 cur = ent->children;
6829 while (cur != NULL) {
Daniel Veillard03a53c32004-10-26 16:06:51 +00006830 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006831 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006832 if (nw->_private == NULL)
6833 nw->_private = cur->_private;
Daniel Veillard8f872442003-01-09 23:19:02 +00006834 if (firstChild == NULL){
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006835 firstChild = nw;
Daniel Veillard8f872442003-01-09 23:19:02 +00006836 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006837 nw = xmlAddChild(ctxt->node, nw);
Daniel Veillard8107a222002-01-13 14:10:10 +00006838 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006839 if (cur == ent->last) {
6840 /*
6841 * needed to detect some strange empty
6842 * node cases in the reader tests
6843 */
6844 if ((ctxt->parseMode == XML_PARSE_READER) &&
Daniel Veillard30e76072006-03-09 14:13:55 +00006845 (nw != NULL) &&
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006846 (nw->type == XML_ELEMENT_NODE) &&
6847 (nw->children == NULL))
6848 nw->extra = 1;
6849
Daniel Veillard62f313b2001-07-04 19:49:14 +00006850 break;
Daniel Veillard0df3bc32004-06-08 12:03:41 +00006851 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00006852 cur = cur->next;
6853 }
Daniel Veillard81273902003-09-30 00:43:48 +00006854#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +00006855 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006856 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006857#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006858 } else if (list == NULL) {
6859 xmlNodePtr nw = NULL, cur, next, last,
6860 firstChild = NULL;
6861 /*
6862 * Copy the entity child list and make it the new
6863 * entity child list. The goal is to make sure any
6864 * ID or REF referenced will be the one from the
6865 * document content and not the entity copy.
6866 */
6867 cur = ent->children;
6868 ent->children = NULL;
6869 last = ent->last;
6870 ent->last = NULL;
6871 while (cur != NULL) {
6872 next = cur->next;
6873 cur->next = NULL;
6874 cur->parent = NULL;
Daniel Veillard03a53c32004-10-26 16:06:51 +00006875 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006876 if (nw != NULL) {
William M. Brack1227fb32004-10-25 23:17:53 +00006877 if (nw->_private == NULL)
6878 nw->_private = cur->_private;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006879 if (firstChild == NULL){
6880 firstChild = cur;
6881 }
6882 xmlAddChild((xmlNodePtr) ent, nw);
6883 xmlAddChild(ctxt->node, cur);
6884 }
6885 if (cur == last)
6886 break;
6887 cur = next;
6888 }
6889 ent->owner = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00006890#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillardef8dd7b2003-03-23 12:02:56 +00006891 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
6892 xmlAddEntityReference(ent, firstChild, nw);
Daniel Veillard81273902003-09-30 00:43:48 +00006893#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard62f313b2001-07-04 19:49:14 +00006894 } else {
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006895 const xmlChar *nbktext;
6896
Daniel Veillard62f313b2001-07-04 19:49:14 +00006897 /*
6898 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006899 * node with a possible previous text one which
6900 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00006901 */
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006902 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
6903 -1);
Daniel Veillard62f313b2001-07-04 19:49:14 +00006904 if (ent->children->type == XML_TEXT_NODE)
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006905 ent->children->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006906 if ((ent->last != ent->children) &&
6907 (ent->last->type == XML_TEXT_NODE))
Daniel Veillard370ba3d2004-10-25 16:23:56 +00006908 ent->last->name = nbktext;
Daniel Veillard62f313b2001-07-04 19:49:14 +00006909 xmlAddChildList(ctxt->node, ent->children);
6910 }
6911
Owen Taylor3473f882001-02-23 17:55:21 +00006912 /*
6913 * This is to avoid a nasty side effect, see
6914 * characters() in SAX.c
6915 */
6916 ctxt->nodemem = 0;
6917 ctxt->nodelen = 0;
6918 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006919 }
6920 }
6921 } else {
6922 val = ent->content;
6923 if (val == NULL) return;
6924 /*
6925 * inline the entity.
6926 */
6927 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6928 (!ctxt->disableSAX))
6929 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6930 }
6931 }
6932}
6933
6934/**
6935 * xmlParseEntityRef:
6936 * @ctxt: an XML parser context
6937 *
6938 * parse ENTITY references declarations
6939 *
6940 * [68] EntityRef ::= '&' Name ';'
6941 *
6942 * [ WFC: Entity Declared ]
6943 * In a document without any DTD, a document with only an internal DTD
6944 * subset which contains no parameter entity references, or a document
6945 * with "standalone='yes'", the Name given in the entity reference
6946 * must match that in an entity declaration, except that well-formed
6947 * documents need not declare any of the following entities: amp, lt,
6948 * gt, apos, quot. The declaration of a parameter entity must precede
6949 * any reference to it. Similarly, the declaration of a general entity
6950 * must precede any reference to it which appears in a default value in an
6951 * attribute-list declaration. Note that if entities are declared in the
6952 * external subset or in external parameter entities, a non-validating
6953 * processor is not obligated to read and process their declarations;
6954 * for such documents, the rule that an entity must be declared is a
6955 * well-formedness constraint only if standalone='yes'.
6956 *
6957 * [ WFC: Parsed Entity ]
6958 * An entity reference must not contain the name of an unparsed entity
6959 *
6960 * Returns the xmlEntityPtr if found, or NULL otherwise.
6961 */
6962xmlEntityPtr
6963xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006964 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006965 xmlEntityPtr ent = NULL;
6966
6967 GROW;
6968
6969 if (RAW == '&') {
6970 NEXT;
6971 name = xmlParseName(ctxt);
6972 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006973 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6974 "xmlParseEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006975 } else {
6976 if (RAW == ';') {
6977 NEXT;
6978 /*
6979 * Ask first SAX for entity resolution, otherwise try the
6980 * predefined set.
6981 */
6982 if (ctxt->sax != NULL) {
6983 if (ctxt->sax->getEntity != NULL)
6984 ent = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006985 if ((ctxt->wellFormed == 1 ) && (ent == NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00006986 ent = xmlGetPredefinedEntity(name);
Daniel Veillard39eb88b2003-03-11 11:21:28 +00006987 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
6988 (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00006989 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00006990 }
Owen Taylor3473f882001-02-23 17:55:21 +00006991 }
6992 /*
6993 * [ WFC: Entity Declared ]
6994 * In a document without any DTD, a document with only an
6995 * internal DTD subset which contains no parameter entity
6996 * references, or a document with "standalone='yes'", the
6997 * Name given in the entity reference must match that in an
6998 * entity declaration, except that well-formed documents
6999 * need not declare any of the following entities: amp, lt,
7000 * gt, apos, quot.
7001 * The declaration of a parameter entity must precede any
7002 * reference to it.
7003 * Similarly, the declaration of a general entity must
7004 * precede any reference to it which appears in a default
7005 * value in an attribute-list declaration. Note that if
7006 * entities are declared in the external subset or in
7007 * external parameter entities, a non-validating processor
7008 * is not obligated to read and process their declarations;
7009 * for such documents, the rule that an entity must be
7010 * declared is a well-formedness constraint only if
7011 * standalone='yes'.
7012 */
7013 if (ent == NULL) {
7014 if ((ctxt->standalone == 1) ||
7015 ((ctxt->hasExternalSubset == 0) &&
7016 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007017 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007018 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007019 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007020 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007021 "Entity '%s' not defined\n", name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00007022 if ((ctxt->inSubset == 0) &&
7023 (ctxt->sax != NULL) &&
7024 (ctxt->sax->reference != NULL)) {
Daniel Veillarda9557952006-10-12 12:53:15 +00007025 ctxt->sax->reference(ctxt->userData, name);
Daniel Veillard53bd1f32005-08-24 14:46:07 +00007026 }
Owen Taylor3473f882001-02-23 17:55:21 +00007027 }
Daniel Veillardf403d292003-10-05 13:51:35 +00007028 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00007029 }
7030
7031 /*
7032 * [ WFC: Parsed Entity ]
7033 * An entity reference must not contain the name of an
7034 * unparsed entity
7035 */
7036 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007037 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007038 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007039 }
7040
7041 /*
7042 * [ WFC: No External Entity References ]
7043 * Attribute values cannot contain direct or indirect
7044 * entity references to external entities.
7045 */
7046 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7047 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007048 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7049 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007050 }
7051 /*
7052 * [ WFC: No < in Attribute Values ]
7053 * The replacement text of any entity referred to directly or
7054 * indirectly in an attribute value (other than "&lt;") must
7055 * not contain a <.
7056 */
7057 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7058 (ent != NULL) &&
7059 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
7060 (ent->content != NULL) &&
7061 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007062 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
Owen Taylor3473f882001-02-23 17:55:21 +00007063 "'<' in entity '%s' is not allowed in attributes values\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007064 }
7065
7066 /*
7067 * Internal check, no parameter entities here ...
7068 */
7069 else {
7070 switch (ent->etype) {
7071 case XML_INTERNAL_PARAMETER_ENTITY:
7072 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007073 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7074 "Attempt to reference the parameter entity '%s'\n",
7075 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007076 break;
7077 default:
7078 break;
7079 }
7080 }
7081
7082 /*
7083 * [ WFC: No Recursion ]
7084 * A parsed entity must not contain a recursive reference
7085 * to itself, either directly or indirectly.
7086 * Done somewhere else
7087 */
7088
7089 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007090 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007091 }
Owen Taylor3473f882001-02-23 17:55:21 +00007092 }
7093 }
7094 return(ent);
7095}
7096
7097/**
7098 * xmlParseStringEntityRef:
7099 * @ctxt: an XML parser context
7100 * @str: a pointer to an index in the string
7101 *
7102 * parse ENTITY references declarations, but this version parses it from
7103 * a string value.
7104 *
7105 * [68] EntityRef ::= '&' Name ';'
7106 *
7107 * [ WFC: Entity Declared ]
7108 * In a document without any DTD, a document with only an internal DTD
7109 * subset which contains no parameter entity references, or a document
7110 * with "standalone='yes'", the Name given in the entity reference
7111 * must match that in an entity declaration, except that well-formed
7112 * documents need not declare any of the following entities: amp, lt,
7113 * gt, apos, quot. The declaration of a parameter entity must precede
7114 * any reference to it. Similarly, the declaration of a general entity
7115 * must precede any reference to it which appears in a default value in an
7116 * attribute-list declaration. Note that if entities are declared in the
7117 * external subset or in external parameter entities, a non-validating
7118 * processor is not obligated to read and process their declarations;
7119 * for such documents, the rule that an entity must be declared is a
7120 * well-formedness constraint only if standalone='yes'.
7121 *
7122 * [ WFC: Parsed Entity ]
7123 * An entity reference must not contain the name of an unparsed entity
7124 *
7125 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7126 * is updated to the current location in the string.
7127 */
7128xmlEntityPtr
7129xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7130 xmlChar *name;
7131 const xmlChar *ptr;
7132 xmlChar cur;
7133 xmlEntityPtr ent = NULL;
7134
7135 if ((str == NULL) || (*str == NULL))
7136 return(NULL);
7137 ptr = *str;
7138 cur = *ptr;
7139 if (cur == '&') {
7140 ptr++;
7141 cur = *ptr;
7142 name = xmlParseStringName(ctxt, &ptr);
7143 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007144 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7145 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007146 } else {
7147 if (*ptr == ';') {
7148 ptr++;
7149 /*
7150 * Ask first SAX for entity resolution, otherwise try the
7151 * predefined set.
7152 */
7153 if (ctxt->sax != NULL) {
7154 if (ctxt->sax->getEntity != NULL)
7155 ent = ctxt->sax->getEntity(ctxt->userData, name);
7156 if (ent == NULL)
7157 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00007158 if ((ent == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00007159 ent = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00007160 }
Owen Taylor3473f882001-02-23 17:55:21 +00007161 }
7162 /*
7163 * [ WFC: Entity Declared ]
7164 * In a document without any DTD, a document with only an
7165 * internal DTD subset which contains no parameter entity
7166 * references, or a document with "standalone='yes'", the
7167 * Name given in the entity reference must match that in an
7168 * entity declaration, except that well-formed documents
7169 * need not declare any of the following entities: amp, lt,
7170 * gt, apos, quot.
7171 * The declaration of a parameter entity must precede any
7172 * reference to it.
7173 * Similarly, the declaration of a general entity must
7174 * precede any reference to it which appears in a default
7175 * value in an attribute-list declaration. Note that if
7176 * entities are declared in the external subset or in
7177 * external parameter entities, a non-validating processor
7178 * is not obligated to read and process their declarations;
7179 * for such documents, the rule that an entity must be
7180 * declared is a well-formedness constraint only if
7181 * standalone='yes'.
7182 */
7183 if (ent == NULL) {
7184 if ((ctxt->standalone == 1) ||
7185 ((ctxt->hasExternalSubset == 0) &&
7186 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007187 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007188 "Entity '%s' not defined\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007189 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00007190 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
Daniel Veillard24eb9782003-10-04 21:08:09 +00007191 "Entity '%s' not defined\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00007192 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007193 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00007194 /* TODO ? check regressions ctxt->valid = 0; */
Owen Taylor3473f882001-02-23 17:55:21 +00007195 }
7196
7197 /*
7198 * [ WFC: Parsed Entity ]
7199 * An entity reference must not contain the name of an
7200 * unparsed entity
7201 */
7202 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007203 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007204 "Entity reference to unparsed entity %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007205 }
7206
7207 /*
7208 * [ WFC: No External Entity References ]
7209 * Attribute values cannot contain direct or indirect
7210 * entity references to external entities.
7211 */
7212 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7213 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007214 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
Owen Taylor3473f882001-02-23 17:55:21 +00007215 "Attribute references external entity '%s'\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007216 }
7217 /*
7218 * [ WFC: No < in Attribute Values ]
7219 * The replacement text of any entity referred to directly or
7220 * indirectly in an attribute value (other than "&lt;") must
7221 * not contain a <.
7222 */
7223 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7224 (ent != NULL) &&
7225 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
7226 (ent->content != NULL) &&
7227 (xmlStrchr(ent->content, '<'))) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007228 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7229 "'<' in entity '%s' is not allowed in attributes values\n",
7230 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007231 }
7232
7233 /*
7234 * Internal check, no parameter entities here ...
7235 */
7236 else {
7237 switch (ent->etype) {
7238 case XML_INTERNAL_PARAMETER_ENTITY:
7239 case XML_EXTERNAL_PARAMETER_ENTITY:
Daniel Veillardf403d292003-10-05 13:51:35 +00007240 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7241 "Attempt to reference the parameter entity '%s'\n",
7242 name);
Owen Taylor3473f882001-02-23 17:55:21 +00007243 break;
7244 default:
7245 break;
7246 }
7247 }
7248
7249 /*
7250 * [ WFC: No Recursion ]
7251 * A parsed entity must not contain a recursive reference
7252 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007253 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00007254 */
7255
7256 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007257 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007258 }
7259 xmlFree(name);
7260 }
7261 }
7262 *str = ptr;
7263 return(ent);
7264}
7265
7266/**
7267 * xmlParsePEReference:
7268 * @ctxt: an XML parser context
7269 *
7270 * parse PEReference declarations
7271 * The entity content is handled directly by pushing it's content as
7272 * a new input stream.
7273 *
7274 * [69] PEReference ::= '%' Name ';'
7275 *
7276 * [ WFC: No Recursion ]
7277 * A parsed entity must not contain a recursive
7278 * reference to itself, either directly or indirectly.
7279 *
7280 * [ WFC: Entity Declared ]
7281 * In a document without any DTD, a document with only an internal DTD
7282 * subset which contains no parameter entity references, or a document
7283 * with "standalone='yes'", ... ... The declaration of a parameter
7284 * entity must precede any reference to it...
7285 *
7286 * [ VC: Entity Declared ]
7287 * In a document with an external subset or external parameter entities
7288 * with "standalone='no'", ... ... The declaration of a parameter entity
7289 * must precede any reference to it...
7290 *
7291 * [ WFC: In DTD ]
7292 * Parameter-entity references may only appear in the DTD.
7293 * NOTE: misleading but this is handled.
7294 */
7295void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007296xmlParsePEReference(xmlParserCtxtPtr ctxt)
7297{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007298 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007299 xmlEntityPtr entity = NULL;
7300 xmlParserInputPtr input;
7301
7302 if (RAW == '%') {
7303 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00007304 name = xmlParseName(ctxt);
Daniel Veillard8f597c32003-10-06 08:19:27 +00007305 if (name == NULL) {
7306 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7307 "xmlParsePEReference: no name\n");
7308 } else {
7309 if (RAW == ';') {
7310 NEXT;
7311 if ((ctxt->sax != NULL) &&
7312 (ctxt->sax->getParameterEntity != NULL))
7313 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7314 name);
7315 if (entity == NULL) {
7316 /*
7317 * [ WFC: Entity Declared ]
7318 * In a document without any DTD, a document with only an
7319 * internal DTD subset which contains no parameter entity
7320 * references, or a document with "standalone='yes'", ...
7321 * ... The declaration of a parameter entity must precede
7322 * any reference to it...
7323 */
7324 if ((ctxt->standalone == 1) ||
7325 ((ctxt->hasExternalSubset == 0) &&
7326 (ctxt->hasPErefs == 0))) {
7327 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7328 "PEReference: %%%s; not found\n",
7329 name);
7330 } else {
7331 /*
7332 * [ VC: Entity Declared ]
7333 * In a document with an external subset or external
7334 * parameter entities with "standalone='no'", ...
7335 * ... The declaration of a parameter entity must
7336 * precede any reference to it...
7337 */
7338 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7339 "PEReference: %%%s; not found\n",
7340 name, NULL);
7341 ctxt->valid = 0;
7342 }
7343 } else {
7344 /*
7345 * Internal checking in case the entity quest barfed
7346 */
7347 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7348 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7349 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7350 "Internal: %%%s; is not a parameter entity\n",
7351 name, NULL);
7352 } else if (ctxt->input->free != deallocblankswrapper) {
7353 input =
7354 xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007355 if (xmlPushInput(ctxt, input) < 0)
7356 return;
Daniel Veillard8f597c32003-10-06 08:19:27 +00007357 } else {
7358 /*
7359 * TODO !!!
7360 * handle the extra spaces added before and after
7361 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7362 */
7363 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007364 if (xmlPushInput(ctxt, input) < 0)
7365 return;
Daniel Veillard8f597c32003-10-06 08:19:27 +00007366 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00007367 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
William M. Brack76e95df2003-10-18 16:20:14 +00007368 (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard8f597c32003-10-06 08:19:27 +00007369 xmlParseTextDecl(ctxt);
7370 if (ctxt->errNo ==
7371 XML_ERR_UNSUPPORTED_ENCODING) {
7372 /*
7373 * The XML REC instructs us to stop parsing
7374 * right here
7375 */
7376 ctxt->instate = XML_PARSER_EOF;
7377 return;
7378 }
7379 }
7380 }
7381 }
7382 ctxt->hasPErefs = 1;
7383 } else {
7384 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7385 }
7386 }
Owen Taylor3473f882001-02-23 17:55:21 +00007387 }
7388}
7389
7390/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007391 * xmlLoadEntityContent:
7392 * @ctxt: an XML parser context
7393 * @entity: an unloaded system entity
7394 *
7395 * Load the original content of the given system entity from the
7396 * ExternalID/SystemID given. This is to be used for Included in Literal
7397 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7398 *
7399 * Returns 0 in case of success and -1 in case of failure
7400 */
7401static int
7402xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7403 xmlParserInputPtr input;
7404 xmlBufferPtr buf;
7405 int l, c;
7406 int count = 0;
7407
7408 if ((ctxt == NULL) || (entity == NULL) ||
7409 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7410 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7411 (entity->content != NULL)) {
7412 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7413 "xmlLoadEntityContent parameter error");
7414 return(-1);
7415 }
7416
7417 if (xmlParserDebugEntities)
7418 xmlGenericError(xmlGenericErrorContext,
7419 "Reading %s entity content input\n", entity->name);
7420
7421 buf = xmlBufferCreate();
7422 if (buf == NULL) {
7423 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7424 "xmlLoadEntityContent parameter error");
7425 return(-1);
7426 }
7427
7428 input = xmlNewEntityInputStream(ctxt, entity);
7429 if (input == NULL) {
7430 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7431 "xmlLoadEntityContent input error");
7432 xmlBufferFree(buf);
7433 return(-1);
7434 }
7435
7436 /*
7437 * Push the entity as the current input, read char by char
7438 * saving to the buffer until the end of the entity or an error
7439 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007440 if (xmlPushInput(ctxt, input) < 0) {
7441 xmlBufferFree(buf);
7442 return(-1);
7443 }
7444
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007445 GROW;
7446 c = CUR_CHAR(l);
7447 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7448 (IS_CHAR(c))) {
7449 xmlBufferAdd(buf, ctxt->input->cur, l);
7450 if (count++ > 100) {
7451 count = 0;
7452 GROW;
7453 }
7454 NEXTL(l);
7455 c = CUR_CHAR(l);
7456 }
7457
7458 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7459 xmlPopInput(ctxt);
7460 } else if (!IS_CHAR(c)) {
7461 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7462 "xmlLoadEntityContent: invalid char value %d\n",
7463 c);
7464 xmlBufferFree(buf);
7465 return(-1);
7466 }
7467 entity->content = buf->content;
7468 buf->content = NULL;
7469 xmlBufferFree(buf);
7470
7471 return(0);
7472}
7473
7474/**
Owen Taylor3473f882001-02-23 17:55:21 +00007475 * xmlParseStringPEReference:
7476 * @ctxt: an XML parser context
7477 * @str: a pointer to an index in the string
7478 *
7479 * parse PEReference declarations
7480 *
7481 * [69] PEReference ::= '%' Name ';'
7482 *
7483 * [ WFC: No Recursion ]
7484 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007485 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007486 *
7487 * [ WFC: Entity Declared ]
7488 * In a document without any DTD, a document with only an internal DTD
7489 * subset which contains no parameter entity references, or a document
7490 * with "standalone='yes'", ... ... The declaration of a parameter
7491 * entity must precede any reference to it...
7492 *
7493 * [ VC: Entity Declared ]
7494 * In a document with an external subset or external parameter entities
7495 * with "standalone='no'", ... ... The declaration of a parameter entity
7496 * must precede any reference to it...
7497 *
7498 * [ WFC: In DTD ]
7499 * Parameter-entity references may only appear in the DTD.
7500 * NOTE: misleading but this is handled.
7501 *
7502 * Returns the string of the entity content.
7503 * str is updated to the current value of the index
7504 */
7505xmlEntityPtr
7506xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7507 const xmlChar *ptr;
7508 xmlChar cur;
7509 xmlChar *name;
7510 xmlEntityPtr entity = NULL;
7511
7512 if ((str == NULL) || (*str == NULL)) return(NULL);
7513 ptr = *str;
7514 cur = *ptr;
7515 if (cur == '%') {
7516 ptr++;
7517 cur = *ptr;
7518 name = xmlParseStringName(ctxt, &ptr);
7519 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007520 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7521 "xmlParseStringPEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007522 } else {
7523 cur = *ptr;
7524 if (cur == ';') {
7525 ptr++;
7526 cur = *ptr;
7527 if ((ctxt->sax != NULL) &&
7528 (ctxt->sax->getParameterEntity != NULL))
7529 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7530 name);
7531 if (entity == NULL) {
7532 /*
7533 * [ WFC: Entity Declared ]
7534 * In a document without any DTD, a document with only an
7535 * internal DTD subset which contains no parameter entity
7536 * references, or a document with "standalone='yes'", ...
7537 * ... The declaration of a parameter entity must precede
7538 * any reference to it...
7539 */
7540 if ((ctxt->standalone == 1) ||
7541 ((ctxt->hasExternalSubset == 0) &&
7542 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007543 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00007544 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007545 } else {
7546 /*
7547 * [ VC: Entity Declared ]
7548 * In a document with an external subset or external
7549 * parameter entities with "standalone='no'", ...
7550 * ... The declaration of a parameter entity must
7551 * precede any reference to it...
7552 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00007553 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7554 "PEReference: %%%s; not found\n",
7555 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007556 ctxt->valid = 0;
7557 }
7558 } else {
7559 /*
7560 * Internal checking in case the entity quest barfed
7561 */
7562 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7563 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007564 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7565 "%%%s; is not a parameter entity\n",
7566 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007567 }
7568 }
7569 ctxt->hasPErefs = 1;
7570 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007571 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007572 }
7573 xmlFree(name);
7574 }
7575 }
7576 *str = ptr;
7577 return(entity);
7578}
7579
7580/**
7581 * xmlParseDocTypeDecl:
7582 * @ctxt: an XML parser context
7583 *
7584 * parse a DOCTYPE declaration
7585 *
7586 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7587 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7588 *
7589 * [ VC: Root Element Type ]
7590 * The Name in the document type declaration must match the element
7591 * type of the root element.
7592 */
7593
7594void
7595xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007596 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007597 xmlChar *ExternalID = NULL;
7598 xmlChar *URI = NULL;
7599
7600 /*
7601 * We know that '<!DOCTYPE' has been detected.
7602 */
7603 SKIP(9);
7604
7605 SKIP_BLANKS;
7606
7607 /*
7608 * Parse the DOCTYPE name.
7609 */
7610 name = xmlParseName(ctxt);
7611 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007612 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7613 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007614 }
7615 ctxt->intSubName = name;
7616
7617 SKIP_BLANKS;
7618
7619 /*
7620 * Check for SystemID and ExternalID
7621 */
7622 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7623
7624 if ((URI != NULL) || (ExternalID != NULL)) {
7625 ctxt->hasExternalSubset = 1;
7626 }
7627 ctxt->extSubURI = URI;
7628 ctxt->extSubSystem = ExternalID;
7629
7630 SKIP_BLANKS;
7631
7632 /*
7633 * Create and update the internal subset.
7634 */
7635 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7636 (!ctxt->disableSAX))
7637 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
7638
7639 /*
7640 * Is there any internal subset declarations ?
7641 * they are handled separately in xmlParseInternalSubset()
7642 */
7643 if (RAW == '[')
7644 return;
7645
7646 /*
7647 * We should be at the end of the DOCTYPE declaration.
7648 */
7649 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007650 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007651 }
7652 NEXT;
7653}
7654
7655/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007656 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00007657 * @ctxt: an XML parser context
7658 *
7659 * parse the internal subset declaration
7660 *
7661 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7662 */
7663
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007664static void
Owen Taylor3473f882001-02-23 17:55:21 +00007665xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7666 /*
7667 * Is there any DTD definition ?
7668 */
7669 if (RAW == '[') {
7670 ctxt->instate = XML_PARSER_DTD;
7671 NEXT;
7672 /*
7673 * Parse the succession of Markup declarations and
7674 * PEReferences.
7675 * Subsequence (markupdecl | PEReference | S)*
7676 */
7677 while (RAW != ']') {
7678 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007679 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007680
7681 SKIP_BLANKS;
7682 xmlParseMarkupDecl(ctxt);
7683 xmlParsePEReference(ctxt);
7684
7685 /*
7686 * Pop-up of finished entities.
7687 */
7688 while ((RAW == 0) && (ctxt->inputNr > 1))
7689 xmlPopInput(ctxt);
7690
7691 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007692 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00007693 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007694 break;
7695 }
7696 }
7697 if (RAW == ']') {
7698 NEXT;
7699 SKIP_BLANKS;
7700 }
7701 }
7702
7703 /*
7704 * We should be at the end of the DOCTYPE declaration.
7705 */
7706 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007707 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007708 }
7709 NEXT;
7710}
7711
Daniel Veillard81273902003-09-30 00:43:48 +00007712#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00007713/**
7714 * xmlParseAttribute:
7715 * @ctxt: an XML parser context
7716 * @value: a xmlChar ** used to store the value of the attribute
7717 *
7718 * parse an attribute
7719 *
7720 * [41] Attribute ::= Name Eq AttValue
7721 *
7722 * [ WFC: No External Entity References ]
7723 * Attribute values cannot contain direct or indirect entity references
7724 * to external entities.
7725 *
7726 * [ WFC: No < in Attribute Values ]
7727 * The replacement text of any entity referred to directly or indirectly in
7728 * an attribute value (other than "&lt;") must not contain a <.
7729 *
7730 * [ VC: Attribute Value Type ]
7731 * The attribute must have been declared; the value must be of the type
7732 * declared for it.
7733 *
7734 * [25] Eq ::= S? '=' S?
7735 *
7736 * With namespace:
7737 *
7738 * [NS 11] Attribute ::= QName Eq AttValue
7739 *
7740 * Also the case QName == xmlns:??? is handled independently as a namespace
7741 * definition.
7742 *
7743 * Returns the attribute name, and the value in *value.
7744 */
7745
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007746const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007747xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007748 const xmlChar *name;
7749 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00007750
7751 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00007752 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007753 name = xmlParseName(ctxt);
7754 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007755 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007756 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007757 return(NULL);
7758 }
7759
7760 /*
7761 * read the value
7762 */
7763 SKIP_BLANKS;
7764 if (RAW == '=') {
7765 NEXT;
7766 SKIP_BLANKS;
7767 val = xmlParseAttValue(ctxt);
7768 ctxt->instate = XML_PARSER_CONTENT;
7769 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00007770 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00007771 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007772 return(NULL);
7773 }
7774
7775 /*
7776 * Check that xml:lang conforms to the specification
7777 * No more registered as an error, just generate a warning now
7778 * since this was deprecated in XML second edition
7779 */
7780 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
7781 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00007782 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
7783 "Malformed value for xml:lang : %s\n",
7784 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007785 }
7786 }
7787
7788 /*
7789 * Check that xml:space conforms to the specification
7790 */
7791 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
7792 if (xmlStrEqual(val, BAD_CAST "default"))
7793 *(ctxt->space) = 0;
7794 else if (xmlStrEqual(val, BAD_CAST "preserve"))
7795 *(ctxt->space) = 1;
7796 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00007797 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00007798"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00007799 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007800 }
7801 }
7802
7803 *value = val;
7804 return(name);
7805}
7806
7807/**
7808 * xmlParseStartTag:
7809 * @ctxt: an XML parser context
7810 *
7811 * parse a start of tag either for rule element or
7812 * EmptyElement. In both case we don't parse the tag closing chars.
7813 *
7814 * [40] STag ::= '<' Name (S Attribute)* S? '>'
7815 *
7816 * [ WFC: Unique Att Spec ]
7817 * No attribute name may appear more than once in the same start-tag or
7818 * empty-element tag.
7819 *
7820 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
7821 *
7822 * [ WFC: Unique Att Spec ]
7823 * No attribute name may appear more than once in the same start-tag or
7824 * empty-element tag.
7825 *
7826 * With namespace:
7827 *
7828 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
7829 *
7830 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
7831 *
7832 * Returns the element name parsed
7833 */
7834
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007835const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00007836xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007837 const xmlChar *name;
7838 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00007839 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007840 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00007841 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007842 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007843 int i;
7844
7845 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00007846 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00007847
7848 name = xmlParseName(ctxt);
7849 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007850 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00007851 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007852 return(NULL);
7853 }
7854
7855 /*
7856 * Now parse the attributes, it ends up with the ending
7857 *
7858 * (S Attribute)* S?
7859 */
7860 SKIP_BLANKS;
7861 GROW;
7862
Daniel Veillard21a0f912001-02-25 19:54:14 +00007863 while ((RAW != '>') &&
7864 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00007865 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00007866 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00007867 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00007868
7869 attname = xmlParseAttribute(ctxt, &attvalue);
7870 if ((attname != NULL) && (attvalue != NULL)) {
7871 /*
7872 * [ WFC: Unique Att Spec ]
7873 * No attribute name may appear more than once in the same
7874 * start-tag or empty-element tag.
7875 */
7876 for (i = 0; i < nbatts;i += 2) {
7877 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007878 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00007879 xmlFree(attvalue);
7880 goto failed;
7881 }
7882 }
Owen Taylor3473f882001-02-23 17:55:21 +00007883 /*
7884 * Add the pair to atts
7885 */
7886 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007887 maxatts = 22; /* allow for 10 attrs by default */
7888 atts = (const xmlChar **)
7889 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00007890 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007891 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007892 if (attvalue != NULL)
7893 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007894 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007895 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007896 ctxt->atts = atts;
7897 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007898 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007899 const xmlChar **n;
7900
Owen Taylor3473f882001-02-23 17:55:21 +00007901 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007902 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007903 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007904 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007905 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007906 if (attvalue != NULL)
7907 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007908 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00007909 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007910 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007911 ctxt->atts = atts;
7912 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00007913 }
7914 atts[nbatts++] = attname;
7915 atts[nbatts++] = attvalue;
7916 atts[nbatts] = NULL;
7917 atts[nbatts + 1] = NULL;
7918 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007919 if (attvalue != NULL)
7920 xmlFree(attvalue);
7921 }
7922
7923failed:
7924
Daniel Veillard3772de32002-12-17 10:31:45 +00007925 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00007926 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
7927 break;
William M. Brack76e95df2003-10-18 16:20:14 +00007928 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007929 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7930 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007931 }
7932 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00007933 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
7934 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007935 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
7936 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007937 break;
7938 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007939 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00007940 GROW;
7941 }
7942
7943 /*
7944 * SAX: Start of Element !
7945 */
7946 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00007947 (!ctxt->disableSAX)) {
7948 if (nbatts > 0)
7949 ctxt->sax->startElement(ctxt->userData, name, atts);
7950 else
7951 ctxt->sax->startElement(ctxt->userData, name, NULL);
7952 }
Owen Taylor3473f882001-02-23 17:55:21 +00007953
7954 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007955 /* Free only the content strings */
7956 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00007957 if (atts[i] != NULL)
7958 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00007959 }
7960 return(name);
7961}
7962
7963/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00007964 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00007965 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00007966 * @line: line of the start tag
7967 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00007968 *
7969 * parse an end of tag
7970 *
7971 * [42] ETag ::= '</' Name S? '>'
7972 *
7973 * With namespace
7974 *
7975 * [NS 9] ETag ::= '</' QName S? '>'
7976 */
7977
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00007978static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00007979xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007980 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007981
7982 GROW;
7983 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00007984 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007985 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007986 return;
7987 }
7988 SKIP(2);
7989
Daniel Veillard46de64e2002-05-29 08:21:33 +00007990 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00007991
7992 /*
7993 * We should definitely be at the ending "S? '>'" part
7994 */
7995 GROW;
7996 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00007997 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00007998 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007999 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008000 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008001
8002 /*
8003 * [ WFC: Element Type Match ]
8004 * The Name in an element's end-tag must match the element type in the
8005 * start-tag.
8006 *
8007 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008008 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008009 if (name == NULL) name = BAD_CAST "unparseable";
8010 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008011 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008012 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008013 }
8014
8015 /*
8016 * SAX: End of Tag
8017 */
8018 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8019 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008020 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008021
Daniel Veillarde57ec792003-09-10 10:50:59 +00008022 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008023 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008024 return;
8025}
8026
8027/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008028 * xmlParseEndTag:
8029 * @ctxt: an XML parser context
8030 *
8031 * parse an end of tag
8032 *
8033 * [42] ETag ::= '</' Name S? '>'
8034 *
8035 * With namespace
8036 *
8037 * [NS 9] ETag ::= '</' QName S? '>'
8038 */
8039
8040void
8041xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008042 xmlParseEndTag1(ctxt, 0);
8043}
Daniel Veillard81273902003-09-30 00:43:48 +00008044#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008045
8046/************************************************************************
8047 * *
8048 * SAX 2 specific operations *
8049 * *
8050 ************************************************************************/
8051
Daniel Veillard0fb18932003-09-07 09:14:37 +00008052/*
8053 * xmlGetNamespace:
8054 * @ctxt: an XML parser context
8055 * @prefix: the prefix to lookup
8056 *
8057 * Lookup the namespace name for the @prefix (which ca be NULL)
8058 * The prefix must come from the @ctxt->dict dictionnary
8059 *
8060 * Returns the namespace name or NULL if not bound
8061 */
8062static const xmlChar *
8063xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8064 int i;
8065
Daniel Veillarde57ec792003-09-10 10:50:59 +00008066 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008067 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008068 if (ctxt->nsTab[i] == prefix) {
8069 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8070 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008071 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008072 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008073 return(NULL);
8074}
8075
8076/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008077 * xmlParseQName:
8078 * @ctxt: an XML parser context
8079 * @prefix: pointer to store the prefix part
8080 *
8081 * parse an XML Namespace QName
8082 *
8083 * [6] QName ::= (Prefix ':')? LocalPart
8084 * [7] Prefix ::= NCName
8085 * [8] LocalPart ::= NCName
8086 *
8087 * Returns the Name parsed or NULL
8088 */
8089
8090static const xmlChar *
8091xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8092 const xmlChar *l, *p;
8093
8094 GROW;
8095
8096 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008097 if (l == NULL) {
8098 if (CUR == ':') {
8099 l = xmlParseName(ctxt);
8100 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008101 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8102 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008103 *prefix = NULL;
8104 return(l);
8105 }
8106 }
8107 return(NULL);
8108 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008109 if (CUR == ':') {
8110 NEXT;
8111 p = l;
8112 l = xmlParseNCName(ctxt);
8113 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008114 xmlChar *tmp;
8115
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008116 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8117 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008118 l = xmlParseNmtoken(ctxt);
8119 if (l == NULL)
8120 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8121 else {
8122 tmp = xmlBuildQName(l, p, NULL, 0);
8123 xmlFree((char *)l);
8124 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008125 p = xmlDictLookup(ctxt->dict, tmp, -1);
8126 if (tmp != NULL) xmlFree(tmp);
8127 *prefix = NULL;
8128 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008129 }
8130 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008131 xmlChar *tmp;
8132
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008133 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8134 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008135 NEXT;
8136 tmp = (xmlChar *) xmlParseName(ctxt);
8137 if (tmp != NULL) {
8138 tmp = xmlBuildQName(tmp, l, NULL, 0);
8139 l = xmlDictLookup(ctxt->dict, tmp, -1);
8140 if (tmp != NULL) xmlFree(tmp);
8141 *prefix = p;
8142 return(l);
8143 }
8144 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8145 l = xmlDictLookup(ctxt->dict, tmp, -1);
8146 if (tmp != NULL) xmlFree(tmp);
8147 *prefix = p;
8148 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008149 }
8150 *prefix = p;
8151 } else
8152 *prefix = NULL;
8153 return(l);
8154}
8155
8156/**
8157 * xmlParseQNameAndCompare:
8158 * @ctxt: an XML parser context
8159 * @name: the localname
8160 * @prefix: the prefix, if any.
8161 *
8162 * parse an XML name and compares for match
8163 * (specialized for endtag parsing)
8164 *
8165 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8166 * and the name for mismatch
8167 */
8168
8169static const xmlChar *
8170xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8171 xmlChar const *prefix) {
8172 const xmlChar *cmp = name;
8173 const xmlChar *in;
8174 const xmlChar *ret;
8175 const xmlChar *prefix2;
8176
8177 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8178
8179 GROW;
8180 in = ctxt->input->cur;
8181
8182 cmp = prefix;
8183 while (*in != 0 && *in == *cmp) {
8184 ++in;
8185 ++cmp;
8186 }
8187 if ((*cmp == 0) && (*in == ':')) {
8188 in++;
8189 cmp = name;
8190 while (*in != 0 && *in == *cmp) {
8191 ++in;
8192 ++cmp;
8193 }
William M. Brack76e95df2003-10-18 16:20:14 +00008194 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008195 /* success */
8196 ctxt->input->cur = in;
8197 return((const xmlChar*) 1);
8198 }
8199 }
8200 /*
8201 * all strings coms from the dictionary, equality can be done directly
8202 */
8203 ret = xmlParseQName (ctxt, &prefix2);
8204 if ((ret == name) && (prefix == prefix2))
8205 return((const xmlChar*) 1);
8206 return ret;
8207}
8208
8209/**
8210 * xmlParseAttValueInternal:
8211 * @ctxt: an XML parser context
8212 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008213 * @alloc: whether the attribute was reallocated as a new string
8214 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008215 *
8216 * parse a value for an attribute.
8217 * NOTE: if no normalization is needed, the routine will return pointers
8218 * directly from the data buffer.
8219 *
8220 * 3.3.3 Attribute-Value Normalization:
8221 * Before the value of an attribute is passed to the application or
8222 * checked for validity, the XML processor must normalize it as follows:
8223 * - a character reference is processed by appending the referenced
8224 * character to the attribute value
8225 * - an entity reference is processed by recursively processing the
8226 * replacement text of the entity
8227 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8228 * appending #x20 to the normalized value, except that only a single
8229 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8230 * parsed entity or the literal entity value of an internal parsed entity
8231 * - other characters are processed by appending them to the normalized value
8232 * If the declared value is not CDATA, then the XML processor must further
8233 * process the normalized attribute value by discarding any leading and
8234 * trailing space (#x20) characters, and by replacing sequences of space
8235 * (#x20) characters by a single space (#x20) character.
8236 * All attributes for which no declaration has been read should be treated
8237 * by a non-validating parser as if declared CDATA.
8238 *
8239 * Returns the AttValue parsed or NULL. The value has to be freed by the
8240 * caller if it was copied, this can be detected by val[*len] == 0.
8241 */
8242
8243static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008244xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8245 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008246{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008247 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008248 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008249 xmlChar *ret = NULL;
8250
8251 GROW;
8252 in = (xmlChar *) CUR_PTR;
8253 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008254 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008255 return (NULL);
8256 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008257 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008258
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008259 /*
8260 * try to handle in this routine the most common case where no
8261 * allocation of a new string is required and where content is
8262 * pure ASCII.
8263 */
8264 limit = *in++;
8265 end = ctxt->input->end;
8266 start = in;
8267 if (in >= end) {
8268 const xmlChar *oldbase = ctxt->input->base;
8269 GROW;
8270 if (oldbase != ctxt->input->base) {
8271 long delta = ctxt->input->base - oldbase;
8272 start = start + delta;
8273 in = in + delta;
8274 }
8275 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008276 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008277 if (normalize) {
8278 /*
8279 * Skip any leading spaces
8280 */
8281 while ((in < end) && (*in != limit) &&
8282 ((*in == 0x20) || (*in == 0x9) ||
8283 (*in == 0xA) || (*in == 0xD))) {
8284 in++;
8285 start = in;
8286 if (in >= end) {
8287 const xmlChar *oldbase = ctxt->input->base;
8288 GROW;
8289 if (oldbase != ctxt->input->base) {
8290 long delta = ctxt->input->base - oldbase;
8291 start = start + delta;
8292 in = in + delta;
8293 }
8294 end = ctxt->input->end;
8295 }
8296 }
8297 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8298 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8299 if ((*in++ == 0x20) && (*in == 0x20)) break;
8300 if (in >= end) {
8301 const xmlChar *oldbase = ctxt->input->base;
8302 GROW;
8303 if (oldbase != ctxt->input->base) {
8304 long delta = ctxt->input->base - oldbase;
8305 start = start + delta;
8306 in = in + delta;
8307 }
8308 end = ctxt->input->end;
8309 }
8310 }
8311 last = in;
8312 /*
8313 * skip the trailing blanks
8314 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008315 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008316 while ((in < end) && (*in != limit) &&
8317 ((*in == 0x20) || (*in == 0x9) ||
8318 (*in == 0xA) || (*in == 0xD))) {
8319 in++;
8320 if (in >= end) {
8321 const xmlChar *oldbase = ctxt->input->base;
8322 GROW;
8323 if (oldbase != ctxt->input->base) {
8324 long delta = ctxt->input->base - oldbase;
8325 start = start + delta;
8326 in = in + delta;
8327 last = last + delta;
8328 }
8329 end = ctxt->input->end;
8330 }
8331 }
8332 if (*in != limit) goto need_complex;
8333 } else {
8334 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8335 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8336 in++;
8337 if (in >= end) {
8338 const xmlChar *oldbase = ctxt->input->base;
8339 GROW;
8340 if (oldbase != ctxt->input->base) {
8341 long delta = ctxt->input->base - oldbase;
8342 start = start + delta;
8343 in = in + delta;
8344 }
8345 end = ctxt->input->end;
8346 }
8347 }
8348 last = in;
8349 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008350 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008351 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008352 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008353 *len = last - start;
8354 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008355 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008356 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008357 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008358 }
8359 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008360 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008361 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008362need_complex:
8363 if (alloc) *alloc = 1;
8364 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008365}
8366
8367/**
8368 * xmlParseAttribute2:
8369 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008370 * @pref: the element prefix
8371 * @elem: the element name
8372 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008373 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008374 * @len: an int * to save the length of the attribute
8375 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008376 *
8377 * parse an attribute in the new SAX2 framework.
8378 *
8379 * Returns the attribute name, and the value in *value, .
8380 */
8381
8382static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008383xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008384 const xmlChar * pref, const xmlChar * elem,
8385 const xmlChar ** prefix, xmlChar ** value,
8386 int *len, int *alloc)
8387{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008388 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008389 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008390 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008391
8392 *value = NULL;
8393 GROW;
8394 name = xmlParseQName(ctxt, prefix);
8395 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008396 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8397 "error parsing attribute name\n");
8398 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008399 }
8400
8401 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008402 * get the type if needed
8403 */
8404 if (ctxt->attsSpecial != NULL) {
8405 int type;
8406
8407 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008408 pref, elem, *prefix, name);
8409 if (type != 0)
8410 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008411 }
8412
8413 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008414 * read the value
8415 */
8416 SKIP_BLANKS;
8417 if (RAW == '=') {
8418 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008419 SKIP_BLANKS;
8420 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8421 if (normalize) {
8422 /*
8423 * Sometimes a second normalisation pass for spaces is needed
8424 * but that only happens if charrefs or entities refernces
8425 * have been used in the attribute value, i.e. the attribute
8426 * value have been extracted in an allocated string already.
8427 */
8428 if (*alloc) {
8429 const xmlChar *val2;
8430
8431 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008432 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008433 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008434 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008435 }
8436 }
8437 }
8438 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008439 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008440 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8441 "Specification mandate value for attribute %s\n",
8442 name);
8443 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008444 }
8445
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008446 if (*prefix == ctxt->str_xml) {
8447 /*
8448 * Check that xml:lang conforms to the specification
8449 * No more registered as an error, just generate a warning now
8450 * since this was deprecated in XML second edition
8451 */
8452 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8453 internal_val = xmlStrndup(val, *len);
8454 if (!xmlCheckLanguageID(internal_val)) {
8455 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8456 "Malformed value for xml:lang : %s\n",
8457 internal_val, NULL);
8458 }
8459 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008460
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008461 /*
8462 * Check that xml:space conforms to the specification
8463 */
8464 if (xmlStrEqual(name, BAD_CAST "space")) {
8465 internal_val = xmlStrndup(val, *len);
8466 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8467 *(ctxt->space) = 0;
8468 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8469 *(ctxt->space) = 1;
8470 else {
8471 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8472 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8473 internal_val, NULL);
8474 }
8475 }
8476 if (internal_val) {
8477 xmlFree(internal_val);
8478 }
8479 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008480
8481 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008482 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008483}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008484/**
8485 * xmlParseStartTag2:
8486 * @ctxt: an XML parser context
8487 *
8488 * parse a start of tag either for rule element or
8489 * EmptyElement. In both case we don't parse the tag closing chars.
8490 * This routine is called when running SAX2 parsing
8491 *
8492 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8493 *
8494 * [ WFC: Unique Att Spec ]
8495 * No attribute name may appear more than once in the same start-tag or
8496 * empty-element tag.
8497 *
8498 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8499 *
8500 * [ WFC: Unique Att Spec ]
8501 * No attribute name may appear more than once in the same start-tag or
8502 * empty-element tag.
8503 *
8504 * With namespace:
8505 *
8506 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8507 *
8508 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8509 *
8510 * Returns the element name parsed
8511 */
8512
8513static const xmlChar *
8514xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008515 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008516 const xmlChar *localname;
8517 const xmlChar *prefix;
8518 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008519 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008520 const xmlChar *nsname;
8521 xmlChar *attvalue;
8522 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008523 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008524 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008525 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008526 const xmlChar *base;
8527 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008528 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008529
8530 if (RAW != '<') return(NULL);
8531 NEXT1;
8532
8533 /*
8534 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8535 * point since the attribute values may be stored as pointers to
8536 * the buffer and calling SHRINK would destroy them !
8537 * The Shrinking is only possible once the full set of attribute
8538 * callbacks have been done.
8539 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008540reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008541 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008542 base = ctxt->input->base;
8543 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008544 oldline = ctxt->input->line;
8545 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008546 nbatts = 0;
8547 nratts = 0;
8548 nbdef = 0;
8549 nbNs = 0;
8550 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008551 /* Forget any namespaces added during an earlier parse of this element. */
8552 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008553
8554 localname = xmlParseQName(ctxt, &prefix);
8555 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008556 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8557 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008558 return(NULL);
8559 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008560 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008561
8562 /*
8563 * Now parse the attributes, it ends up with the ending
8564 *
8565 * (S Attribute)* S?
8566 */
8567 SKIP_BLANKS;
8568 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008569 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008570
8571 while ((RAW != '>') &&
8572 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008573 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008574 const xmlChar *q = CUR_PTR;
8575 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008576 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008577
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008578 attname = xmlParseAttribute2(ctxt, prefix, localname,
8579 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008580 if (ctxt->input->base != base) {
8581 if ((attvalue != NULL) && (alloc != 0))
8582 xmlFree(attvalue);
8583 attvalue = NULL;
8584 goto base_changed;
8585 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008586 if ((attname != NULL) && (attvalue != NULL)) {
8587 if (len < 0) len = xmlStrlen(attvalue);
8588 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008589 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8590 xmlURIPtr uri;
8591
8592 if (*URL != 0) {
8593 uri = xmlParseURI((const char *) URL);
8594 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008595 xmlNsErr(ctxt, XML_WAR_NS_URI,
8596 "xmlns: '%s' is not a valid URI\n",
8597 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008598 } else {
Daniel Veillard37334572008-07-31 08:20:02 +00008599 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8600 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8601 "xmlns: URI %s is not absolute\n",
8602 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008603 }
8604 xmlFreeURI(uri);
8605 }
Daniel Veillard37334572008-07-31 08:20:02 +00008606 if (URL == ctxt->str_xml_ns) {
8607 if (attname != ctxt->str_xml) {
8608 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8609 "xml namespace URI cannot be the default namespace\n",
8610 NULL, NULL, NULL);
8611 }
8612 goto skip_default_ns;
8613 }
8614 if ((len == 29) &&
8615 (xmlStrEqual(URL,
8616 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8617 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8618 "reuse of the xmlns namespace name is forbidden\n",
8619 NULL, NULL, NULL);
8620 goto skip_default_ns;
8621 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008622 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008623 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008624 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008625 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008626 for (j = 1;j <= nbNs;j++)
8627 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8628 break;
8629 if (j <= nbNs)
8630 xmlErrAttributeDup(ctxt, NULL, attname);
8631 else
8632 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008633skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008634 if (alloc != 0) xmlFree(attvalue);
8635 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008636 continue;
8637 }
8638 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008639 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8640 xmlURIPtr uri;
8641
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008642 if (attname == ctxt->str_xml) {
8643 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008644 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8645 "xml namespace prefix mapped to wrong URI\n",
8646 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008647 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008648 /*
8649 * Do not keep a namespace definition node
8650 */
Daniel Veillard37334572008-07-31 08:20:02 +00008651 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008652 }
Daniel Veillard37334572008-07-31 08:20:02 +00008653 if (URL == ctxt->str_xml_ns) {
8654 if (attname != ctxt->str_xml) {
8655 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8656 "xml namespace URI mapped to wrong prefix\n",
8657 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008658 }
Daniel Veillard37334572008-07-31 08:20:02 +00008659 goto skip_ns;
8660 }
8661 if (attname == ctxt->str_xmlns) {
8662 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8663 "redefinition of the xmlns prefix is forbidden\n",
8664 NULL, NULL, NULL);
8665 goto skip_ns;
8666 }
8667 if ((len == 29) &&
8668 (xmlStrEqual(URL,
8669 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8670 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8671 "reuse of the xmlns namespace name is forbidden\n",
8672 NULL, NULL, NULL);
8673 goto skip_ns;
8674 }
8675 if ((URL == NULL) || (URL[0] == 0)) {
8676 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8677 "xmlns:%s: Empty XML namespace is not allowed\n",
8678 attname, NULL, NULL);
8679 goto skip_ns;
8680 } else {
8681 uri = xmlParseURI((const char *) URL);
8682 if (uri == NULL) {
8683 xmlNsErr(ctxt, XML_WAR_NS_URI,
8684 "xmlns:%s: '%s' is not a valid URI\n",
8685 attname, URL, NULL);
8686 } else {
8687 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
8688 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8689 "xmlns:%s: URI %s is not absolute\n",
8690 attname, URL, NULL);
8691 }
8692 xmlFreeURI(uri);
8693 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008694 }
8695
Daniel Veillard0fb18932003-09-07 09:14:37 +00008696 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008697 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008698 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008699 for (j = 1;j <= nbNs;j++)
8700 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8701 break;
8702 if (j <= nbNs)
8703 xmlErrAttributeDup(ctxt, aprefix, attname);
8704 else
8705 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00008706skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00008707 if (alloc != 0) xmlFree(attvalue);
8708 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00008709 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008710 continue;
8711 }
8712
8713 /*
8714 * Add the pair to atts
8715 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008716 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8717 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008718 if (attvalue[len] == 0)
8719 xmlFree(attvalue);
8720 goto failed;
8721 }
8722 maxatts = ctxt->maxatts;
8723 atts = ctxt->atts;
8724 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008725 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008726 atts[nbatts++] = attname;
8727 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008728 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008729 atts[nbatts++] = attvalue;
8730 attvalue += len;
8731 atts[nbatts++] = attvalue;
8732 /*
8733 * tag if some deallocation is needed
8734 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008735 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008736 } else {
8737 if ((attvalue != NULL) && (attvalue[len] == 0))
8738 xmlFree(attvalue);
8739 }
8740
Daniel Veillard37334572008-07-31 08:20:02 +00008741failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008742
8743 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00008744 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008745 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8746 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008747 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008748 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8749 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00008750 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008751 }
8752 SKIP_BLANKS;
8753 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8754 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008755 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008756 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008757 break;
8758 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008759 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008760 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008761 }
8762
Daniel Veillard0fb18932003-09-07 09:14:37 +00008763 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00008764 * The attributes defaulting
8765 */
8766 if (ctxt->attsDefault != NULL) {
8767 xmlDefAttrsPtr defaults;
8768
8769 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
8770 if (defaults != NULL) {
8771 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00008772 attname = defaults->values[5 * i];
8773 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00008774
8775 /*
8776 * special work for namespaces defaulted defs
8777 */
8778 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
8779 /*
8780 * check that it's not a defined namespace
8781 */
8782 for (j = 1;j <= nbNs;j++)
8783 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8784 break;
8785 if (j <= nbNs) continue;
8786
8787 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008788 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008789 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008790 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008791 nbNs++;
8792 }
8793 } else if (aprefix == ctxt->str_xmlns) {
8794 /*
8795 * check that it's not a defined namespace
8796 */
8797 for (j = 1;j <= nbNs;j++)
8798 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
8799 break;
8800 if (j <= nbNs) continue;
8801
8802 nsname = xmlGetNamespace(ctxt, attname);
8803 if (nsname != defaults->values[2]) {
8804 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00008805 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008806 nbNs++;
8807 }
8808 } else {
8809 /*
8810 * check that it's not a defined attribute
8811 */
8812 for (j = 0;j < nbatts;j+=5) {
8813 if ((attname == atts[j]) && (aprefix == atts[j+1]))
8814 break;
8815 }
8816 if (j < nbatts) continue;
8817
8818 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
8819 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00008820 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008821 }
8822 maxatts = ctxt->maxatts;
8823 atts = ctxt->atts;
8824 }
8825 atts[nbatts++] = attname;
8826 atts[nbatts++] = aprefix;
8827 if (aprefix == NULL)
8828 atts[nbatts++] = NULL;
8829 else
8830 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008831 atts[nbatts++] = defaults->values[5 * i + 2];
8832 atts[nbatts++] = defaults->values[5 * i + 3];
8833 if ((ctxt->standalone == 1) &&
8834 (defaults->values[5 * i + 4] != NULL)) {
8835 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
8836 "standalone: attribute %s on %s defaulted from external subset\n",
8837 attname, localname);
8838 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008839 nbdef++;
8840 }
8841 }
8842 }
8843 }
8844
Daniel Veillarde70c8772003-11-25 07:21:18 +00008845 /*
8846 * The attributes checkings
8847 */
8848 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00008849 /*
8850 * The default namespace does not apply to attribute names.
8851 */
8852 if (atts[i + 1] != NULL) {
8853 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
8854 if (nsname == NULL) {
8855 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8856 "Namespace prefix %s for %s on %s is not defined\n",
8857 atts[i + 1], atts[i], localname);
8858 }
8859 atts[i + 2] = nsname;
8860 } else
8861 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00008862 /*
8863 * [ WFC: Unique Att Spec ]
8864 * No attribute name may appear more than once in the same
8865 * start-tag or empty-element tag.
8866 * As extended by the Namespace in XML REC.
8867 */
8868 for (j = 0; j < i;j += 5) {
8869 if (atts[i] == atts[j]) {
8870 if (atts[i+1] == atts[j+1]) {
8871 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
8872 break;
8873 }
8874 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
8875 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
8876 "Namespaced Attribute %s in '%s' redefined\n",
8877 atts[i], nsname, NULL);
8878 break;
8879 }
8880 }
8881 }
8882 }
8883
Daniel Veillarde57ec792003-09-10 10:50:59 +00008884 nsname = xmlGetNamespace(ctxt, prefix);
8885 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008886 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
8887 "Namespace prefix %s on %s is not defined\n",
8888 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008889 }
8890 *pref = prefix;
8891 *URI = nsname;
8892
8893 /*
8894 * SAX: Start of Element !
8895 */
8896 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
8897 (!ctxt->disableSAX)) {
8898 if (nbNs > 0)
8899 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8900 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
8901 nbatts / 5, nbdef, atts);
8902 else
8903 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
8904 nsname, 0, NULL, nbatts / 5, nbdef, atts);
8905 }
8906
8907 /*
8908 * Free up attribute allocated strings if needed
8909 */
8910 if (attval != 0) {
8911 for (i = 3,j = 0; j < nratts;i += 5,j++)
8912 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8913 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008914 }
8915
8916 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008917
8918base_changed:
8919 /*
8920 * the attribute strings are valid iif the base didn't changed
8921 */
8922 if (attval != 0) {
8923 for (i = 3,j = 0; j < nratts;i += 5,j++)
8924 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
8925 xmlFree((xmlChar *) atts[i]);
8926 }
8927 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008928 ctxt->input->line = oldline;
8929 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008930 if (ctxt->wellFormed == 1) {
8931 goto reparse;
8932 }
8933 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008934}
8935
8936/**
8937 * xmlParseEndTag2:
8938 * @ctxt: an XML parser context
8939 * @line: line of the start tag
8940 * @nsNr: number of namespaces on the start tag
8941 *
8942 * parse an end of tag
8943 *
8944 * [42] ETag ::= '</' Name S? '>'
8945 *
8946 * With namespace
8947 *
8948 * [NS 9] ETag ::= '</' QName S? '>'
8949 */
8950
8951static void
8952xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008953 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008954 const xmlChar *name;
8955
8956 GROW;
8957 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008958 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008959 return;
8960 }
8961 SKIP(2);
8962
William M. Brack13dfa872004-09-18 04:52:08 +00008963 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008964 if (ctxt->input->cur[tlen] == '>') {
8965 ctxt->input->cur += tlen + 1;
8966 goto done;
8967 }
8968 ctxt->input->cur += tlen;
8969 name = (xmlChar*)1;
8970 } else {
8971 if (prefix == NULL)
8972 name = xmlParseNameAndCompare(ctxt, ctxt->name);
8973 else
8974 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
8975 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008976
8977 /*
8978 * We should definitely be at the ending "S? '>'" part
8979 */
8980 GROW;
8981 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008982 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008983 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008984 } else
8985 NEXT1;
8986
8987 /*
8988 * [ WFC: Element Type Match ]
8989 * The Name in an element's end-tag must match the element type in the
8990 * start-tag.
8991 *
8992 */
8993 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008994 if (name == NULL) name = BAD_CAST "unparseable";
8995 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00008996 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008997 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008998 }
8999
9000 /*
9001 * SAX: End of Tag
9002 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009003done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009004 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9005 (!ctxt->disableSAX))
9006 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9007
Daniel Veillard0fb18932003-09-07 09:14:37 +00009008 spacePop(ctxt);
9009 if (nsNr != 0)
9010 nsPop(ctxt, nsNr);
9011 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009012}
9013
9014/**
Owen Taylor3473f882001-02-23 17:55:21 +00009015 * xmlParseCDSect:
9016 * @ctxt: an XML parser context
9017 *
9018 * Parse escaped pure raw content.
9019 *
9020 * [18] CDSect ::= CDStart CData CDEnd
9021 *
9022 * [19] CDStart ::= '<![CDATA['
9023 *
9024 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9025 *
9026 * [21] CDEnd ::= ']]>'
9027 */
9028void
9029xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9030 xmlChar *buf = NULL;
9031 int len = 0;
9032 int size = XML_PARSER_BUFFER_SIZE;
9033 int r, rl;
9034 int s, sl;
9035 int cur, l;
9036 int count = 0;
9037
Daniel Veillard8f597c32003-10-06 08:19:27 +00009038 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009039 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009040 SKIP(9);
9041 } else
9042 return;
9043
9044 ctxt->instate = XML_PARSER_CDATA_SECTION;
9045 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009046 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009047 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009048 ctxt->instate = XML_PARSER_CONTENT;
9049 return;
9050 }
9051 NEXTL(rl);
9052 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009053 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009054 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009055 ctxt->instate = XML_PARSER_CONTENT;
9056 return;
9057 }
9058 NEXTL(sl);
9059 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009060 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009061 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009062 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009063 return;
9064 }
William M. Brack871611b2003-10-18 04:53:14 +00009065 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009066 ((r != ']') || (s != ']') || (cur != '>'))) {
9067 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009068 xmlChar *tmp;
9069
Owen Taylor3473f882001-02-23 17:55:21 +00009070 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009071 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9072 if (tmp == NULL) {
9073 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009074 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009075 return;
9076 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009077 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009078 }
9079 COPY_BUF(rl,buf,len,r);
9080 r = s;
9081 rl = sl;
9082 s = cur;
9083 sl = l;
9084 count++;
9085 if (count > 50) {
9086 GROW;
9087 count = 0;
9088 }
9089 NEXTL(l);
9090 cur = CUR_CHAR(l);
9091 }
9092 buf[len] = 0;
9093 ctxt->instate = XML_PARSER_CONTENT;
9094 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009095 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009096 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009097 xmlFree(buf);
9098 return;
9099 }
9100 NEXTL(l);
9101
9102 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009103 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009104 */
9105 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9106 if (ctxt->sax->cdataBlock != NULL)
9107 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009108 else if (ctxt->sax->characters != NULL)
9109 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009110 }
9111 xmlFree(buf);
9112}
9113
9114/**
9115 * xmlParseContent:
9116 * @ctxt: an XML parser context
9117 *
9118 * Parse a content:
9119 *
9120 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9121 */
9122
9123void
9124xmlParseContent(xmlParserCtxtPtr ctxt) {
9125 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009126 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009127 ((RAW != '<') || (NXT(1) != '/')) &&
9128 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009129 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009130 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009131 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009132
9133 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009134 * First case : a Processing Instruction.
9135 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009136 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009137 xmlParsePI(ctxt);
9138 }
9139
9140 /*
9141 * Second case : a CDSection
9142 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009143 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009144 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009145 xmlParseCDSect(ctxt);
9146 }
9147
9148 /*
9149 * Third case : a comment
9150 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009151 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009152 (NXT(2) == '-') && (NXT(3) == '-')) {
9153 xmlParseComment(ctxt);
9154 ctxt->instate = XML_PARSER_CONTENT;
9155 }
9156
9157 /*
9158 * Fourth case : a sub-element.
9159 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009160 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009161 xmlParseElement(ctxt);
9162 }
9163
9164 /*
9165 * Fifth case : a reference. If if has not been resolved,
9166 * parsing returns it's Name, create the node
9167 */
9168
Daniel Veillard21a0f912001-02-25 19:54:14 +00009169 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009170 xmlParseReference(ctxt);
9171 }
9172
9173 /*
9174 * Last case, text. Note that References are handled directly.
9175 */
9176 else {
9177 xmlParseCharData(ctxt, 0);
9178 }
9179
9180 GROW;
9181 /*
9182 * Pop-up of finished entities.
9183 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009184 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009185 xmlPopInput(ctxt);
9186 SHRINK;
9187
Daniel Veillardfdc91562002-07-01 21:52:03 +00009188 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009189 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9190 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009191 ctxt->instate = XML_PARSER_EOF;
9192 break;
9193 }
9194 }
9195}
9196
9197/**
9198 * xmlParseElement:
9199 * @ctxt: an XML parser context
9200 *
9201 * parse an XML element, this is highly recursive
9202 *
9203 * [39] element ::= EmptyElemTag | STag content ETag
9204 *
9205 * [ WFC: Element Type Match ]
9206 * The Name in an element's end-tag must match the element type in the
9207 * start-tag.
9208 *
Owen Taylor3473f882001-02-23 17:55:21 +00009209 */
9210
9211void
9212xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009213 const xmlChar *name;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009214 const xmlChar *prefix;
9215 const xmlChar *URI;
Owen Taylor3473f882001-02-23 17:55:21 +00009216 xmlParserNodeInfo node_info;
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009217 int line, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +00009218 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009219 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009220
Daniel Veillard8915c152008-08-26 13:05:34 +00009221 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9222 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9223 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9224 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9225 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009226 ctxt->instate = XML_PARSER_EOF;
9227 return;
9228 }
9229
Owen Taylor3473f882001-02-23 17:55:21 +00009230 /* Capture start position */
9231 if (ctxt->record_info) {
9232 node_info.begin_pos = ctxt->input->consumed +
9233 (CUR_PTR - ctxt->input->base);
9234 node_info.begin_line = ctxt->input->line;
9235 }
9236
9237 if (ctxt->spaceNr == 0)
9238 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009239 else if (*ctxt->space == -2)
9240 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009241 else
9242 spacePush(ctxt, *ctxt->space);
9243
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009244 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009245#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009246 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009247#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009248 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009249#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009250 else
9251 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009252#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009253 if (name == NULL) {
9254 spacePop(ctxt);
9255 return;
9256 }
9257 namePush(ctxt, name);
9258 ret = ctxt->node;
9259
Daniel Veillard4432df22003-09-28 18:58:27 +00009260#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009261 /*
9262 * [ VC: Root Element Type ]
9263 * The Name in the document type declaration must match the element
9264 * type of the root element.
9265 */
9266 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9267 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9268 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009269#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009270
9271 /*
9272 * Check for an Empty Element.
9273 */
9274 if ((RAW == '/') && (NXT(1) == '>')) {
9275 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009276 if (ctxt->sax2) {
9277 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9278 (!ctxt->disableSAX))
9279 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009280#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009281 } else {
9282 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9283 (!ctxt->disableSAX))
9284 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009285#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009286 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009287 namePop(ctxt);
9288 spacePop(ctxt);
9289 if (nsNr != ctxt->nsNr)
9290 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009291 if ( ret != NULL && ctxt->record_info ) {
9292 node_info.end_pos = ctxt->input->consumed +
9293 (CUR_PTR - ctxt->input->base);
9294 node_info.end_line = ctxt->input->line;
9295 node_info.node = ret;
9296 xmlParserAddNodeInfo(ctxt, &node_info);
9297 }
9298 return;
9299 }
9300 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009301 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009302 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009303 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9304 "Couldn't find end of Start Tag %s line %d\n",
9305 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009306
9307 /*
9308 * end of parsing of this node.
9309 */
9310 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009311 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009312 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009313 if (nsNr != ctxt->nsNr)
9314 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009315
9316 /*
9317 * Capture end position and add node
9318 */
9319 if ( ret != NULL && ctxt->record_info ) {
9320 node_info.end_pos = ctxt->input->consumed +
9321 (CUR_PTR - ctxt->input->base);
9322 node_info.end_line = ctxt->input->line;
9323 node_info.node = ret;
9324 xmlParserAddNodeInfo(ctxt, &node_info);
9325 }
9326 return;
9327 }
9328
9329 /*
9330 * Parse the content of the element:
9331 */
9332 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009333 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009334 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009335 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009336 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009337
9338 /*
9339 * end of parsing of this node.
9340 */
9341 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009342 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009343 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009344 if (nsNr != ctxt->nsNr)
9345 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009346 return;
9347 }
9348
9349 /*
9350 * parse the end of tag: '</' should be here.
9351 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009352 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009353 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009354 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009355 }
9356#ifdef LIBXML_SAX1_ENABLED
9357 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009358 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009359#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009360
9361 /*
9362 * Capture end position and add node
9363 */
9364 if ( ret != NULL && ctxt->record_info ) {
9365 node_info.end_pos = ctxt->input->consumed +
9366 (CUR_PTR - ctxt->input->base);
9367 node_info.end_line = ctxt->input->line;
9368 node_info.node = ret;
9369 xmlParserAddNodeInfo(ctxt, &node_info);
9370 }
9371}
9372
9373/**
9374 * xmlParseVersionNum:
9375 * @ctxt: an XML parser context
9376 *
9377 * parse the XML version value.
9378 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009379 * [26] VersionNum ::= '1.' [0-9]+
9380 *
9381 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009382 *
9383 * Returns the string giving the XML version number, or NULL
9384 */
9385xmlChar *
9386xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9387 xmlChar *buf = NULL;
9388 int len = 0;
9389 int size = 10;
9390 xmlChar cur;
9391
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009392 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009393 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009394 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009395 return(NULL);
9396 }
9397 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009398 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009399 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009400 return(NULL);
9401 }
9402 buf[len++] = cur;
9403 NEXT;
9404 cur=CUR;
9405 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009406 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009407 return(NULL);
9408 }
9409 buf[len++] = cur;
9410 NEXT;
9411 cur=CUR;
9412 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009413 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009414 xmlChar *tmp;
9415
Owen Taylor3473f882001-02-23 17:55:21 +00009416 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009417 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9418 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009419 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009420 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009421 return(NULL);
9422 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009423 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009424 }
9425 buf[len++] = cur;
9426 NEXT;
9427 cur=CUR;
9428 }
9429 buf[len] = 0;
9430 return(buf);
9431}
9432
9433/**
9434 * xmlParseVersionInfo:
9435 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009436 *
Owen Taylor3473f882001-02-23 17:55:21 +00009437 * parse the XML version.
9438 *
9439 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009440 *
Owen Taylor3473f882001-02-23 17:55:21 +00009441 * [25] Eq ::= S? '=' S?
9442 *
9443 * Returns the version string, e.g. "1.0"
9444 */
9445
9446xmlChar *
9447xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9448 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009449
Daniel Veillarda07050d2003-10-19 14:46:32 +00009450 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009451 SKIP(7);
9452 SKIP_BLANKS;
9453 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009454 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009455 return(NULL);
9456 }
9457 NEXT;
9458 SKIP_BLANKS;
9459 if (RAW == '"') {
9460 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009461 version = xmlParseVersionNum(ctxt);
9462 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009463 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009464 } else
9465 NEXT;
9466 } else if (RAW == '\''){
9467 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009468 version = xmlParseVersionNum(ctxt);
9469 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009470 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009471 } else
9472 NEXT;
9473 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009474 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009475 }
9476 }
9477 return(version);
9478}
9479
9480/**
9481 * xmlParseEncName:
9482 * @ctxt: an XML parser context
9483 *
9484 * parse the XML encoding name
9485 *
9486 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9487 *
9488 * Returns the encoding name value or NULL
9489 */
9490xmlChar *
9491xmlParseEncName(xmlParserCtxtPtr ctxt) {
9492 xmlChar *buf = NULL;
9493 int len = 0;
9494 int size = 10;
9495 xmlChar cur;
9496
9497 cur = CUR;
9498 if (((cur >= 'a') && (cur <= 'z')) ||
9499 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009500 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009501 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009502 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009503 return(NULL);
9504 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009505
Owen Taylor3473f882001-02-23 17:55:21 +00009506 buf[len++] = cur;
9507 NEXT;
9508 cur = CUR;
9509 while (((cur >= 'a') && (cur <= 'z')) ||
9510 ((cur >= 'A') && (cur <= 'Z')) ||
9511 ((cur >= '0') && (cur <= '9')) ||
9512 (cur == '.') || (cur == '_') ||
9513 (cur == '-')) {
9514 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009515 xmlChar *tmp;
9516
Owen Taylor3473f882001-02-23 17:55:21 +00009517 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009518 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9519 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009520 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009521 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009522 return(NULL);
9523 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009524 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009525 }
9526 buf[len++] = cur;
9527 NEXT;
9528 cur = CUR;
9529 if (cur == 0) {
9530 SHRINK;
9531 GROW;
9532 cur = CUR;
9533 }
9534 }
9535 buf[len] = 0;
9536 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009537 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009538 }
9539 return(buf);
9540}
9541
9542/**
9543 * xmlParseEncodingDecl:
9544 * @ctxt: an XML parser context
9545 *
9546 * parse the XML encoding declaration
9547 *
9548 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9549 *
9550 * this setups the conversion filters.
9551 *
9552 * Returns the encoding value or NULL
9553 */
9554
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009555const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009556xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9557 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009558
9559 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009560 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009561 SKIP(8);
9562 SKIP_BLANKS;
9563 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009564 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009565 return(NULL);
9566 }
9567 NEXT;
9568 SKIP_BLANKS;
9569 if (RAW == '"') {
9570 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009571 encoding = xmlParseEncName(ctxt);
9572 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009573 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009574 } else
9575 NEXT;
9576 } else if (RAW == '\''){
9577 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009578 encoding = xmlParseEncName(ctxt);
9579 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009580 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009581 } else
9582 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009583 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009584 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009585 }
Daniel Veillard6b621b82003-08-11 15:03:34 +00009586 /*
9587 * UTF-16 encoding stwich has already taken place at this stage,
9588 * more over the little-endian/big-endian selection is already done
9589 */
9590 if ((encoding != NULL) &&
9591 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9592 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009593 /*
9594 * If no encoding was passed to the parser, that we are
9595 * using UTF-16 and no decoder is present i.e. the
9596 * document is apparently UTF-8 compatible, then raise an
9597 * encoding mismatch fatal error
9598 */
9599 if ((ctxt->encoding == NULL) &&
9600 (ctxt->input->buf != NULL) &&
9601 (ctxt->input->buf->encoder == NULL)) {
9602 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9603 "Document labelled UTF-16 but has UTF-8 content\n");
9604 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009605 if (ctxt->encoding != NULL)
9606 xmlFree((xmlChar *) ctxt->encoding);
9607 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009608 }
9609 /*
9610 * UTF-8 encoding is handled natively
9611 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009612 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009613 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9614 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009615 if (ctxt->encoding != NULL)
9616 xmlFree((xmlChar *) ctxt->encoding);
9617 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +00009618 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009619 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009620 xmlCharEncodingHandlerPtr handler;
9621
9622 if (ctxt->input->encoding != NULL)
9623 xmlFree((xmlChar *) ctxt->input->encoding);
9624 ctxt->input->encoding = encoding;
9625
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009626 handler = xmlFindCharEncodingHandler((const char *) encoding);
9627 if (handler != NULL) {
9628 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +00009629 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009630 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +00009631 "Unsupported encoding %s\n", encoding);
9632 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009633 }
9634 }
9635 }
9636 return(encoding);
9637}
9638
9639/**
9640 * xmlParseSDDecl:
9641 * @ctxt: an XML parser context
9642 *
9643 * parse the XML standalone declaration
9644 *
9645 * [32] SDDecl ::= S 'standalone' Eq
9646 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
9647 *
9648 * [ VC: Standalone Document Declaration ]
9649 * TODO The standalone document declaration must have the value "no"
9650 * if any external markup declarations contain declarations of:
9651 * - attributes with default values, if elements to which these
9652 * attributes apply appear in the document without specifications
9653 * of values for these attributes, or
9654 * - entities (other than amp, lt, gt, apos, quot), if references
9655 * to those entities appear in the document, or
9656 * - attributes with values subject to normalization, where the
9657 * attribute appears in the document with a value which will change
9658 * as a result of normalization, or
9659 * - element types with element content, if white space occurs directly
9660 * within any instance of those types.
9661 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009662 * Returns:
9663 * 1 if standalone="yes"
9664 * 0 if standalone="no"
9665 * -2 if standalone attribute is missing or invalid
9666 * (A standalone value of -2 means that the XML declaration was found,
9667 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +00009668 */
9669
9670int
9671xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +00009672 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00009673
9674 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009675 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009676 SKIP(10);
9677 SKIP_BLANKS;
9678 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009679 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009680 return(standalone);
9681 }
9682 NEXT;
9683 SKIP_BLANKS;
9684 if (RAW == '\''){
9685 NEXT;
9686 if ((RAW == 'n') && (NXT(1) == 'o')) {
9687 standalone = 0;
9688 SKIP(2);
9689 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9690 (NXT(2) == 's')) {
9691 standalone = 1;
9692 SKIP(3);
9693 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009694 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009695 }
9696 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009697 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009698 } else
9699 NEXT;
9700 } else if (RAW == '"'){
9701 NEXT;
9702 if ((RAW == 'n') && (NXT(1) == 'o')) {
9703 standalone = 0;
9704 SKIP(2);
9705 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
9706 (NXT(2) == 's')) {
9707 standalone = 1;
9708 SKIP(3);
9709 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009710 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009711 }
9712 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009713 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009714 } else
9715 NEXT;
9716 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009717 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009718 }
9719 }
9720 return(standalone);
9721}
9722
9723/**
9724 * xmlParseXMLDecl:
9725 * @ctxt: an XML parser context
9726 *
9727 * parse an XML declaration header
9728 *
9729 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9730 */
9731
9732void
9733xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
9734 xmlChar *version;
9735
9736 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +00009737 * This value for standalone indicates that the document has an
9738 * XML declaration but it does not have a standalone attribute.
9739 * It will be overwritten later if a standalone attribute is found.
9740 */
9741 ctxt->input->standalone = -2;
9742
9743 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009744 * We know that '<?xml' is here.
9745 */
9746 SKIP(5);
9747
William M. Brack76e95df2003-10-18 16:20:14 +00009748 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009749 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9750 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009751 }
9752 SKIP_BLANKS;
9753
9754 /*
Daniel Veillard19840942001-11-29 16:11:38 +00009755 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00009756 */
9757 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00009758 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009759 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +00009760 } else {
9761 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
9762 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +00009763 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +00009764 */
Daniel Veillard34e3f642008-07-29 09:02:27 +00009765 if (ctxt->options & XML_PARSE_OLD10) {
9766 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9767 "Unsupported version '%s'\n",
9768 version);
9769 } else {
9770 if ((version[0] == '1') && ((version[1] == '.'))) {
9771 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
9772 "Unsupported version '%s'\n",
9773 version, NULL);
9774 } else {
9775 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
9776 "Unsupported version '%s'\n",
9777 version);
9778 }
9779 }
Daniel Veillard19840942001-11-29 16:11:38 +00009780 }
9781 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00009782 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00009783 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00009784 }
Owen Taylor3473f882001-02-23 17:55:21 +00009785
9786 /*
9787 * We may have the encoding declaration
9788 */
William M. Brack76e95df2003-10-18 16:20:14 +00009789 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009790 if ((RAW == '?') && (NXT(1) == '>')) {
9791 SKIP(2);
9792 return;
9793 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009794 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009795 }
9796 xmlParseEncodingDecl(ctxt);
9797 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9798 /*
9799 * The XML REC instructs us to stop parsing right here
9800 */
9801 return;
9802 }
9803
9804 /*
9805 * We may have the standalone status.
9806 */
William M. Brack76e95df2003-10-18 16:20:14 +00009807 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009808 if ((RAW == '?') && (NXT(1) == '>')) {
9809 SKIP(2);
9810 return;
9811 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009812 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009813 }
9814 SKIP_BLANKS;
9815 ctxt->input->standalone = xmlParseSDDecl(ctxt);
9816
9817 SKIP_BLANKS;
9818 if ((RAW == '?') && (NXT(1) == '>')) {
9819 SKIP(2);
9820 } else if (RAW == '>') {
9821 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009822 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009823 NEXT;
9824 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009825 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009826 MOVETO_ENDTAG(CUR_PTR);
9827 NEXT;
9828 }
9829}
9830
9831/**
9832 * xmlParseMisc:
9833 * @ctxt: an XML parser context
9834 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009835 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00009836 *
9837 * [27] Misc ::= Comment | PI | S
9838 */
9839
9840void
9841xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009842 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +00009843 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00009844 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00009845 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009846 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00009847 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009848 NEXT;
9849 } else
9850 xmlParseComment(ctxt);
9851 }
9852}
9853
9854/**
9855 * xmlParseDocument:
9856 * @ctxt: an XML parser context
9857 *
9858 * parse an XML document (and build a tree if using the standard SAX
9859 * interface).
9860 *
9861 * [1] document ::= prolog element Misc*
9862 *
9863 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
9864 *
9865 * Returns 0, -1 in case of error. the parser context is augmented
9866 * as a result of the parsing.
9867 */
9868
9869int
9870xmlParseDocument(xmlParserCtxtPtr ctxt) {
9871 xmlChar start[4];
9872 xmlCharEncoding enc;
9873
9874 xmlInitParser();
9875
Daniel Veillard36e5cd52004-11-02 14:52:23 +00009876 if ((ctxt == NULL) || (ctxt->input == NULL))
9877 return(-1);
9878
Owen Taylor3473f882001-02-23 17:55:21 +00009879 GROW;
9880
9881 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00009882 * SAX: detecting the level.
9883 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009884 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009885
9886 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009887 * SAX: beginning of the document processing.
9888 */
9889 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9890 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
9891
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009892 if ((ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) &&
9893 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00009894 /*
9895 * Get the 4 first bytes and decode the charset
9896 * if enc != XML_CHAR_ENCODING_NONE
9897 * plug some encoding conversion routines.
9898 */
9899 start[0] = RAW;
9900 start[1] = NXT(1);
9901 start[2] = NXT(2);
9902 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00009903 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +00009904 if (enc != XML_CHAR_ENCODING_NONE) {
9905 xmlSwitchEncoding(ctxt, enc);
9906 }
Owen Taylor3473f882001-02-23 17:55:21 +00009907 }
9908
9909
9910 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009911 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009912 }
9913
9914 /*
9915 * Check for the XMLDecl in the Prolog.
9916 */
9917 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009918 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00009919
9920 /*
9921 * Note that we will switch encoding on the fly.
9922 */
9923 xmlParseXMLDecl(ctxt);
9924 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9925 /*
9926 * The XML REC instructs us to stop parsing right here
9927 */
9928 return(-1);
9929 }
9930 ctxt->standalone = ctxt->input->standalone;
9931 SKIP_BLANKS;
9932 } else {
9933 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
9934 }
9935 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
9936 ctxt->sax->startDocument(ctxt->userData);
9937
9938 /*
9939 * The Misc part of the Prolog
9940 */
9941 GROW;
9942 xmlParseMisc(ctxt);
9943
9944 /*
9945 * Then possibly doc type declaration(s) and more Misc
9946 * (doctypedecl Misc*)?
9947 */
9948 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009949 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009950
9951 ctxt->inSubset = 1;
9952 xmlParseDocTypeDecl(ctxt);
9953 if (RAW == '[') {
9954 ctxt->instate = XML_PARSER_DTD;
9955 xmlParseInternalSubset(ctxt);
9956 }
9957
9958 /*
9959 * Create and update the external subset.
9960 */
9961 ctxt->inSubset = 2;
9962 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9963 (!ctxt->disableSAX))
9964 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9965 ctxt->extSubSystem, ctxt->extSubURI);
9966 ctxt->inSubset = 0;
9967
Daniel Veillardac4118d2008-01-11 05:27:32 +00009968 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009969
9970 ctxt->instate = XML_PARSER_PROLOG;
9971 xmlParseMisc(ctxt);
9972 }
9973
9974 /*
9975 * Time to start parsing the tree itself
9976 */
9977 GROW;
9978 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009979 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
9980 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009981 } else {
9982 ctxt->instate = XML_PARSER_CONTENT;
9983 xmlParseElement(ctxt);
9984 ctxt->instate = XML_PARSER_EPILOG;
9985
9986
9987 /*
9988 * The Misc part at the end
9989 */
9990 xmlParseMisc(ctxt);
9991
Daniel Veillard561b7f82002-03-20 21:55:57 +00009992 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009993 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009994 }
9995 ctxt->instate = XML_PARSER_EOF;
9996 }
9997
9998 /*
9999 * SAX: end of the document processing.
10000 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010001 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010002 ctxt->sax->endDocument(ctxt->userData);
10003
Daniel Veillard5997aca2002-03-18 18:36:20 +000010004 /*
10005 * Remove locally kept entity definitions if the tree was not built
10006 */
10007 if ((ctxt->myDoc != NULL) &&
10008 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10009 xmlFreeDoc(ctxt->myDoc);
10010 ctxt->myDoc = NULL;
10011 }
10012
Daniel Veillardae0765b2008-07-31 19:54:59 +000010013 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10014 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10015 if (ctxt->valid)
10016 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10017 if (ctxt->nsWellFormed)
10018 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10019 if (ctxt->options & XML_PARSE_OLD10)
10020 ctxt->myDoc->properties |= XML_DOC_OLD10;
10021 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010022 if (! ctxt->wellFormed) {
10023 ctxt->valid = 0;
10024 return(-1);
10025 }
Owen Taylor3473f882001-02-23 17:55:21 +000010026 return(0);
10027}
10028
10029/**
10030 * xmlParseExtParsedEnt:
10031 * @ctxt: an XML parser context
10032 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010033 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010034 * An external general parsed entity is well-formed if it matches the
10035 * production labeled extParsedEnt.
10036 *
10037 * [78] extParsedEnt ::= TextDecl? content
10038 *
10039 * Returns 0, -1 in case of error. the parser context is augmented
10040 * as a result of the parsing.
10041 */
10042
10043int
10044xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10045 xmlChar start[4];
10046 xmlCharEncoding enc;
10047
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010048 if ((ctxt == NULL) || (ctxt->input == NULL))
10049 return(-1);
10050
Owen Taylor3473f882001-02-23 17:55:21 +000010051 xmlDefaultSAXHandlerInit();
10052
Daniel Veillard309f81d2003-09-23 09:02:53 +000010053 xmlDetectSAX2(ctxt);
10054
Owen Taylor3473f882001-02-23 17:55:21 +000010055 GROW;
10056
10057 /*
10058 * SAX: beginning of the document processing.
10059 */
10060 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10061 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10062
10063 /*
10064 * Get the 4 first bytes and decode the charset
10065 * if enc != XML_CHAR_ENCODING_NONE
10066 * plug some encoding conversion routines.
10067 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010068 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10069 start[0] = RAW;
10070 start[1] = NXT(1);
10071 start[2] = NXT(2);
10072 start[3] = NXT(3);
10073 enc = xmlDetectCharEncoding(start, 4);
10074 if (enc != XML_CHAR_ENCODING_NONE) {
10075 xmlSwitchEncoding(ctxt, enc);
10076 }
Owen Taylor3473f882001-02-23 17:55:21 +000010077 }
10078
10079
10080 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010081 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010082 }
10083
10084 /*
10085 * Check for the XMLDecl in the Prolog.
10086 */
10087 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010088 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010089
10090 /*
10091 * Note that we will switch encoding on the fly.
10092 */
10093 xmlParseXMLDecl(ctxt);
10094 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10095 /*
10096 * The XML REC instructs us to stop parsing right here
10097 */
10098 return(-1);
10099 }
10100 SKIP_BLANKS;
10101 } else {
10102 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10103 }
10104 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10105 ctxt->sax->startDocument(ctxt->userData);
10106
10107 /*
10108 * Doing validity checking on chunk doesn't make sense
10109 */
10110 ctxt->instate = XML_PARSER_CONTENT;
10111 ctxt->validate = 0;
10112 ctxt->loadsubset = 0;
10113 ctxt->depth = 0;
10114
10115 xmlParseContent(ctxt);
10116
10117 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010118 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010119 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010120 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010121 }
10122
10123 /*
10124 * SAX: end of the document processing.
10125 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010126 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010127 ctxt->sax->endDocument(ctxt->userData);
10128
10129 if (! ctxt->wellFormed) return(-1);
10130 return(0);
10131}
10132
Daniel Veillard73b013f2003-09-30 12:36:01 +000010133#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010134/************************************************************************
10135 * *
10136 * Progressive parsing interfaces *
10137 * *
10138 ************************************************************************/
10139
10140/**
10141 * xmlParseLookupSequence:
10142 * @ctxt: an XML parser context
10143 * @first: the first char to lookup
10144 * @next: the next char to lookup or zero
10145 * @third: the next char to lookup or zero
10146 *
10147 * Try to find if a sequence (first, next, third) or just (first next) or
10148 * (first) is available in the input stream.
10149 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10150 * to avoid rescanning sequences of bytes, it DOES change the state of the
10151 * parser, do not use liberally.
10152 *
10153 * Returns the index to the current parsing point if the full sequence
10154 * is available, -1 otherwise.
10155 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010156static int
Owen Taylor3473f882001-02-23 17:55:21 +000010157xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10158 xmlChar next, xmlChar third) {
10159 int base, len;
10160 xmlParserInputPtr in;
10161 const xmlChar *buf;
10162
10163 in = ctxt->input;
10164 if (in == NULL) return(-1);
10165 base = in->cur - in->base;
10166 if (base < 0) return(-1);
10167 if (ctxt->checkIndex > base)
10168 base = ctxt->checkIndex;
10169 if (in->buf == NULL) {
10170 buf = in->base;
10171 len = in->length;
10172 } else {
10173 buf = in->buf->buffer->content;
10174 len = in->buf->buffer->use;
10175 }
10176 /* take into account the sequence length */
10177 if (third) len -= 2;
10178 else if (next) len --;
10179 for (;base < len;base++) {
10180 if (buf[base] == first) {
10181 if (third != 0) {
10182 if ((buf[base + 1] != next) ||
10183 (buf[base + 2] != third)) continue;
10184 } else if (next != 0) {
10185 if (buf[base + 1] != next) continue;
10186 }
10187 ctxt->checkIndex = 0;
10188#ifdef DEBUG_PUSH
10189 if (next == 0)
10190 xmlGenericError(xmlGenericErrorContext,
10191 "PP: lookup '%c' found at %d\n",
10192 first, base);
10193 else if (third == 0)
10194 xmlGenericError(xmlGenericErrorContext,
10195 "PP: lookup '%c%c' found at %d\n",
10196 first, next, base);
10197 else
10198 xmlGenericError(xmlGenericErrorContext,
10199 "PP: lookup '%c%c%c' found at %d\n",
10200 first, next, third, base);
10201#endif
10202 return(base - (in->cur - in->base));
10203 }
10204 }
10205 ctxt->checkIndex = base;
10206#ifdef DEBUG_PUSH
10207 if (next == 0)
10208 xmlGenericError(xmlGenericErrorContext,
10209 "PP: lookup '%c' failed\n", first);
10210 else if (third == 0)
10211 xmlGenericError(xmlGenericErrorContext,
10212 "PP: lookup '%c%c' failed\n", first, next);
10213 else
10214 xmlGenericError(xmlGenericErrorContext,
10215 "PP: lookup '%c%c%c' failed\n", first, next, third);
10216#endif
10217 return(-1);
10218}
10219
10220/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010221 * xmlParseGetLasts:
10222 * @ctxt: an XML parser context
10223 * @lastlt: pointer to store the last '<' from the input
10224 * @lastgt: pointer to store the last '>' from the input
10225 *
10226 * Lookup the last < and > in the current chunk
10227 */
10228static void
10229xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10230 const xmlChar **lastgt) {
10231 const xmlChar *tmp;
10232
10233 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10234 xmlGenericError(xmlGenericErrorContext,
10235 "Internal error: xmlParseGetLasts\n");
10236 return;
10237 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010238 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010239 tmp = ctxt->input->end;
10240 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010241 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010242 if (tmp < ctxt->input->base) {
10243 *lastlt = NULL;
10244 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010245 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010246 *lastlt = tmp;
10247 tmp++;
10248 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10249 if (*tmp == '\'') {
10250 tmp++;
10251 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10252 if (tmp < ctxt->input->end) tmp++;
10253 } else if (*tmp == '"') {
10254 tmp++;
10255 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10256 if (tmp < ctxt->input->end) tmp++;
10257 } else
10258 tmp++;
10259 }
10260 if (tmp < ctxt->input->end)
10261 *lastgt = tmp;
10262 else {
10263 tmp = *lastlt;
10264 tmp--;
10265 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10266 if (tmp >= ctxt->input->base)
10267 *lastgt = tmp;
10268 else
10269 *lastgt = NULL;
10270 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010271 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010272 } else {
10273 *lastlt = NULL;
10274 *lastgt = NULL;
10275 }
10276}
/**
 * xmlCheckCdataPush:
 * @utf: pointer to the block of characters
 * @len: length of the block in bytes
 *
 * Check that the block of characters is okay as SCdata content [20]
 *
 * Returns the number of bytes to pass if okay, a negative index where a
 * UTF-8 error occurred otherwise
 */
10287static int
10288xmlCheckCdataPush(const xmlChar *utf, int len) {
10289 int ix;
10290 unsigned char c;
10291 int codepoint;
10292
10293 if ((utf == NULL) || (len <= 0))
10294 return(0);
10295
10296 for (ix = 0; ix < len;) { /* string is 0-terminated */
10297 c = utf[ix];
10298 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10299 if (c >= 0x20)
10300 ix++;
10301 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10302 ix++;
10303 else
10304 return(-ix);
10305 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10306 if (ix + 2 > len) return(ix);
10307 if ((utf[ix+1] & 0xc0 ) != 0x80)
10308 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010309 codepoint = (utf[ix] & 0x1f) << 6;
10310 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010311 if (!xmlIsCharQ(codepoint))
10312 return(-ix);
10313 ix += 2;
10314 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10315 if (ix + 3 > len) return(ix);
10316 if (((utf[ix+1] & 0xc0) != 0x80) ||
10317 ((utf[ix+2] & 0xc0) != 0x80))
10318 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010319 codepoint = (utf[ix] & 0xf) << 12;
10320 codepoint |= (utf[ix+1] & 0x3f) << 6;
10321 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010322 if (!xmlIsCharQ(codepoint))
10323 return(-ix);
10324 ix += 3;
10325 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10326 if (ix + 4 > len) return(ix);
10327 if (((utf[ix+1] & 0xc0) != 0x80) ||
10328 ((utf[ix+2] & 0xc0) != 0x80) ||
10329 ((utf[ix+3] & 0xc0) != 0x80))
10330 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010331 codepoint = (utf[ix] & 0x7) << 18;
10332 codepoint |= (utf[ix+1] & 0x3f) << 12;
10333 codepoint |= (utf[ix+2] & 0x3f) << 6;
10334 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010335 if (!xmlIsCharQ(codepoint))
10336 return(-ix);
10337 ix += 4;
10338 } else /* unknown encoding */
10339 return(-ix);
10340 }
10341 return(ix);
10342}
10343
10344/**
Owen Taylor3473f882001-02-23 17:55:21 +000010345 * xmlParseTryOrFinish:
10346 * @ctxt: an XML parser context
10347 * @terminate: last chunk indicator
10348 *
10349 * Try to progress on parsing
10350 *
10351 * Returns zero if no parsing was possible
10352 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010353static int
Owen Taylor3473f882001-02-23 17:55:21 +000010354xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10355 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010356 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010357 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010358 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010359
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010360 if (ctxt->input == NULL)
10361 return(0);
10362
Owen Taylor3473f882001-02-23 17:55:21 +000010363#ifdef DEBUG_PUSH
10364 switch (ctxt->instate) {
10365 case XML_PARSER_EOF:
10366 xmlGenericError(xmlGenericErrorContext,
10367 "PP: try EOF\n"); break;
10368 case XML_PARSER_START:
10369 xmlGenericError(xmlGenericErrorContext,
10370 "PP: try START\n"); break;
10371 case XML_PARSER_MISC:
10372 xmlGenericError(xmlGenericErrorContext,
10373 "PP: try MISC\n");break;
10374 case XML_PARSER_COMMENT:
10375 xmlGenericError(xmlGenericErrorContext,
10376 "PP: try COMMENT\n");break;
10377 case XML_PARSER_PROLOG:
10378 xmlGenericError(xmlGenericErrorContext,
10379 "PP: try PROLOG\n");break;
10380 case XML_PARSER_START_TAG:
10381 xmlGenericError(xmlGenericErrorContext,
10382 "PP: try START_TAG\n");break;
10383 case XML_PARSER_CONTENT:
10384 xmlGenericError(xmlGenericErrorContext,
10385 "PP: try CONTENT\n");break;
10386 case XML_PARSER_CDATA_SECTION:
10387 xmlGenericError(xmlGenericErrorContext,
10388 "PP: try CDATA_SECTION\n");break;
10389 case XML_PARSER_END_TAG:
10390 xmlGenericError(xmlGenericErrorContext,
10391 "PP: try END_TAG\n");break;
10392 case XML_PARSER_ENTITY_DECL:
10393 xmlGenericError(xmlGenericErrorContext,
10394 "PP: try ENTITY_DECL\n");break;
10395 case XML_PARSER_ENTITY_VALUE:
10396 xmlGenericError(xmlGenericErrorContext,
10397 "PP: try ENTITY_VALUE\n");break;
10398 case XML_PARSER_ATTRIBUTE_VALUE:
10399 xmlGenericError(xmlGenericErrorContext,
10400 "PP: try ATTRIBUTE_VALUE\n");break;
10401 case XML_PARSER_DTD:
10402 xmlGenericError(xmlGenericErrorContext,
10403 "PP: try DTD\n");break;
10404 case XML_PARSER_EPILOG:
10405 xmlGenericError(xmlGenericErrorContext,
10406 "PP: try EPILOG\n");break;
10407 case XML_PARSER_PI:
10408 xmlGenericError(xmlGenericErrorContext,
10409 "PP: try PI\n");break;
10410 case XML_PARSER_IGNORE:
10411 xmlGenericError(xmlGenericErrorContext,
10412 "PP: try IGNORE\n");break;
10413 }
10414#endif
10415
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010416 if ((ctxt->input != NULL) &&
10417 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010418 xmlSHRINK(ctxt);
10419 ctxt->checkIndex = 0;
10420 }
10421 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010422
Daniel Veillarda880b122003-04-21 21:36:41 +000010423 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010424 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010425 return(0);
10426
10427
Owen Taylor3473f882001-02-23 17:55:21 +000010428 /*
10429 * Pop-up of finished entities.
10430 */
10431 while ((RAW == 0) && (ctxt->inputNr > 1))
10432 xmlPopInput(ctxt);
10433
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010434 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010435 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010436 avail = ctxt->input->length -
10437 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010438 else {
10439 /*
10440 * If we are operating on converted input, try to flush
10441 * remainng chars to avoid them stalling in the non-converted
10442 * buffer.
10443 */
10444 if ((ctxt->input->buf->raw != NULL) &&
10445 (ctxt->input->buf->raw->use > 0)) {
10446 int base = ctxt->input->base -
10447 ctxt->input->buf->buffer->content;
10448 int current = ctxt->input->cur - ctxt->input->base;
10449
10450 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
10451 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10452 ctxt->input->cur = ctxt->input->base + current;
10453 ctxt->input->end =
10454 &ctxt->input->buf->buffer->content[
10455 ctxt->input->buf->buffer->use];
10456 }
10457 avail = ctxt->input->buf->buffer->use -
10458 (ctxt->input->cur - ctxt->input->base);
10459 }
Owen Taylor3473f882001-02-23 17:55:21 +000010460 if (avail < 1)
10461 goto done;
10462 switch (ctxt->instate) {
10463 case XML_PARSER_EOF:
10464 /*
10465 * Document parsing is done !
10466 */
10467 goto done;
10468 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010469 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10470 xmlChar start[4];
10471 xmlCharEncoding enc;
10472
10473 /*
10474 * Very first chars read from the document flow.
10475 */
10476 if (avail < 4)
10477 goto done;
10478
10479 /*
10480 * Get the 4 first bytes and decode the charset
10481 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010482 * plug some encoding conversion routines,
10483 * else xmlSwitchEncoding will set to (default)
10484 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010485 */
10486 start[0] = RAW;
10487 start[1] = NXT(1);
10488 start[2] = NXT(2);
10489 start[3] = NXT(3);
10490 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010491 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010492 break;
10493 }
Owen Taylor3473f882001-02-23 17:55:21 +000010494
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010495 if (avail < 2)
10496 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010497 cur = ctxt->input->cur[0];
10498 next = ctxt->input->cur[1];
10499 if (cur == 0) {
10500 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10501 ctxt->sax->setDocumentLocator(ctxt->userData,
10502 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010503 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010504 ctxt->instate = XML_PARSER_EOF;
10505#ifdef DEBUG_PUSH
10506 xmlGenericError(xmlGenericErrorContext,
10507 "PP: entering EOF\n");
10508#endif
10509 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10510 ctxt->sax->endDocument(ctxt->userData);
10511 goto done;
10512 }
10513 if ((cur == '<') && (next == '?')) {
10514 /* PI or XML decl */
10515 if (avail < 5) return(ret);
10516 if ((!terminate) &&
10517 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10518 return(ret);
10519 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10520 ctxt->sax->setDocumentLocator(ctxt->userData,
10521 &xmlDefaultSAXLocator);
10522 if ((ctxt->input->cur[2] == 'x') &&
10523 (ctxt->input->cur[3] == 'm') &&
10524 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010525 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010526 ret += 5;
10527#ifdef DEBUG_PUSH
10528 xmlGenericError(xmlGenericErrorContext,
10529 "PP: Parsing XML Decl\n");
10530#endif
10531 xmlParseXMLDecl(ctxt);
10532 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10533 /*
10534 * The XML REC instructs us to stop parsing right
10535 * here
10536 */
10537 ctxt->instate = XML_PARSER_EOF;
10538 return(0);
10539 }
10540 ctxt->standalone = ctxt->input->standalone;
10541 if ((ctxt->encoding == NULL) &&
10542 (ctxt->input->encoding != NULL))
10543 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10544 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10545 (!ctxt->disableSAX))
10546 ctxt->sax->startDocument(ctxt->userData);
10547 ctxt->instate = XML_PARSER_MISC;
10548#ifdef DEBUG_PUSH
10549 xmlGenericError(xmlGenericErrorContext,
10550 "PP: entering MISC\n");
10551#endif
10552 } else {
10553 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10554 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10555 (!ctxt->disableSAX))
10556 ctxt->sax->startDocument(ctxt->userData);
10557 ctxt->instate = XML_PARSER_MISC;
10558#ifdef DEBUG_PUSH
10559 xmlGenericError(xmlGenericErrorContext,
10560 "PP: entering MISC\n");
10561#endif
10562 }
10563 } else {
10564 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10565 ctxt->sax->setDocumentLocator(ctxt->userData,
10566 &xmlDefaultSAXLocator);
10567 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010568 if (ctxt->version == NULL) {
10569 xmlErrMemory(ctxt, NULL);
10570 break;
10571 }
Owen Taylor3473f882001-02-23 17:55:21 +000010572 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10573 (!ctxt->disableSAX))
10574 ctxt->sax->startDocument(ctxt->userData);
10575 ctxt->instate = XML_PARSER_MISC;
10576#ifdef DEBUG_PUSH
10577 xmlGenericError(xmlGenericErrorContext,
10578 "PP: entering MISC\n");
10579#endif
10580 }
10581 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010582 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010583 const xmlChar *name;
10584 const xmlChar *prefix;
10585 const xmlChar *URI;
10586 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010587
10588 if ((avail < 2) && (ctxt->inputNr == 1))
10589 goto done;
10590 cur = ctxt->input->cur[0];
10591 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010592 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010593 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010594 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10595 ctxt->sax->endDocument(ctxt->userData);
10596 goto done;
10597 }
10598 if (!terminate) {
10599 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010600 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010601 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010602 goto done;
10603 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10604 goto done;
10605 }
10606 }
10607 if (ctxt->spaceNr == 0)
10608 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000010609 else if (*ctxt->space == -2)
10610 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000010611 else
10612 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000010613#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010614 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000010615#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010616 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000010617#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010618 else
10619 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010620#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010621 if (name == NULL) {
10622 spacePop(ctxt);
10623 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010624 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10625 ctxt->sax->endDocument(ctxt->userData);
10626 goto done;
10627 }
Daniel Veillard4432df22003-09-28 18:58:27 +000010628#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000010629 /*
10630 * [ VC: Root Element Type ]
10631 * The Name in the document type declaration must match
10632 * the element type of the root element.
10633 */
10634 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10635 ctxt->node && (ctxt->node == ctxt->myDoc->children))
10636 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000010637#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010638
10639 /*
10640 * Check for an Empty Element.
10641 */
10642 if ((RAW == '/') && (NXT(1) == '>')) {
10643 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010644
10645 if (ctxt->sax2) {
10646 if ((ctxt->sax != NULL) &&
10647 (ctxt->sax->endElementNs != NULL) &&
10648 (!ctxt->disableSAX))
10649 ctxt->sax->endElementNs(ctxt->userData, name,
10650 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000010651 if (ctxt->nsNr - nsNr > 0)
10652 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010653#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010654 } else {
10655 if ((ctxt->sax != NULL) &&
10656 (ctxt->sax->endElement != NULL) &&
10657 (!ctxt->disableSAX))
10658 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010659#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000010660 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010661 spacePop(ctxt);
10662 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010663 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010664 } else {
10665 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010666 }
10667 break;
10668 }
10669 if (RAW == '>') {
10670 NEXT;
10671 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000010672 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000010673 "Couldn't find end of Start Tag %s\n",
10674 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000010675 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010676 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000010677 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010678 if (ctxt->sax2)
10679 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000010680#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000010681 else
10682 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000010683#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010684
Daniel Veillarda880b122003-04-21 21:36:41 +000010685 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010686 break;
10687 }
10688 case XML_PARSER_CONTENT: {
10689 const xmlChar *test;
10690 unsigned int cons;
10691 if ((avail < 2) && (ctxt->inputNr == 1))
10692 goto done;
10693 cur = ctxt->input->cur[0];
10694 next = ctxt->input->cur[1];
10695
10696 test = CUR_PTR;
10697 cons = ctxt->input->consumed;
10698 if ((cur == '<') && (next == '/')) {
10699 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010700 break;
10701 } else if ((cur == '<') && (next == '?')) {
10702 if ((!terminate) &&
10703 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10704 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010705 xmlParsePI(ctxt);
10706 } else if ((cur == '<') && (next != '!')) {
10707 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010708 break;
10709 } else if ((cur == '<') && (next == '!') &&
10710 (ctxt->input->cur[2] == '-') &&
10711 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000010712 int term;
10713
10714 if (avail < 4)
10715 goto done;
10716 ctxt->input->cur += 4;
10717 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
10718 ctxt->input->cur -= 4;
10719 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000010720 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010721 xmlParseComment(ctxt);
10722 ctxt->instate = XML_PARSER_CONTENT;
10723 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
10724 (ctxt->input->cur[2] == '[') &&
10725 (ctxt->input->cur[3] == 'C') &&
10726 (ctxt->input->cur[4] == 'D') &&
10727 (ctxt->input->cur[5] == 'A') &&
10728 (ctxt->input->cur[6] == 'T') &&
10729 (ctxt->input->cur[7] == 'A') &&
10730 (ctxt->input->cur[8] == '[')) {
10731 SKIP(9);
10732 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000010733 break;
10734 } else if ((cur == '<') && (next == '!') &&
10735 (avail < 9)) {
10736 goto done;
10737 } else if (cur == '&') {
10738 if ((!terminate) &&
10739 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
10740 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000010741 xmlParseReference(ctxt);
10742 } else {
10743 /* TODO Avoid the extra copy, handle directly !!! */
10744 /*
10745 * Goal of the following test is:
10746 * - minimize calls to the SAX 'character' callback
10747 * when they are mergeable
10748 * - handle an problem for isBlank when we only parse
10749 * a sequence of blank chars and the next one is
10750 * not available to check against '<' presence.
10751 * - tries to homogenize the differences in SAX
10752 * callbacks between the push and pull versions
10753 * of the parser.
10754 */
10755 if ((ctxt->inputNr == 1) &&
10756 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
10757 if (!terminate) {
10758 if (ctxt->progressive) {
10759 if ((lastlt == NULL) ||
10760 (ctxt->input->cur > lastlt))
10761 goto done;
10762 } else if (xmlParseLookupSequence(ctxt,
10763 '<', 0, 0) < 0) {
10764 goto done;
10765 }
10766 }
10767 }
10768 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000010769 xmlParseCharData(ctxt, 0);
10770 }
10771 /*
10772 * Pop-up of finished entities.
10773 */
10774 while ((RAW == 0) && (ctxt->inputNr > 1))
10775 xmlPopInput(ctxt);
10776 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010777 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
10778 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000010779 ctxt->instate = XML_PARSER_EOF;
10780 break;
10781 }
10782 break;
10783 }
10784 case XML_PARSER_END_TAG:
10785 if (avail < 2)
10786 goto done;
10787 if (!terminate) {
10788 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010789 /* > can be found unescaped in attribute values */
10790 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010791 goto done;
10792 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10793 goto done;
10794 }
10795 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000010796 if (ctxt->sax2) {
10797 xmlParseEndTag2(ctxt,
10798 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
10799 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010800 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000010801 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000010802 }
10803#ifdef LIBXML_SAX1_ENABLED
10804 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000010805 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000010806#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010807 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010808 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010809 } else {
10810 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000010811 }
10812 break;
10813 case XML_PARSER_CDATA_SECTION: {
10814 /*
10815 * The Push mode need to have the SAX callback for
10816 * cdataBlock merge back contiguous callbacks.
10817 */
10818 int base;
10819
10820 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10821 if (base < 0) {
10822 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010823 int tmp;
10824
10825 tmp = xmlCheckCdataPush(ctxt->input->cur,
10826 XML_PARSER_BIG_BUFFER_SIZE);
10827 if (tmp < 0) {
10828 tmp = -tmp;
10829 ctxt->input->cur += tmp;
10830 goto encoding_error;
10831 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010832 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10833 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010834 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010835 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010836 else if (ctxt->sax->characters != NULL)
10837 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010838 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010839 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010840 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000010841 ctxt->checkIndex = 0;
10842 }
10843 goto done;
10844 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010845 int tmp;
10846
10847 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
10848 if ((tmp < 0) || (tmp != base)) {
10849 tmp = -tmp;
10850 ctxt->input->cur += tmp;
10851 goto encoding_error;
10852 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000010853 if ((ctxt->sax != NULL) && (base == 0) &&
10854 (ctxt->sax->cdataBlock != NULL) &&
10855 (!ctxt->disableSAX)) {
10856 /*
10857 * Special case to provide identical behaviour
10858 * between pull and push parsers on enpty CDATA
10859 * sections
10860 */
10861 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
10862 (!strncmp((const char *)&ctxt->input->cur[-9],
10863 "<![CDATA[", 9)))
10864 ctxt->sax->cdataBlock(ctxt->userData,
10865 BAD_CAST "", 0);
10866 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010867 (!ctxt->disableSAX)) {
10868 if (ctxt->sax->cdataBlock != NULL)
10869 ctxt->sax->cdataBlock(ctxt->userData,
10870 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000010871 else if (ctxt->sax->characters != NULL)
10872 ctxt->sax->characters(ctxt->userData,
10873 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000010874 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000010875 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000010876 ctxt->checkIndex = 0;
10877 ctxt->instate = XML_PARSER_CONTENT;
10878#ifdef DEBUG_PUSH
10879 xmlGenericError(xmlGenericErrorContext,
10880 "PP: entering CONTENT\n");
10881#endif
10882 }
10883 break;
10884 }
Owen Taylor3473f882001-02-23 17:55:21 +000010885 case XML_PARSER_MISC:
10886 SKIP_BLANKS;
10887 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010888 avail = ctxt->input->length -
10889 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010890 else
Daniel Veillarda880b122003-04-21 21:36:41 +000010891 avail = ctxt->input->buf->buffer->use -
10892 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000010893 if (avail < 2)
10894 goto done;
10895 cur = ctxt->input->cur[0];
10896 next = ctxt->input->cur[1];
10897 if ((cur == '<') && (next == '?')) {
10898 if ((!terminate) &&
10899 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10900 goto done;
10901#ifdef DEBUG_PUSH
10902 xmlGenericError(xmlGenericErrorContext,
10903 "PP: Parsing PI\n");
10904#endif
10905 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000010906 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010907 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010908 (ctxt->input->cur[2] == '-') &&
10909 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010910 if ((!terminate) &&
10911 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10912 goto done;
10913#ifdef DEBUG_PUSH
10914 xmlGenericError(xmlGenericErrorContext,
10915 "PP: Parsing Comment\n");
10916#endif
10917 xmlParseComment(ctxt);
10918 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000010919 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010920 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000010921 (ctxt->input->cur[2] == 'D') &&
10922 (ctxt->input->cur[3] == 'O') &&
10923 (ctxt->input->cur[4] == 'C') &&
10924 (ctxt->input->cur[5] == 'T') &&
10925 (ctxt->input->cur[6] == 'Y') &&
10926 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000010927 (ctxt->input->cur[8] == 'E')) {
10928 if ((!terminate) &&
10929 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
10930 goto done;
10931#ifdef DEBUG_PUSH
10932 xmlGenericError(xmlGenericErrorContext,
10933 "PP: Parsing internal subset\n");
10934#endif
10935 ctxt->inSubset = 1;
10936 xmlParseDocTypeDecl(ctxt);
10937 if (RAW == '[') {
10938 ctxt->instate = XML_PARSER_DTD;
10939#ifdef DEBUG_PUSH
10940 xmlGenericError(xmlGenericErrorContext,
10941 "PP: entering DTD\n");
10942#endif
10943 } else {
10944 /*
10945 * Create and update the external subset.
10946 */
10947 ctxt->inSubset = 2;
10948 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10949 (ctxt->sax->externalSubset != NULL))
10950 ctxt->sax->externalSubset(ctxt->userData,
10951 ctxt->intSubName, ctxt->extSubSystem,
10952 ctxt->extSubURI);
10953 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000010954 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010955 ctxt->instate = XML_PARSER_PROLOG;
10956#ifdef DEBUG_PUSH
10957 xmlGenericError(xmlGenericErrorContext,
10958 "PP: entering PROLOG\n");
10959#endif
10960 }
10961 } else if ((cur == '<') && (next == '!') &&
10962 (avail < 9)) {
10963 goto done;
10964 } else {
10965 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000010966 ctxt->progressive = 1;
10967 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000010968#ifdef DEBUG_PUSH
10969 xmlGenericError(xmlGenericErrorContext,
10970 "PP: entering START_TAG\n");
10971#endif
10972 }
10973 break;
Owen Taylor3473f882001-02-23 17:55:21 +000010974 case XML_PARSER_PROLOG:
10975 SKIP_BLANKS;
10976 if (ctxt->input->buf == NULL)
10977 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
10978 else
10979 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
10980 if (avail < 2)
10981 goto done;
10982 cur = ctxt->input->cur[0];
10983 next = ctxt->input->cur[1];
10984 if ((cur == '<') && (next == '?')) {
10985 if ((!terminate) &&
10986 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10987 goto done;
10988#ifdef DEBUG_PUSH
10989 xmlGenericError(xmlGenericErrorContext,
10990 "PP: Parsing PI\n");
10991#endif
10992 xmlParsePI(ctxt);
10993 } else if ((cur == '<') && (next == '!') &&
10994 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
10995 if ((!terminate) &&
10996 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
10997 goto done;
10998#ifdef DEBUG_PUSH
10999 xmlGenericError(xmlGenericErrorContext,
11000 "PP: Parsing Comment\n");
11001#endif
11002 xmlParseComment(ctxt);
11003 ctxt->instate = XML_PARSER_PROLOG;
11004 } else if ((cur == '<') && (next == '!') &&
11005 (avail < 4)) {
11006 goto done;
11007 } else {
11008 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011009 if (ctxt->progressive == 0)
11010 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011011 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011012#ifdef DEBUG_PUSH
11013 xmlGenericError(xmlGenericErrorContext,
11014 "PP: entering START_TAG\n");
11015#endif
11016 }
11017 break;
11018 case XML_PARSER_EPILOG:
11019 SKIP_BLANKS;
11020 if (ctxt->input->buf == NULL)
11021 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11022 else
11023 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
11024 if (avail < 2)
11025 goto done;
11026 cur = ctxt->input->cur[0];
11027 next = ctxt->input->cur[1];
11028 if ((cur == '<') && (next == '?')) {
11029 if ((!terminate) &&
11030 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11031 goto done;
11032#ifdef DEBUG_PUSH
11033 xmlGenericError(xmlGenericErrorContext,
11034 "PP: Parsing PI\n");
11035#endif
11036 xmlParsePI(ctxt);
11037 ctxt->instate = XML_PARSER_EPILOG;
11038 } else if ((cur == '<') && (next == '!') &&
11039 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11040 if ((!terminate) &&
11041 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11042 goto done;
11043#ifdef DEBUG_PUSH
11044 xmlGenericError(xmlGenericErrorContext,
11045 "PP: Parsing Comment\n");
11046#endif
11047 xmlParseComment(ctxt);
11048 ctxt->instate = XML_PARSER_EPILOG;
11049 } else if ((cur == '<') && (next == '!') &&
11050 (avail < 4)) {
11051 goto done;
11052 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011053 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011054 ctxt->instate = XML_PARSER_EOF;
11055#ifdef DEBUG_PUSH
11056 xmlGenericError(xmlGenericErrorContext,
11057 "PP: entering EOF\n");
11058#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011059 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011060 ctxt->sax->endDocument(ctxt->userData);
11061 goto done;
11062 }
11063 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011064 case XML_PARSER_DTD: {
11065 /*
11066 * Sorry but progressive parsing of the internal subset
11067 * is not expected to be supported. We first check that
11068 * the full content of the internal subset is available and
11069 * the parsing is launched only at that point.
11070 * Internal subset ends up with "']' S? '>'" in an unescaped
11071 * section and not in a ']]>' sequence which are conditional
11072 * sections (whoever argued to keep that crap in XML deserve
11073 * a place in hell !).
11074 */
11075 int base, i;
11076 xmlChar *buf;
11077 xmlChar quote = 0;
11078
11079 base = ctxt->input->cur - ctxt->input->base;
11080 if (base < 0) return(0);
11081 if (ctxt->checkIndex > base)
11082 base = ctxt->checkIndex;
11083 buf = ctxt->input->buf->buffer->content;
11084 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
11085 base++) {
11086 if (quote != 0) {
11087 if (buf[base] == quote)
11088 quote = 0;
11089 continue;
11090 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011091 if ((quote == 0) && (buf[base] == '<')) {
11092 int found = 0;
11093 /* special handling of comments */
11094 if (((unsigned int) base + 4 <
11095 ctxt->input->buf->buffer->use) &&
11096 (buf[base + 1] == '!') &&
11097 (buf[base + 2] == '-') &&
11098 (buf[base + 3] == '-')) {
11099 for (;(unsigned int) base + 3 <
11100 ctxt->input->buf->buffer->use; base++) {
11101 if ((buf[base] == '-') &&
11102 (buf[base + 1] == '-') &&
11103 (buf[base + 2] == '>')) {
11104 found = 1;
11105 base += 2;
11106 break;
11107 }
11108 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011109 if (!found) {
11110#if 0
11111 fprintf(stderr, "unfinished comment\n");
11112#endif
11113 break; /* for */
11114 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011115 continue;
11116 }
11117 }
Owen Taylor3473f882001-02-23 17:55:21 +000011118 if (buf[base] == '"') {
11119 quote = '"';
11120 continue;
11121 }
11122 if (buf[base] == '\'') {
11123 quote = '\'';
11124 continue;
11125 }
11126 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011127#if 0
11128 fprintf(stderr, "%c%c%c%c: ", buf[base],
11129 buf[base + 1], buf[base + 2], buf[base + 3]);
11130#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011131 if ((unsigned int) base +1 >=
11132 ctxt->input->buf->buffer->use)
11133 break;
11134 if (buf[base + 1] == ']') {
11135 /* conditional crap, skip both ']' ! */
11136 base++;
11137 continue;
11138 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011139 for (i = 1;
Owen Taylor3473f882001-02-23 17:55:21 +000011140 (unsigned int) base + i < ctxt->input->buf->buffer->use;
11141 i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011142 if (buf[base + i] == '>') {
11143#if 0
11144 fprintf(stderr, "found\n");
11145#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011146 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011147 }
11148 if (!IS_BLANK_CH(buf[base + i])) {
11149#if 0
11150 fprintf(stderr, "not found\n");
11151#endif
11152 goto not_end_of_int_subset;
11153 }
Owen Taylor3473f882001-02-23 17:55:21 +000011154 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011155#if 0
11156 fprintf(stderr, "end of stream\n");
11157#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011158 break;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011159
Owen Taylor3473f882001-02-23 17:55:21 +000011160 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011161not_end_of_int_subset:
11162 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011163 }
11164 /*
11165 * We didn't found the end of the Internal subset
11166 */
Owen Taylor3473f882001-02-23 17:55:21 +000011167#ifdef DEBUG_PUSH
11168 if (next == 0)
11169 xmlGenericError(xmlGenericErrorContext,
11170 "PP: lookup of int subset end filed\n");
11171#endif
11172 goto done;
11173
11174found_end_int_subset:
11175 xmlParseInternalSubset(ctxt);
11176 ctxt->inSubset = 2;
11177 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11178 (ctxt->sax->externalSubset != NULL))
11179 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11180 ctxt->extSubSystem, ctxt->extSubURI);
11181 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011182 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011183 ctxt->instate = XML_PARSER_PROLOG;
11184 ctxt->checkIndex = 0;
11185#ifdef DEBUG_PUSH
11186 xmlGenericError(xmlGenericErrorContext,
11187 "PP: entering PROLOG\n");
11188#endif
11189 break;
11190 }
11191 case XML_PARSER_COMMENT:
11192 xmlGenericError(xmlGenericErrorContext,
11193 "PP: internal error, state == COMMENT\n");
11194 ctxt->instate = XML_PARSER_CONTENT;
11195#ifdef DEBUG_PUSH
11196 xmlGenericError(xmlGenericErrorContext,
11197 "PP: entering CONTENT\n");
11198#endif
11199 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011200 case XML_PARSER_IGNORE:
11201 xmlGenericError(xmlGenericErrorContext,
11202 "PP: internal error, state == IGNORE");
11203 ctxt->instate = XML_PARSER_DTD;
11204#ifdef DEBUG_PUSH
11205 xmlGenericError(xmlGenericErrorContext,
11206 "PP: entering DTD\n");
11207#endif
11208 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011209 case XML_PARSER_PI:
11210 xmlGenericError(xmlGenericErrorContext,
11211 "PP: internal error, state == PI\n");
11212 ctxt->instate = XML_PARSER_CONTENT;
11213#ifdef DEBUG_PUSH
11214 xmlGenericError(xmlGenericErrorContext,
11215 "PP: entering CONTENT\n");
11216#endif
11217 break;
11218 case XML_PARSER_ENTITY_DECL:
11219 xmlGenericError(xmlGenericErrorContext,
11220 "PP: internal error, state == ENTITY_DECL\n");
11221 ctxt->instate = XML_PARSER_DTD;
11222#ifdef DEBUG_PUSH
11223 xmlGenericError(xmlGenericErrorContext,
11224 "PP: entering DTD\n");
11225#endif
11226 break;
11227 case XML_PARSER_ENTITY_VALUE:
11228 xmlGenericError(xmlGenericErrorContext,
11229 "PP: internal error, state == ENTITY_VALUE\n");
11230 ctxt->instate = XML_PARSER_CONTENT;
11231#ifdef DEBUG_PUSH
11232 xmlGenericError(xmlGenericErrorContext,
11233 "PP: entering DTD\n");
11234#endif
11235 break;
11236 case XML_PARSER_ATTRIBUTE_VALUE:
11237 xmlGenericError(xmlGenericErrorContext,
11238 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11239 ctxt->instate = XML_PARSER_START_TAG;
11240#ifdef DEBUG_PUSH
11241 xmlGenericError(xmlGenericErrorContext,
11242 "PP: entering START_TAG\n");
11243#endif
11244 break;
11245 case XML_PARSER_SYSTEM_LITERAL:
11246 xmlGenericError(xmlGenericErrorContext,
11247 "PP: internal error, state == SYSTEM_LITERAL\n");
11248 ctxt->instate = XML_PARSER_START_TAG;
11249#ifdef DEBUG_PUSH
11250 xmlGenericError(xmlGenericErrorContext,
11251 "PP: entering START_TAG\n");
11252#endif
11253 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011254 case XML_PARSER_PUBLIC_LITERAL:
11255 xmlGenericError(xmlGenericErrorContext,
11256 "PP: internal error, state == PUBLIC_LITERAL\n");
11257 ctxt->instate = XML_PARSER_START_TAG;
11258#ifdef DEBUG_PUSH
11259 xmlGenericError(xmlGenericErrorContext,
11260 "PP: entering START_TAG\n");
11261#endif
11262 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011263 }
11264 }
11265done:
11266#ifdef DEBUG_PUSH
11267 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11268#endif
11269 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011270encoding_error:
11271 {
11272 char buffer[150];
11273
11274 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11275 ctxt->input->cur[0], ctxt->input->cur[1],
11276 ctxt->input->cur[2], ctxt->input->cur[3]);
11277 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11278 "Input is not proper UTF-8, indicate encoding !\n%s",
11279 BAD_CAST buffer, NULL);
11280 }
11281 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011282}
11283
11284/**
Owen Taylor3473f882001-02-23 17:55:21 +000011285 * xmlParseChunk:
11286 * @ctxt: an XML parser context
11287 * @chunk: an char array
11288 * @size: the size in byte of the chunk
11289 * @terminate: last chunk indicator
11290 *
11291 * Parse a Chunk of memory
11292 *
11293 * Returns zero if no error, the xmlParserErrors otherwise.
11294 */
11295int
11296xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11297 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011298 int end_in_lf = 0;
11299
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011300 if (ctxt == NULL)
11301 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011302 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011303 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011304 if (ctxt->instate == XML_PARSER_START)
11305 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011306 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11307 (chunk[size - 1] == '\r')) {
11308 end_in_lf = 1;
11309 size--;
11310 }
Owen Taylor3473f882001-02-23 17:55:21 +000011311 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11312 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
11313 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11314 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011315 int res;
Owen Taylor3473f882001-02-23 17:55:21 +000011316
William M. Bracka3215c72004-07-31 16:24:01 +000011317 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11318 if (res < 0) {
11319 ctxt->errNo = XML_PARSER_EOF;
11320 ctxt->disableSAX = 1;
11321 return (XML_PARSER_EOF);
11322 }
Owen Taylor3473f882001-02-23 17:55:21 +000011323 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11324 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011325 ctxt->input->end =
11326 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011327#ifdef DEBUG_PUSH
11328 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11329#endif
11330
Owen Taylor3473f882001-02-23 17:55:21 +000011331 } else if (ctxt->instate != XML_PARSER_EOF) {
11332 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11333 xmlParserInputBufferPtr in = ctxt->input->buf;
11334 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11335 (in->raw != NULL)) {
11336 int nbchars;
11337
11338 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
11339 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011340 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011341 xmlGenericError(xmlGenericErrorContext,
11342 "xmlParseChunk: encoder error\n");
11343 return(XML_ERR_INVALID_ENCODING);
11344 }
11345 }
11346 }
11347 }
11348 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarda617e242006-01-09 14:38:44 +000011349 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11350 (ctxt->input->buf != NULL)) {
11351 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11352 }
Daniel Veillard14412512005-01-21 23:53:26 +000011353 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011354 return(ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011355 if (terminate) {
11356 /*
11357 * Check for termination
11358 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011359 int avail = 0;
11360
11361 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011362 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011363 avail = ctxt->input->length -
11364 (ctxt->input->cur - ctxt->input->base);
11365 else
11366 avail = ctxt->input->buf->buffer->use -
11367 (ctxt->input->cur - ctxt->input->base);
11368 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011369
Owen Taylor3473f882001-02-23 17:55:21 +000011370 if ((ctxt->instate != XML_PARSER_EOF) &&
11371 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011372 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011373 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011374 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011375 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011376 }
Owen Taylor3473f882001-02-23 17:55:21 +000011377 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011378 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011379 ctxt->sax->endDocument(ctxt->userData);
11380 }
11381 ctxt->instate = XML_PARSER_EOF;
11382 }
11383 return((xmlParserErrors) ctxt->errNo);
11384}
11385
11386/************************************************************************
11387 * *
11388 * I/O front end functions to the parser *
11389 * *
11390 ************************************************************************/
11391
11392/**
Owen Taylor3473f882001-02-23 17:55:21 +000011393 * xmlCreatePushParserCtxt:
11394 * @sax: a SAX handler
11395 * @user_data: The user data returned on SAX callbacks
11396 * @chunk: a pointer to an array of chars
11397 * @size: number of chars in the array
11398 * @filename: an optional file name or URI
11399 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011400 * Create a parser context for using the XML parser in push mode.
11401 * If @buffer and @size are non-NULL, the data is used to detect
11402 * the encoding. The remaining characters will be parsed so they
11403 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011404 * To allow content encoding detection, @size should be >= 4
11405 * The value of @filename is used for fetching external entities
11406 * and error/warning reports.
11407 *
11408 * Returns the new parser context or NULL
11409 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011410
Owen Taylor3473f882001-02-23 17:55:21 +000011411xmlParserCtxtPtr
11412xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11413 const char *chunk, int size, const char *filename) {
11414 xmlParserCtxtPtr ctxt;
11415 xmlParserInputPtr inputStream;
11416 xmlParserInputBufferPtr buf;
11417 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11418
11419 /*
11420 * plug some encoding conversion routines
11421 */
11422 if ((chunk != NULL) && (size >= 4))
11423 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11424
11425 buf = xmlAllocParserInputBuffer(enc);
11426 if (buf == NULL) return(NULL);
11427
11428 ctxt = xmlNewParserCtxt();
11429 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011430 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011431 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011432 return(NULL);
11433 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011434 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011435 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11436 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011437 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011438 xmlFreeParserInputBuffer(buf);
11439 xmlFreeParserCtxt(ctxt);
11440 return(NULL);
11441 }
Owen Taylor3473f882001-02-23 17:55:21 +000011442 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011443#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011444 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011445#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011446 xmlFree(ctxt->sax);
11447 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11448 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011449 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011450 xmlFreeParserInputBuffer(buf);
11451 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011452 return(NULL);
11453 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011454 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11455 if (sax->initialized == XML_SAX2_MAGIC)
11456 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11457 else
11458 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011459 if (user_data != NULL)
11460 ctxt->userData = user_data;
11461 }
11462 if (filename == NULL) {
11463 ctxt->directory = NULL;
11464 } else {
11465 ctxt->directory = xmlParserGetDirectory(filename);
11466 }
11467
11468 inputStream = xmlNewInputStream(ctxt);
11469 if (inputStream == NULL) {
11470 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011471 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011472 return(NULL);
11473 }
11474
11475 if (filename == NULL)
11476 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011477 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011478 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011479 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011480 if (inputStream->filename == NULL) {
11481 xmlFreeParserCtxt(ctxt);
11482 xmlFreeParserInputBuffer(buf);
11483 return(NULL);
11484 }
11485 }
Owen Taylor3473f882001-02-23 17:55:21 +000011486 inputStream->buf = buf;
11487 inputStream->base = inputStream->buf->buffer->content;
11488 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000011489 inputStream->end =
11490 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011491
11492 inputPush(ctxt, inputStream);
11493
William M. Brack3a1cd212005-02-11 14:35:54 +000011494 /*
11495 * If the caller didn't provide an initial 'chunk' for determining
11496 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11497 * that it can be automatically determined later
11498 */
11499 if ((size == 0) || (chunk == NULL)) {
11500 ctxt->charset = XML_CHAR_ENCODING_NONE;
11501 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011502 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
11503 int cur = ctxt->input->cur - ctxt->input->base;
11504
Owen Taylor3473f882001-02-23 17:55:21 +000011505 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011506
11507 ctxt->input->base = ctxt->input->buf->buffer->content + base;
11508 ctxt->input->cur = ctxt->input->base + cur;
11509 ctxt->input->end =
11510 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000011511#ifdef DEBUG_PUSH
11512 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11513#endif
11514 }
11515
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011516 if (enc != XML_CHAR_ENCODING_NONE) {
11517 xmlSwitchEncoding(ctxt, enc);
11518 }
11519
Owen Taylor3473f882001-02-23 17:55:21 +000011520 return(ctxt);
11521}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011522#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011523
11524/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011525 * xmlStopParser:
11526 * @ctxt: an XML parser context
11527 *
11528 * Blocks further parser processing
11529 */
11530void
11531xmlStopParser(xmlParserCtxtPtr ctxt) {
11532 if (ctxt == NULL)
11533 return;
11534 ctxt->instate = XML_PARSER_EOF;
11535 ctxt->disableSAX = 1;
11536 if (ctxt->input != NULL) {
11537 ctxt->input->cur = BAD_CAST"";
11538 ctxt->input->base = ctxt->input->cur;
11539 }
11540}
11541
11542/**
Owen Taylor3473f882001-02-23 17:55:21 +000011543 * xmlCreateIOParserCtxt:
11544 * @sax: a SAX handler
11545 * @user_data: The user data returned on SAX callbacks
11546 * @ioread: an I/O read function
11547 * @ioclose: an I/O close function
11548 * @ioctx: an I/O handler
11549 * @enc: the charset encoding if known
11550 *
11551 * Create a parser context for using the XML parser with an existing
11552 * I/O stream
11553 *
11554 * Returns the new parser context or NULL
11555 */
11556xmlParserCtxtPtr
11557xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11558 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11559 void *ioctx, xmlCharEncoding enc) {
11560 xmlParserCtxtPtr ctxt;
11561 xmlParserInputPtr inputStream;
11562 xmlParserInputBufferPtr buf;
Daniel Veillard42595322004-11-08 10:52:06 +000011563
11564 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011565
11566 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
11567 if (buf == NULL) return(NULL);
11568
11569 ctxt = xmlNewParserCtxt();
11570 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011571 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011572 return(NULL);
11573 }
11574 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011575#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011576 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011577#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011578 xmlFree(ctxt->sax);
11579 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11580 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011581 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000011582 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011583 return(NULL);
11584 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011585 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11586 if (sax->initialized == XML_SAX2_MAGIC)
11587 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11588 else
11589 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011590 if (user_data != NULL)
11591 ctxt->userData = user_data;
11592 }
11593
11594 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
11595 if (inputStream == NULL) {
11596 xmlFreeParserCtxt(ctxt);
11597 return(NULL);
11598 }
11599 inputPush(ctxt, inputStream);
11600
11601 return(ctxt);
11602}
11603
Daniel Veillard4432df22003-09-28 18:58:27 +000011604#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000011605/************************************************************************
11606 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000011607 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000011608 * *
11609 ************************************************************************/
11610
11611/**
11612 * xmlIOParseDTD:
11613 * @sax: the SAX handler block or NULL
11614 * @input: an Input Buffer
11615 * @enc: the charset encoding if known
11616 *
11617 * Load and parse a DTD
11618 *
11619 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000011620 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000011621 */
11622
11623xmlDtdPtr
11624xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
11625 xmlCharEncoding enc) {
11626 xmlDtdPtr ret = NULL;
11627 xmlParserCtxtPtr ctxt;
11628 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011629 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000011630
11631 if (input == NULL)
11632 return(NULL);
11633
11634 ctxt = xmlNewParserCtxt();
11635 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000011636 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011637 return(NULL);
11638 }
11639
11640 /*
11641 * Set-up the SAX context
11642 */
11643 if (sax != NULL) {
11644 if (ctxt->sax != NULL)
11645 xmlFree(ctxt->sax);
11646 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000011647 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011648 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011649 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011650
11651 /*
11652 * generate a parser input from the I/O handler
11653 */
11654
Daniel Veillard43caefb2003-12-07 19:32:22 +000011655 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000011656 if (pinput == NULL) {
11657 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000011658 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000011659 xmlFreeParserCtxt(ctxt);
11660 return(NULL);
11661 }
11662
11663 /*
11664 * plug some encoding conversion routines here.
11665 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011666 if (xmlPushInput(ctxt, pinput) < 0) {
11667 if (sax != NULL) ctxt->sax = NULL;
11668 xmlFreeParserCtxt(ctxt);
11669 return(NULL);
11670 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000011671 if (enc != XML_CHAR_ENCODING_NONE) {
11672 xmlSwitchEncoding(ctxt, enc);
11673 }
Owen Taylor3473f882001-02-23 17:55:21 +000011674
11675 pinput->filename = NULL;
11676 pinput->line = 1;
11677 pinput->col = 1;
11678 pinput->base = ctxt->input->cur;
11679 pinput->cur = ctxt->input->cur;
11680 pinput->free = NULL;
11681
11682 /*
11683 * let's parse that entity knowing it's an external subset.
11684 */
11685 ctxt->inSubset = 2;
11686 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011687 if (ctxt->myDoc == NULL) {
11688 xmlErrMemory(ctxt, "New Doc failed");
11689 return(NULL);
11690 }
11691 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011692 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11693 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000011694
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011695 if ((enc == XML_CHAR_ENCODING_NONE) &&
11696 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000011697 /*
11698 * Get the 4 first bytes and decode the charset
11699 * if enc != XML_CHAR_ENCODING_NONE
11700 * plug some encoding conversion routines.
11701 */
11702 start[0] = RAW;
11703 start[1] = NXT(1);
11704 start[2] = NXT(2);
11705 start[3] = NXT(3);
11706 enc = xmlDetectCharEncoding(start, 4);
11707 if (enc != XML_CHAR_ENCODING_NONE) {
11708 xmlSwitchEncoding(ctxt, enc);
11709 }
11710 }
11711
Owen Taylor3473f882001-02-23 17:55:21 +000011712 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
11713
11714 if (ctxt->myDoc != NULL) {
11715 if (ctxt->wellFormed) {
11716 ret = ctxt->myDoc->extSubset;
11717 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000011718 if (ret != NULL) {
11719 xmlNodePtr tmp;
11720
11721 ret->doc = NULL;
11722 tmp = ret->children;
11723 while (tmp != NULL) {
11724 tmp->doc = NULL;
11725 tmp = tmp->next;
11726 }
11727 }
Owen Taylor3473f882001-02-23 17:55:21 +000011728 } else {
11729 ret = NULL;
11730 }
11731 xmlFreeDoc(ctxt->myDoc);
11732 ctxt->myDoc = NULL;
11733 }
11734 if (sax != NULL) ctxt->sax = NULL;
11735 xmlFreeParserCtxt(ctxt);
11736
11737 return(ret);
11738}
11739
11740/**
11741 * xmlSAXParseDTD:
11742 * @sax: the SAX handler block
11743 * @ExternalID: a NAME* containing the External ID of the DTD
11744 * @SystemID: a NAME* containing the URL to the DTD
11745 *
11746 * Load and parse an external subset.
11747 *
11748 * Returns the resulting xmlDtdPtr or NULL in case of error.
11749 */
11750
11751xmlDtdPtr
11752xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
11753 const xmlChar *SystemID) {
11754 xmlDtdPtr ret = NULL;
11755 xmlParserCtxtPtr ctxt;
11756 xmlParserInputPtr input = NULL;
11757 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011758 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000011759
11760 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
11761
11762 ctxt = xmlNewParserCtxt();
11763 if (ctxt == NULL) {
11764 return(NULL);
11765 }
11766
11767 /*
11768 * Set-up the SAX context
11769 */
11770 if (sax != NULL) {
11771 if (ctxt->sax != NULL)
11772 xmlFree(ctxt->sax);
11773 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000011774 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000011775 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011776
11777 /*
11778 * Canonicalise the system ID
11779 */
11780 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000011781 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011782 xmlFreeParserCtxt(ctxt);
11783 return(NULL);
11784 }
Owen Taylor3473f882001-02-23 17:55:21 +000011785
11786 /*
11787 * Ask the Entity resolver to load the damn thing
11788 */
11789
11790 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000011791 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
11792 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011793 if (input == NULL) {
11794 if (sax != NULL) ctxt->sax = NULL;
11795 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000011796 if (systemIdCanonic != NULL)
11797 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011798 return(NULL);
11799 }
11800
11801 /*
11802 * plug some encoding conversion routines here.
11803 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000011804 if (xmlPushInput(ctxt, input) < 0) {
11805 if (sax != NULL) ctxt->sax = NULL;
11806 xmlFreeParserCtxt(ctxt);
11807 if (systemIdCanonic != NULL)
11808 xmlFree(systemIdCanonic);
11809 return(NULL);
11810 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000011811 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
11812 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
11813 xmlSwitchEncoding(ctxt, enc);
11814 }
Owen Taylor3473f882001-02-23 17:55:21 +000011815
11816 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000011817 input->filename = (char *) systemIdCanonic;
11818 else
11819 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000011820 input->line = 1;
11821 input->col = 1;
11822 input->base = ctxt->input->cur;
11823 input->cur = ctxt->input->cur;
11824 input->free = NULL;
11825
11826 /*
11827 * let's parse that entity knowing it's an external subset.
11828 */
11829 ctxt->inSubset = 2;
11830 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000011831 if (ctxt->myDoc == NULL) {
11832 xmlErrMemory(ctxt, "New Doc failed");
11833 if (sax != NULL) ctxt->sax = NULL;
11834 xmlFreeParserCtxt(ctxt);
11835 return(NULL);
11836 }
11837 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000011838 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
11839 ExternalID, SystemID);
11840 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
11841
11842 if (ctxt->myDoc != NULL) {
11843 if (ctxt->wellFormed) {
11844 ret = ctxt->myDoc->extSubset;
11845 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000011846 if (ret != NULL) {
11847 xmlNodePtr tmp;
11848
11849 ret->doc = NULL;
11850 tmp = ret->children;
11851 while (tmp != NULL) {
11852 tmp->doc = NULL;
11853 tmp = tmp->next;
11854 }
11855 }
Owen Taylor3473f882001-02-23 17:55:21 +000011856 } else {
11857 ret = NULL;
11858 }
11859 xmlFreeDoc(ctxt->myDoc);
11860 ctxt->myDoc = NULL;
11861 }
11862 if (sax != NULL) ctxt->sax = NULL;
11863 xmlFreeParserCtxt(ctxt);
11864
11865 return(ret);
11866}
11867
Daniel Veillard4432df22003-09-28 18:58:27 +000011868
Owen Taylor3473f882001-02-23 17:55:21 +000011869/**
11870 * xmlParseDTD:
11871 * @ExternalID: a NAME* containing the External ID of the DTD
11872 * @SystemID: a NAME* containing the URL to the DTD
11873 *
11874 * Load and parse an external subset.
11875 *
11876 * Returns the resulting xmlDtdPtr or NULL in case of error.
11877 */
11878
11879xmlDtdPtr
11880xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
11881 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
11882}
Daniel Veillard4432df22003-09-28 18:58:27 +000011883#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011884
11885/************************************************************************
11886 * *
11887 * Front ends when parsing an Entity *
11888 * *
11889 ************************************************************************/
11890
11891/**
Owen Taylor3473f882001-02-23 17:55:21 +000011892 * xmlParseCtxtExternalEntity:
11893 * @ctx: the existing parsing context
11894 * @URL: the URL for the entity to load
11895 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000011896 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000011897 *
11898 * Parse an external general entity within an existing parsing context
11899 * An external general parsed entity is well-formed if it matches the
11900 * production labeled extParsedEnt.
11901 *
11902 * [78] extParsedEnt ::= TextDecl? content
11903 *
11904 * Returns 0 if the entity is well formed, -1 in case of args problem and
11905 * the parser error code otherwise
11906 */
11907
11908int
11909xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000011910 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000011911 xmlParserCtxtPtr ctxt;
11912 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011913 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000011914 xmlSAXHandlerPtr oldsax = NULL;
11915 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000011916 xmlChar start[4];
11917 xmlCharEncoding enc;
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011918 xmlParserInputPtr inputStream;
11919 char *directory = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011920
Daniel Veillardce682bc2004-11-05 17:22:25 +000011921 if (ctx == NULL) return(-1);
11922
Daniel Veillard8915c152008-08-26 13:05:34 +000011923 if (((ctx->depth > 20) || (ctx->nbentities >= 100000)) &&
11924 ((ctx->options & XML_PARSE_HUGE) == 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +000011925 return(XML_ERR_ENTITY_LOOP);
11926 }
11927
Daniel Veillardcda96922001-08-21 10:56:31 +000011928 if (lst != NULL)
11929 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000011930 if ((URL == NULL) && (ID == NULL))
11931 return(-1);
11932 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
11933 return(-1);
11934
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011935 ctxt = xmlNewParserCtxt();
11936 if (ctxt == NULL) {
11937 return(-1);
11938 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011939
Owen Taylor3473f882001-02-23 17:55:21 +000011940 ctxt->userData = ctxt;
Daniel Veillarde2830f12003-01-08 17:47:49 +000011941 ctxt->_private = ctx->_private;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011942
Daniel Veillard2937b3a2006-10-10 08:52:34 +000011943 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11944 if (inputStream == NULL) {
11945 xmlFreeParserCtxt(ctxt);
11946 return(-1);
11947 }
11948
11949 inputPush(ctxt, inputStream);
11950
11951 if ((ctxt->directory == NULL) && (directory == NULL))
11952 directory = xmlParserGetDirectory((char *)URL);
11953 if ((ctxt->directory == NULL) && (directory != NULL))
11954 ctxt->directory = directory;
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011955
Owen Taylor3473f882001-02-23 17:55:21 +000011956 oldsax = ctxt->sax;
11957 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011958 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011959 newDoc = xmlNewDoc(BAD_CAST "1.0");
11960 if (newDoc == NULL) {
11961 xmlFreeParserCtxt(ctxt);
11962 return(-1);
11963 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000011964 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000011965 if (ctx->myDoc->dict) {
11966 newDoc->dict = ctx->myDoc->dict;
11967 xmlDictReference(newDoc->dict);
11968 }
Owen Taylor3473f882001-02-23 17:55:21 +000011969 if (ctx->myDoc != NULL) {
11970 newDoc->intSubset = ctx->myDoc->intSubset;
11971 newDoc->extSubset = ctx->myDoc->extSubset;
11972 }
11973 if (ctx->myDoc->URL != NULL) {
11974 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
11975 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011976 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11977 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000011978 ctxt->sax = oldsax;
11979 xmlFreeParserCtxt(ctxt);
11980 newDoc->intSubset = NULL;
11981 newDoc->extSubset = NULL;
11982 xmlFreeDoc(newDoc);
11983 return(-1);
11984 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000011985 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000011986 nodePush(ctxt, newDoc->children);
11987 if (ctx->myDoc == NULL) {
11988 ctxt->myDoc = newDoc;
11989 } else {
11990 ctxt->myDoc = ctx->myDoc;
11991 newDoc->children->doc = ctx->myDoc;
11992 }
11993
Daniel Veillard40ec29a2008-07-30 12:35:40 +000011994 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000011995 * Get the 4 first bytes and decode the charset
11996 * if enc != XML_CHAR_ENCODING_NONE
11997 * plug some encoding conversion routines.
11998 */
11999 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012000 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12001 start[0] = RAW;
12002 start[1] = NXT(1);
12003 start[2] = NXT(2);
12004 start[3] = NXT(3);
12005 enc = xmlDetectCharEncoding(start, 4);
12006 if (enc != XML_CHAR_ENCODING_NONE) {
12007 xmlSwitchEncoding(ctxt, enc);
12008 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012009 }
12010
Owen Taylor3473f882001-02-23 17:55:21 +000012011 /*
12012 * Parse a possible text declaration first
12013 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012014 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012015 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012016 /*
12017 * An XML-1.0 document can't reference an entity not XML-1.0
12018 */
12019 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12020 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12021 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12022 "Version mismatch between document and entity\n");
12023 }
Owen Taylor3473f882001-02-23 17:55:21 +000012024 }
12025
12026 /*
12027 * Doing validity checking on chunk doesn't make sense
12028 */
12029 ctxt->instate = XML_PARSER_CONTENT;
12030 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012031 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012032 ctxt->loadsubset = ctx->loadsubset;
12033 ctxt->depth = ctx->depth + 1;
12034 ctxt->replaceEntities = ctx->replaceEntities;
12035 if (ctxt->validate) {
12036 ctxt->vctxt.error = ctx->vctxt.error;
12037 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012038 } else {
12039 ctxt->vctxt.error = NULL;
12040 ctxt->vctxt.warning = NULL;
12041 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012042 ctxt->vctxt.nodeTab = NULL;
12043 ctxt->vctxt.nodeNr = 0;
12044 ctxt->vctxt.nodeMax = 0;
12045 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012046 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12047 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012048 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12049 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12050 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012051 ctxt->dictNames = ctx->dictNames;
12052 ctxt->attsDefault = ctx->attsDefault;
12053 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012054 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012055
12056 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012057
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012058 ctx->validate = ctxt->validate;
12059 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012060 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012061 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012062 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012063 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012064 }
12065 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012066 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012067 }
12068
12069 if (!ctxt->wellFormed) {
12070 if (ctxt->errNo == 0)
12071 ret = 1;
12072 else
12073 ret = ctxt->errNo;
12074 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012075 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012076 xmlNodePtr cur;
12077
12078 /*
12079 * Return the newly created nodeset after unlinking it from
12080 * they pseudo parent.
12081 */
12082 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012083 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012084 while (cur != NULL) {
12085 cur->parent = NULL;
12086 cur = cur->next;
12087 }
12088 newDoc->children->children = NULL;
12089 }
12090 ret = 0;
12091 }
12092 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012093 ctxt->dict = NULL;
12094 ctxt->attsDefault = NULL;
12095 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012096 xmlFreeParserCtxt(ctxt);
12097 newDoc->intSubset = NULL;
12098 newDoc->extSubset = NULL;
12099 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012100
Owen Taylor3473f882001-02-23 17:55:21 +000012101 return(ret);
12102}
12103
12104/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012105 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012106 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012107 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012108 * @sax: the SAX handler bloc (possibly NULL)
12109 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12110 * @depth: Used for loop detection, use 0
12111 * @URL: the URL for the entity to load
12112 * @ID: the System ID for the entity to load
12113 * @list: the return value for the set of parsed nodes
12114 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012115 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012116 *
12117 * Returns 0 if the entity is well formed, -1 in case of args problem and
12118 * the parser error code otherwise
12119 */
12120
Daniel Veillard7d515752003-09-26 19:12:37 +000012121static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012122xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12123 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012124 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012125 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012126 xmlParserCtxtPtr ctxt;
12127 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012128 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012129 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012130 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012131 xmlChar start[4];
12132 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012133
Daniel Veillard8915c152008-08-26 13:05:34 +000012134 if (((depth > 20) ||
12135 ((oldctxt != NULL) && (oldctxt->nbentities >= 100000))) &&
12136 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012137 return(XML_ERR_ENTITY_LOOP);
12138 }
12139
Owen Taylor3473f882001-02-23 17:55:21 +000012140 if (list != NULL)
12141 *list = NULL;
12142 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012143 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012144 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012145 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012146
12147
12148 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
William M. Brackb670e2e2003-09-27 01:05:55 +000012149 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012150 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012151 if (oldctxt != NULL) {
12152 ctxt->_private = oldctxt->_private;
12153 ctxt->loadsubset = oldctxt->loadsubset;
12154 ctxt->validate = oldctxt->validate;
12155 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012156 ctxt->record_info = oldctxt->record_info;
12157 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12158 ctxt->node_seq.length = oldctxt->node_seq.length;
12159 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012160 } else {
12161 /*
12162 * Doing validity checking on chunk without context
12163 * doesn't make sense
12164 */
12165 ctxt->_private = NULL;
12166 ctxt->validate = 0;
12167 ctxt->external = 2;
12168 ctxt->loadsubset = 0;
12169 }
Owen Taylor3473f882001-02-23 17:55:21 +000012170 if (sax != NULL) {
12171 oldsax = ctxt->sax;
12172 ctxt->sax = sax;
12173 if (user_data != NULL)
12174 ctxt->userData = user_data;
12175 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012176 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012177 newDoc = xmlNewDoc(BAD_CAST "1.0");
12178 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012179 ctxt->node_seq.maximum = 0;
12180 ctxt->node_seq.length = 0;
12181 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012182 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012183 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012184 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012185 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012186 newDoc->intSubset = doc->intSubset;
12187 newDoc->extSubset = doc->extSubset;
12188 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012189 xmlDictReference(newDoc->dict);
12190
Owen Taylor3473f882001-02-23 17:55:21 +000012191 if (doc->URL != NULL) {
12192 newDoc->URL = xmlStrdup(doc->URL);
12193 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012194 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12195 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012196 if (sax != NULL)
12197 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012198 ctxt->node_seq.maximum = 0;
12199 ctxt->node_seq.length = 0;
12200 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012201 xmlFreeParserCtxt(ctxt);
12202 newDoc->intSubset = NULL;
12203 newDoc->extSubset = NULL;
12204 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012205 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012206 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012207 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012208 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012209 ctxt->myDoc = doc;
12210 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012211
Daniel Veillard87a764e2001-06-20 17:41:10 +000012212 /*
12213 * Get the 4 first bytes and decode the charset
12214 * if enc != XML_CHAR_ENCODING_NONE
12215 * plug some encoding conversion routines.
12216 */
12217 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012218 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12219 start[0] = RAW;
12220 start[1] = NXT(1);
12221 start[2] = NXT(2);
12222 start[3] = NXT(3);
12223 enc = xmlDetectCharEncoding(start, 4);
12224 if (enc != XML_CHAR_ENCODING_NONE) {
12225 xmlSwitchEncoding(ctxt, enc);
12226 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012227 }
12228
Owen Taylor3473f882001-02-23 17:55:21 +000012229 /*
12230 * Parse a possible text declaration first
12231 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012232 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012233 xmlParseTextDecl(ctxt);
12234 }
12235
Owen Taylor3473f882001-02-23 17:55:21 +000012236 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012237 ctxt->depth = depth;
12238
12239 xmlParseContent(ctxt);
12240
Daniel Veillard561b7f82002-03-20 21:55:57 +000012241 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012242 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012243 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012244 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012245 }
12246 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012247 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012248 }
12249
12250 if (!ctxt->wellFormed) {
12251 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012252 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012253 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012254 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012255 } else {
12256 if (list != NULL) {
12257 xmlNodePtr cur;
12258
12259 /*
12260 * Return the newly created nodeset after unlinking it from
12261 * they pseudo parent.
12262 */
12263 cur = newDoc->children->children;
12264 *list = cur;
12265 while (cur != NULL) {
12266 cur->parent = NULL;
12267 cur = cur->next;
12268 }
12269 newDoc->children->children = NULL;
12270 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012271 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012272 }
12273 if (sax != NULL)
12274 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012275 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12276 oldctxt->node_seq.length = ctxt->node_seq.length;
12277 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012278 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012279 ctxt->node_seq.maximum = 0;
12280 ctxt->node_seq.length = 0;
12281 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012282 xmlFreeParserCtxt(ctxt);
12283 newDoc->intSubset = NULL;
12284 newDoc->extSubset = NULL;
12285 xmlFreeDoc(newDoc);
12286
12287 return(ret);
12288}
12289
Daniel Veillard81273902003-09-30 00:43:48 +000012290#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012291/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012292 * xmlParseExternalEntity:
12293 * @doc: the document the chunk pertains to
12294 * @sax: the SAX handler bloc (possibly NULL)
12295 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12296 * @depth: Used for loop detection, use 0
12297 * @URL: the URL for the entity to load
12298 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012299 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012300 *
12301 * Parse an external general entity
12302 * An external general parsed entity is well-formed if it matches the
12303 * production labeled extParsedEnt.
12304 *
12305 * [78] extParsedEnt ::= TextDecl? content
12306 *
12307 * Returns 0 if the entity is well formed, -1 in case of args problem and
12308 * the parser error code otherwise
12309 */
12310
12311int
12312xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012313 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012314 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012315 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012316}
12317
12318/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012319 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012320 * @doc: the document the chunk pertains to
12321 * @sax: the SAX handler bloc (possibly NULL)
12322 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12323 * @depth: Used for loop detection, use 0
12324 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012325 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012326 *
12327 * Parse a well-balanced chunk of an XML document
12328 * called by the parser
12329 * The allowed sequence for the Well Balanced Chunk is the one defined by
12330 * the content production in the XML grammar:
12331 *
12332 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12333 *
12334 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12335 * the parser error code otherwise
12336 */
12337
12338int
12339xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012340 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012341 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12342 depth, string, lst, 0 );
12343}
Daniel Veillard81273902003-09-30 00:43:48 +000012344#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012345
12346/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012347 * xmlParseBalancedChunkMemoryInternal:
12348 * @oldctxt: the existing parsing context
12349 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12350 * @user_data: the user data field for the parser context
12351 * @lst: the return value for the set of parsed nodes
12352 *
12353 *
12354 * Parse a well-balanced chunk of an XML document
12355 * called by the parser
12356 * The allowed sequence for the Well Balanced Chunk is the one defined by
12357 * the content production in the XML grammar:
12358 *
12359 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12360 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012361 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12362 * error code otherwise
Daniel Veillard328f48c2002-11-15 15:24:34 +000012363 *
12364 * In case recover is set to 1, the nodelist will not be empty even if
12365 * the parsed chunk is not well balanced.
12366 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012367static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012368xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12369 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12370 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012371 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012372 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012373 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012374 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012375 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012376 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012377 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012378
Daniel Veillard8915c152008-08-26 13:05:34 +000012379 if (((oldctxt->depth > 20) || (oldctxt->nbentities >= 100000)) &&
12380 ((oldctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012381 return(XML_ERR_ENTITY_LOOP);
12382 }
12383
12384
12385 if (lst != NULL)
12386 *lst = NULL;
12387 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012388 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012389
12390 size = xmlStrlen(string);
12391
12392 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012393 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012394 if (user_data != NULL)
12395 ctxt->userData = user_data;
12396 else
12397 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012398 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12399 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012400 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12401 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12402 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012403
12404 oldsax = ctxt->sax;
12405 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012406 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012407 ctxt->replaceEntities = oldctxt->replaceEntities;
12408 ctxt->options = oldctxt->options;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012409
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012410 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012411 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012412 newDoc = xmlNewDoc(BAD_CAST "1.0");
12413 if (newDoc == NULL) {
12414 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012415 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012416 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012417 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012418 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012419 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012420 newDoc->dict = ctxt->dict;
12421 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012422 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012423 } else {
12424 ctxt->myDoc = oldctxt->myDoc;
12425 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012426 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012427 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012428 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12429 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012430 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012431 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012432 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012433 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012434 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012435 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012436 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012437 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012438 ctxt->myDoc->children = NULL;
12439 ctxt->myDoc->last = NULL;
12440 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012441 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012442 ctxt->instate = XML_PARSER_CONTENT;
12443 ctxt->depth = oldctxt->depth + 1;
12444
Daniel Veillard328f48c2002-11-15 15:24:34 +000012445 ctxt->validate = 0;
12446 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012447 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12448 /*
12449 * ID/IDREF registration will be done in xmlValidateElement below
12450 */
12451 ctxt->loadsubset |= XML_SKIP_IDS;
12452 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012453 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012454 ctxt->attsDefault = oldctxt->attsDefault;
12455 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012456
Daniel Veillard68e9e742002-11-16 15:35:11 +000012457 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012458 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012459 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012460 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012461 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012462 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012463 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012464 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012465 }
12466
12467 if (!ctxt->wellFormed) {
12468 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012469 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012470 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012471 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012472 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012473 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012474 }
12475
William M. Brack7b9154b2003-09-27 19:23:50 +000012476 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012477 xmlNodePtr cur;
12478
12479 /*
12480 * Return the newly created nodeset after unlinking it from
12481 * they pseudo parent.
12482 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012483 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012484 *lst = cur;
12485 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012486#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012487 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12488 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12489 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012490 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12491 oldctxt->myDoc, cur);
12492 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012493#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012494 cur->parent = NULL;
12495 cur = cur->next;
12496 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012497 ctxt->myDoc->children->children = NULL;
12498 }
12499 if (ctxt->myDoc != NULL) {
12500 xmlFreeNode(ctxt->myDoc->children);
12501 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012502 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012503 }
12504
Daniel Veillard4bf899b2008-08-20 17:04:30 +000012505 oldctxt->nbentities += ctxt->nbentities;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012506 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012507 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012508 ctxt->attsDefault = NULL;
12509 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012510 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012511 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012512 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012513 }
Daniel Veillard328f48c2002-11-15 15:24:34 +000012514
12515 return(ret);
12516}
12517
Daniel Veillard29b17482004-08-16 00:39:03 +000012518/**
12519 * xmlParseInNodeContext:
12520 * @node: the context node
12521 * @data: the input string
12522 * @datalen: the input string length in bytes
12523 * @options: a combination of xmlParserOption
12524 * @lst: the return value for the set of parsed nodes
12525 *
12526 * Parse a well-balanced chunk of an XML document
12527 * within the context (DTD, namespaces, etc ...) of the given node.
12528 *
12529 * The allowed sequence for the data is a Well Balanced Chunk defined by
12530 * the content production in the XML grammar:
12531 *
12532 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12533 *
12534 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12535 * error code otherwise
12536 */
12537xmlParserErrors
12538xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
12539 int options, xmlNodePtr *lst) {
12540#ifdef SAX2
12541 xmlParserCtxtPtr ctxt;
12542 xmlDocPtr doc = NULL;
12543 xmlNodePtr fake, cur;
12544 int nsnr = 0;
12545
12546 xmlParserErrors ret = XML_ERR_OK;
12547
12548 /*
12549 * check all input parameters, grab the document
12550 */
12551 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
12552 return(XML_ERR_INTERNAL_ERROR);
12553 switch (node->type) {
12554 case XML_ELEMENT_NODE:
12555 case XML_ATTRIBUTE_NODE:
12556 case XML_TEXT_NODE:
12557 case XML_CDATA_SECTION_NODE:
12558 case XML_ENTITY_REF_NODE:
12559 case XML_PI_NODE:
12560 case XML_COMMENT_NODE:
12561 case XML_DOCUMENT_NODE:
12562 case XML_HTML_DOCUMENT_NODE:
12563 break;
12564 default:
12565 return(XML_ERR_INTERNAL_ERROR);
12566
12567 }
12568 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
12569 (node->type != XML_DOCUMENT_NODE) &&
12570 (node->type != XML_HTML_DOCUMENT_NODE))
12571 node = node->parent;
12572 if (node == NULL)
12573 return(XML_ERR_INTERNAL_ERROR);
12574 if (node->type == XML_ELEMENT_NODE)
12575 doc = node->doc;
12576 else
12577 doc = (xmlDocPtr) node;
12578 if (doc == NULL)
12579 return(XML_ERR_INTERNAL_ERROR);
12580
12581 /*
12582 * allocate a context and set-up everything not related to the
12583 * node position in the tree
12584 */
12585 if (doc->type == XML_DOCUMENT_NODE)
12586 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
12587#ifdef LIBXML_HTML_ENABLED
12588 else if (doc->type == XML_HTML_DOCUMENT_NODE)
12589 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
12590#endif
12591 else
12592 return(XML_ERR_INTERNAL_ERROR);
12593
12594 if (ctxt == NULL)
12595 return(XML_ERR_NO_MEMORY);
12596 fake = xmlNewComment(NULL);
12597 if (fake == NULL) {
12598 xmlFreeParserCtxt(ctxt);
12599 return(XML_ERR_NO_MEMORY);
12600 }
12601 xmlAddChild(node, fake);
William M. Brackc3f81342004-10-03 01:22:44 +000012602
12603 /*
12604 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
12605 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
12606 * we must wait until the last moment to free the original one.
12607 */
Daniel Veillard29b17482004-08-16 00:39:03 +000012608 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000012609 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000012610 xmlDictFree(ctxt->dict);
12611 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000012612 } else
12613 options |= XML_PARSE_NODICT;
12614
Daniel Veillard37334572008-07-31 08:20:02 +000012615 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000012616 xmlDetectSAX2(ctxt);
12617 ctxt->myDoc = doc;
12618
12619 if (node->type == XML_ELEMENT_NODE) {
12620 nodePush(ctxt, node);
12621 /*
12622 * initialize the SAX2 namespaces stack
12623 */
12624 cur = node;
12625 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
12626 xmlNsPtr ns = cur->nsDef;
12627 const xmlChar *iprefix, *ihref;
12628
12629 while (ns != NULL) {
12630 if (ctxt->dict) {
12631 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
12632 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
12633 } else {
12634 iprefix = ns->prefix;
12635 ihref = ns->href;
12636 }
12637
12638 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
12639 nsPush(ctxt, iprefix, ihref);
12640 nsnr++;
12641 }
12642 ns = ns->next;
12643 }
12644 cur = cur->parent;
12645 }
12646 ctxt->instate = XML_PARSER_CONTENT;
12647 }
12648
12649 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
12650 /*
12651 * ID/IDREF registration will be done in xmlValidateElement below
12652 */
12653 ctxt->loadsubset |= XML_SKIP_IDS;
12654 }
12655
Daniel Veillard499cc922006-01-18 17:22:35 +000012656#ifdef LIBXML_HTML_ENABLED
12657 if (doc->type == XML_HTML_DOCUMENT_NODE)
12658 __htmlParseContent(ctxt);
12659 else
12660#endif
12661 xmlParseContent(ctxt);
12662
Daniel Veillard29b17482004-08-16 00:39:03 +000012663 nsPop(ctxt, nsnr);
12664 if ((RAW == '<') && (NXT(1) == '/')) {
12665 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12666 } else if (RAW != 0) {
12667 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12668 }
12669 if ((ctxt->node != NULL) && (ctxt->node != node)) {
12670 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12671 ctxt->wellFormed = 0;
12672 }
12673
12674 if (!ctxt->wellFormed) {
12675 if (ctxt->errNo == 0)
12676 ret = XML_ERR_INTERNAL_ERROR;
12677 else
12678 ret = (xmlParserErrors)ctxt->errNo;
12679 } else {
12680 ret = XML_ERR_OK;
12681 }
12682
12683 /*
12684 * Return the newly created nodeset after unlinking it from
12685 * the pseudo sibling.
12686 */
12687
12688 cur = fake->next;
12689 fake->next = NULL;
12690 node->last = fake;
12691
12692 if (cur != NULL) {
12693 cur->prev = NULL;
12694 }
12695
12696 *lst = cur;
12697
12698 while (cur != NULL) {
12699 cur->parent = NULL;
12700 cur = cur->next;
12701 }
12702
12703 xmlUnlinkNode(fake);
12704 xmlFreeNode(fake);
12705
12706
12707 if (ret != XML_ERR_OK) {
12708 xmlFreeNodeList(*lst);
12709 *lst = NULL;
12710 }
William M. Brackc3f81342004-10-03 01:22:44 +000012711
William M. Brackb7b54de2004-10-06 16:38:01 +000012712 if (doc->dict != NULL)
12713 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000012714 xmlFreeParserCtxt(ctxt);
12715
12716 return(ret);
12717#else /* !SAX2 */
12718 return(XML_ERR_INTERNAL_ERROR);
12719#endif
12720}
12721
Daniel Veillard81273902003-09-30 00:43:48 +000012722#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000012723/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000012724 * xmlParseBalancedChunkMemoryRecover:
12725 * @doc: the document the chunk pertains to
12726 * @sax: the SAX handler bloc (possibly NULL)
12727 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12728 * @depth: Used for loop detection, use 0
12729 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12730 * @lst: the return value for the set of parsed nodes
12731 * @recover: return nodes even if the data is broken (use 0)
12732 *
12733 *
12734 * Parse a well-balanced chunk of an XML document
12735 * called by the parser
12736 * The allowed sequence for the Well Balanced Chunk is the one defined by
12737 * the content production in the XML grammar:
12738 *
12739 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12740 *
12741 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12742 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000012743 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000012744 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000012745 * the parsed chunk is not well balanced, assuming the parsing succeeded to
12746 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000012747 */
12748int
12749xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000012750 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000012751 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000012752 xmlParserCtxtPtr ctxt;
12753 xmlDocPtr newDoc;
12754 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012755 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012756 int size;
12757 int ret = 0;
12758
Daniel Veillard8915c152008-08-26 13:05:34 +000012759 if (depth > 20) {
Owen Taylor3473f882001-02-23 17:55:21 +000012760 return(XML_ERR_ENTITY_LOOP);
12761 }
12762
12763
Daniel Veillardcda96922001-08-21 10:56:31 +000012764 if (lst != NULL)
12765 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012766 if (string == NULL)
12767 return(-1);
12768
12769 size = xmlStrlen(string);
12770
12771 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
12772 if (ctxt == NULL) return(-1);
12773 ctxt->userData = ctxt;
12774 if (sax != NULL) {
12775 oldsax = ctxt->sax;
12776 ctxt->sax = sax;
12777 if (user_data != NULL)
12778 ctxt->userData = user_data;
12779 }
12780 newDoc = xmlNewDoc(BAD_CAST "1.0");
12781 if (newDoc == NULL) {
12782 xmlFreeParserCtxt(ctxt);
12783 return(-1);
12784 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012785 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012786 if ((doc != NULL) && (doc->dict != NULL)) {
12787 xmlDictFree(ctxt->dict);
12788 ctxt->dict = doc->dict;
12789 xmlDictReference(ctxt->dict);
12790 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12791 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12792 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12793 ctxt->dictNames = 1;
12794 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000012795 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012796 }
Owen Taylor3473f882001-02-23 17:55:21 +000012797 if (doc != NULL) {
12798 newDoc->intSubset = doc->intSubset;
12799 newDoc->extSubset = doc->extSubset;
12800 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012801 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12802 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012803 if (sax != NULL)
12804 ctxt->sax = oldsax;
12805 xmlFreeParserCtxt(ctxt);
12806 newDoc->intSubset = NULL;
12807 newDoc->extSubset = NULL;
12808 xmlFreeDoc(newDoc);
12809 return(-1);
12810 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012811 xmlAddChild((xmlNodePtr) newDoc, newRoot);
12812 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012813 if (doc == NULL) {
12814 ctxt->myDoc = newDoc;
12815 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000012816 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000012817 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000012818 /* Ensure that doc has XML spec namespace */
12819 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
12820 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000012821 }
12822 ctxt->instate = XML_PARSER_CONTENT;
12823 ctxt->depth = depth;
12824
12825 /*
12826 * Doing validity checking on chunk doesn't make sense
12827 */
12828 ctxt->validate = 0;
12829 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012830 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012831
Daniel Veillardb39bc392002-10-26 19:29:51 +000012832 if ( doc != NULL ){
12833 content = doc->children;
12834 doc->children = NULL;
12835 xmlParseContent(ctxt);
12836 doc->children = content;
12837 }
12838 else {
12839 xmlParseContent(ctxt);
12840 }
Owen Taylor3473f882001-02-23 17:55:21 +000012841 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012842 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012843 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012844 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012845 }
12846 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012847 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012848 }
12849
12850 if (!ctxt->wellFormed) {
12851 if (ctxt->errNo == 0)
12852 ret = 1;
12853 else
12854 ret = ctxt->errNo;
12855 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012856 ret = 0;
12857 }
12858
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012859 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
12860 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012861
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012862 /*
12863 * Return the newly created nodeset after unlinking it from
12864 * they pseudo parent.
12865 */
12866 cur = newDoc->children->children;
12867 *lst = cur;
12868 while (cur != NULL) {
12869 xmlSetTreeDoc(cur, doc);
12870 cur->parent = NULL;
12871 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000012872 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000012873 newDoc->children->children = NULL;
12874 }
Daniel Veillard58e44c92002-08-02 22:19:49 +000012875
Owen Taylor3473f882001-02-23 17:55:21 +000012876 if (sax != NULL)
12877 ctxt->sax = oldsax;
12878 xmlFreeParserCtxt(ctxt);
12879 newDoc->intSubset = NULL;
12880 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000012881 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012882 xmlFreeDoc(newDoc);
12883
12884 return(ret);
12885}
12886
12887/**
12888 * xmlSAXParseEntity:
12889 * @sax: the SAX handler block
12890 * @filename: the filename
12891 *
12892 * parse an XML external entity out of context and build a tree.
12893 * It use the given SAX function block to handle the parsing callback.
12894 * If sax is NULL, fallback to the default DOM tree building routines.
12895 *
12896 * [78] extParsedEnt ::= TextDecl? content
12897 *
12898 * This correspond to a "Well Balanced" chunk
12899 *
12900 * Returns the resulting document tree
12901 */
12902
12903xmlDocPtr
12904xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
12905 xmlDocPtr ret;
12906 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012907
12908 ctxt = xmlCreateFileParserCtxt(filename);
12909 if (ctxt == NULL) {
12910 return(NULL);
12911 }
12912 if (sax != NULL) {
12913 if (ctxt->sax != NULL)
12914 xmlFree(ctxt->sax);
12915 ctxt->sax = sax;
12916 ctxt->userData = NULL;
12917 }
12918
Owen Taylor3473f882001-02-23 17:55:21 +000012919 xmlParseExtParsedEnt(ctxt);
12920
12921 if (ctxt->wellFormed)
12922 ret = ctxt->myDoc;
12923 else {
12924 ret = NULL;
12925 xmlFreeDoc(ctxt->myDoc);
12926 ctxt->myDoc = NULL;
12927 }
12928 if (sax != NULL)
12929 ctxt->sax = NULL;
12930 xmlFreeParserCtxt(ctxt);
12931
12932 return(ret);
12933}
12934
12935/**
12936 * xmlParseEntity:
12937 * @filename: the filename
12938 *
12939 * parse an XML external entity out of context and build a tree.
12940 *
12941 * [78] extParsedEnt ::= TextDecl? content
12942 *
12943 * This correspond to a "Well Balanced" chunk
12944 *
12945 * Returns the resulting document tree
12946 */
12947
12948xmlDocPtr
12949xmlParseEntity(const char *filename) {
12950 return(xmlSAXParseEntity(NULL, filename));
12951}
Daniel Veillard81273902003-09-30 00:43:48 +000012952#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012953
12954/**
12955 * xmlCreateEntityParserCtxt:
12956 * @URL: the entity URL
12957 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012958 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +000012959 *
12960 * Create a parser context for an external entity
12961 * Automatic support for ZLIB/Compress compressed document is provided
12962 * by default if found at compile-time.
12963 *
12964 * Returns the new parser context or NULL
12965 */
12966xmlParserCtxtPtr
12967xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
12968 const xmlChar *base) {
12969 xmlParserCtxtPtr ctxt;
12970 xmlParserInputPtr inputStream;
12971 char *directory = NULL;
12972 xmlChar *uri;
12973
12974 ctxt = xmlNewParserCtxt();
12975 if (ctxt == NULL) {
12976 return(NULL);
12977 }
12978
12979 uri = xmlBuildURI(URL, base);
12980
12981 if (uri == NULL) {
12982 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
12983 if (inputStream == NULL) {
12984 xmlFreeParserCtxt(ctxt);
12985 return(NULL);
12986 }
12987
12988 inputPush(ctxt, inputStream);
12989
12990 if ((ctxt->directory == NULL) && (directory == NULL))
12991 directory = xmlParserGetDirectory((char *)URL);
12992 if ((ctxt->directory == NULL) && (directory != NULL))
12993 ctxt->directory = directory;
12994 } else {
12995 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
12996 if (inputStream == NULL) {
12997 xmlFree(uri);
12998 xmlFreeParserCtxt(ctxt);
12999 return(NULL);
13000 }
13001
13002 inputPush(ctxt, inputStream);
13003
13004 if ((ctxt->directory == NULL) && (directory == NULL))
13005 directory = xmlParserGetDirectory((char *)uri);
13006 if ((ctxt->directory == NULL) && (directory != NULL))
13007 ctxt->directory = directory;
13008 xmlFree(uri);
13009 }
Owen Taylor3473f882001-02-23 17:55:21 +000013010 return(ctxt);
13011}
13012
13013/************************************************************************
13014 * *
13015 * Front ends when parsing from a file *
13016 * *
13017 ************************************************************************/
13018
13019/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013020 * xmlCreateURLParserCtxt:
13021 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013022 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013023 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013024 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013025 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013026 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013027 *
13028 * Returns the new parser context or NULL
13029 */
13030xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013031xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013032{
13033 xmlParserCtxtPtr ctxt;
13034 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013035 char *directory = NULL;
13036
Owen Taylor3473f882001-02-23 17:55:21 +000013037 ctxt = xmlNewParserCtxt();
13038 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013039 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013040 return(NULL);
13041 }
13042
Daniel Veillarddf292f72005-01-16 19:00:15 +000013043 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013044 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013045 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013046
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013047 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013048 if (inputStream == NULL) {
13049 xmlFreeParserCtxt(ctxt);
13050 return(NULL);
13051 }
13052
Owen Taylor3473f882001-02-23 17:55:21 +000013053 inputPush(ctxt, inputStream);
13054 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013055 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013056 if ((ctxt->directory == NULL) && (directory != NULL))
13057 ctxt->directory = directory;
13058
13059 return(ctxt);
13060}
13061
Daniel Veillard61b93382003-11-03 14:28:31 +000013062/**
13063 * xmlCreateFileParserCtxt:
13064 * @filename: the filename
13065 *
13066 * Create a parser context for a file content.
13067 * Automatic support for ZLIB/Compress compressed document is provided
13068 * by default if found at compile-time.
13069 *
13070 * Returns the new parser context or NULL
13071 */
13072xmlParserCtxtPtr
13073xmlCreateFileParserCtxt(const char *filename)
13074{
13075 return(xmlCreateURLParserCtxt(filename, 0));
13076}
13077
Daniel Veillard81273902003-09-30 00:43:48 +000013078#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013079/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013080 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013081 * @sax: the SAX handler block
13082 * @filename: the filename
13083 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13084 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013085 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013086 *
13087 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13088 * compressed document is provided by default if found at compile-time.
13089 * It use the given SAX function block to handle the parsing callback.
13090 * If sax is NULL, fallback to the default DOM tree building routines.
13091 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013092 * User data (void *) is stored within the parser context in the
13093 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013094 *
Owen Taylor3473f882001-02-23 17:55:21 +000013095 * Returns the resulting document tree
13096 */
13097
13098xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013099xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13100 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013101 xmlDocPtr ret;
13102 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013103
Daniel Veillard635ef722001-10-29 11:48:19 +000013104 xmlInitParser();
13105
Owen Taylor3473f882001-02-23 17:55:21 +000013106 ctxt = xmlCreateFileParserCtxt(filename);
13107 if (ctxt == NULL) {
13108 return(NULL);
13109 }
13110 if (sax != NULL) {
13111 if (ctxt->sax != NULL)
13112 xmlFree(ctxt->sax);
13113 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013114 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013115 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013116 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013117 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013118 }
Owen Taylor3473f882001-02-23 17:55:21 +000013119
Daniel Veillard37d2d162008-03-14 10:54:00 +000013120 if (ctxt->directory == NULL)
13121 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013122
Daniel Veillarddad3f682002-11-17 16:47:27 +000013123 ctxt->recovery = recovery;
13124
Owen Taylor3473f882001-02-23 17:55:21 +000013125 xmlParseDocument(ctxt);
13126
William M. Brackc07329e2003-09-08 01:57:30 +000013127 if ((ctxt->wellFormed) || recovery) {
13128 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013129 if (ret != NULL) {
13130 if (ctxt->input->buf->compressed > 0)
13131 ret->compression = 9;
13132 else
13133 ret->compression = ctxt->input->buf->compressed;
13134 }
William M. Brackc07329e2003-09-08 01:57:30 +000013135 }
Owen Taylor3473f882001-02-23 17:55:21 +000013136 else {
13137 ret = NULL;
13138 xmlFreeDoc(ctxt->myDoc);
13139 ctxt->myDoc = NULL;
13140 }
13141 if (sax != NULL)
13142 ctxt->sax = NULL;
13143 xmlFreeParserCtxt(ctxt);
13144
13145 return(ret);
13146}
13147
13148/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013149 * xmlSAXParseFile:
13150 * @sax: the SAX handler block
13151 * @filename: the filename
13152 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13153 * documents
13154 *
13155 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13156 * compressed document is provided by default if found at compile-time.
13157 * It use the given SAX function block to handle the parsing callback.
13158 * If sax is NULL, fallback to the default DOM tree building routines.
13159 *
13160 * Returns the resulting document tree
13161 */
13162
13163xmlDocPtr
13164xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13165 int recovery) {
13166 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13167}
13168
13169/**
Owen Taylor3473f882001-02-23 17:55:21 +000013170 * xmlRecoverDoc:
13171 * @cur: a pointer to an array of xmlChar
13172 *
13173 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013174 * In the case the document is not Well Formed, a attempt to build a
13175 * tree is tried anyway
13176 *
13177 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013178 */
13179
13180xmlDocPtr
13181xmlRecoverDoc(xmlChar *cur) {
13182 return(xmlSAXParseDoc(NULL, cur, 1));
13183}
13184
13185/**
13186 * xmlParseFile:
13187 * @filename: the filename
13188 *
13189 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13190 * compressed document is provided by default if found at compile-time.
13191 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013192 * Returns the resulting document tree if the file was wellformed,
13193 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013194 */
13195
13196xmlDocPtr
13197xmlParseFile(const char *filename) {
13198 return(xmlSAXParseFile(NULL, filename, 0));
13199}
13200
13201/**
13202 * xmlRecoverFile:
13203 * @filename: the filename
13204 *
13205 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13206 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013207 * In the case the document is not Well Formed, it attempts to build
13208 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013209 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013210 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013211 */
13212
13213xmlDocPtr
13214xmlRecoverFile(const char *filename) {
13215 return(xmlSAXParseFile(NULL, filename, 1));
13216}
13217
13218
13219/**
13220 * xmlSetupParserForBuffer:
13221 * @ctxt: an XML parser context
13222 * @buffer: a xmlChar * buffer
13223 * @filename: a file name
13224 *
13225 * Setup the parser context to parse a new buffer; Clears any prior
13226 * contents from the parser context. The buffer parameter must not be
13227 * NULL, but the filename parameter can be
13228 */
13229void
13230xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13231 const char* filename)
13232{
13233 xmlParserInputPtr input;
13234
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013235 if ((ctxt == NULL) || (buffer == NULL))
13236 return;
13237
Owen Taylor3473f882001-02-23 17:55:21 +000013238 input = xmlNewInputStream(ctxt);
13239 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013240 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013241 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013242 return;
13243 }
13244
13245 xmlClearParserCtxt(ctxt);
13246 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013247 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013248 input->base = buffer;
13249 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013250 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013251 inputPush(ctxt, input);
13252}
13253
13254/**
13255 * xmlSAXUserParseFile:
13256 * @sax: a SAX handler
13257 * @user_data: The user data returned on SAX callbacks
13258 * @filename: a file name
13259 *
13260 * parse an XML file and call the given SAX handler routines.
13261 * Automatic support for ZLIB/Compress compressed document is provided
13262 *
13263 * Returns 0 in case of success or a error number otherwise
13264 */
13265int
13266xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13267 const char *filename) {
13268 int ret = 0;
13269 xmlParserCtxtPtr ctxt;
13270
13271 ctxt = xmlCreateFileParserCtxt(filename);
13272 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013273 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013274 xmlFree(ctxt->sax);
13275 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013276 xmlDetectSAX2(ctxt);
13277
Owen Taylor3473f882001-02-23 17:55:21 +000013278 if (user_data != NULL)
13279 ctxt->userData = user_data;
13280
13281 xmlParseDocument(ctxt);
13282
13283 if (ctxt->wellFormed)
13284 ret = 0;
13285 else {
13286 if (ctxt->errNo != 0)
13287 ret = ctxt->errNo;
13288 else
13289 ret = -1;
13290 }
13291 if (sax != NULL)
13292 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013293 if (ctxt->myDoc != NULL) {
13294 xmlFreeDoc(ctxt->myDoc);
13295 ctxt->myDoc = NULL;
13296 }
Owen Taylor3473f882001-02-23 17:55:21 +000013297 xmlFreeParserCtxt(ctxt);
13298
13299 return ret;
13300}
Daniel Veillard81273902003-09-30 00:43:48 +000013301#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013302
13303/************************************************************************
13304 * *
13305 * Front ends when parsing from memory *
13306 * *
13307 ************************************************************************/
13308
13309/**
13310 * xmlCreateMemoryParserCtxt:
13311 * @buffer: a pointer to a char array
13312 * @size: the size of the array
13313 *
13314 * Create a parser context for an XML in-memory document.
13315 *
13316 * Returns the new parser context or NULL
13317 */
13318xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013319xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013320 xmlParserCtxtPtr ctxt;
13321 xmlParserInputPtr input;
13322 xmlParserInputBufferPtr buf;
13323
13324 if (buffer == NULL)
13325 return(NULL);
13326 if (size <= 0)
13327 return(NULL);
13328
13329 ctxt = xmlNewParserCtxt();
13330 if (ctxt == NULL)
13331 return(NULL);
13332
Daniel Veillard53350552003-09-18 13:35:51 +000013333 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013334 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013335 if (buf == NULL) {
13336 xmlFreeParserCtxt(ctxt);
13337 return(NULL);
13338 }
Owen Taylor3473f882001-02-23 17:55:21 +000013339
13340 input = xmlNewInputStream(ctxt);
13341 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013342 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013343 xmlFreeParserCtxt(ctxt);
13344 return(NULL);
13345 }
13346
13347 input->filename = NULL;
13348 input->buf = buf;
13349 input->base = input->buf->buffer->content;
13350 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013351 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000013352
13353 inputPush(ctxt, input);
13354 return(ctxt);
13355}
13356
Daniel Veillard81273902003-09-30 00:43:48 +000013357#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013358/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013359 * xmlSAXParseMemoryWithData:
13360 * @sax: the SAX handler block
13361 * @buffer: an pointer to a char array
13362 * @size: the size of the array
13363 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13364 * documents
13365 * @data: the userdata
13366 *
13367 * parse an XML in-memory block and use the given SAX function block
13368 * to handle the parsing callback. If sax is NULL, fallback to the default
13369 * DOM tree building routines.
13370 *
13371 * User data (void *) is stored within the parser context in the
13372 * context's _private member, so it is available nearly everywhere in libxml
13373 *
13374 * Returns the resulting document tree
13375 */
13376
13377xmlDocPtr
13378xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13379 int size, int recovery, void *data) {
13380 xmlDocPtr ret;
13381 xmlParserCtxtPtr ctxt;
13382
13383 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13384 if (ctxt == NULL) return(NULL);
13385 if (sax != NULL) {
13386 if (ctxt->sax != NULL)
13387 xmlFree(ctxt->sax);
13388 ctxt->sax = sax;
13389 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013390 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013391 if (data!=NULL) {
13392 ctxt->_private=data;
13393 }
13394
Daniel Veillardadba5f12003-04-04 16:09:01 +000013395 ctxt->recovery = recovery;
13396
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013397 xmlParseDocument(ctxt);
13398
13399 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13400 else {
13401 ret = NULL;
13402 xmlFreeDoc(ctxt->myDoc);
13403 ctxt->myDoc = NULL;
13404 }
13405 if (sax != NULL)
13406 ctxt->sax = NULL;
13407 xmlFreeParserCtxt(ctxt);
13408
13409 return(ret);
13410}
13411
13412/**
Owen Taylor3473f882001-02-23 17:55:21 +000013413 * xmlSAXParseMemory:
13414 * @sax: the SAX handler block
13415 * @buffer: an pointer to a char array
13416 * @size: the size of the array
13417 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13418 * documents
13419 *
13420 * parse an XML in-memory block and use the given SAX function block
13421 * to handle the parsing callback. If sax is NULL, fallback to the default
13422 * DOM tree building routines.
13423 *
13424 * Returns the resulting document tree
13425 */
13426xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013427xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13428 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013429 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013430}
13431
13432/**
13433 * xmlParseMemory:
13434 * @buffer: an pointer to a char array
13435 * @size: the size of the array
13436 *
13437 * parse an XML in-memory block and build a tree.
13438 *
13439 * Returns the resulting document tree
13440 */
13441
Daniel Veillard50822cb2001-07-26 20:05:51 +000013442xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013443 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13444}
13445
13446/**
13447 * xmlRecoverMemory:
13448 * @buffer: an pointer to a char array
13449 * @size: the size of the array
13450 *
13451 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013452 * In the case the document is not Well Formed, an attempt to
13453 * build a tree is tried anyway
13454 *
13455 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013456 */
13457
Daniel Veillard50822cb2001-07-26 20:05:51 +000013458xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013459 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13460}
13461
13462/**
13463 * xmlSAXUserParseMemory:
13464 * @sax: a SAX handler
13465 * @user_data: The user data returned on SAX callbacks
13466 * @buffer: an in-memory XML document input
13467 * @size: the length of the XML document in bytes
13468 *
13469 * A better SAX parsing routine.
13470 * parse an XML in-memory buffer and call the given SAX handler routines.
13471 *
13472 * Returns 0 in case of success or a error number otherwise
13473 */
13474int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013475 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013476 int ret = 0;
13477 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013478
13479 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13480 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013481 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
13482 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000013483 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013484 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013485
Daniel Veillard30211a02001-04-26 09:33:18 +000013486 if (user_data != NULL)
13487 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000013488
13489 xmlParseDocument(ctxt);
13490
13491 if (ctxt->wellFormed)
13492 ret = 0;
13493 else {
13494 if (ctxt->errNo != 0)
13495 ret = ctxt->errNo;
13496 else
13497 ret = -1;
13498 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000013499 if (sax != NULL)
13500 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013501 if (ctxt->myDoc != NULL) {
13502 xmlFreeDoc(ctxt->myDoc);
13503 ctxt->myDoc = NULL;
13504 }
Owen Taylor3473f882001-02-23 17:55:21 +000013505 xmlFreeParserCtxt(ctxt);
13506
13507 return ret;
13508}
Daniel Veillard81273902003-09-30 00:43:48 +000013509#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013510
13511/**
13512 * xmlCreateDocParserCtxt:
13513 * @cur: a pointer to an array of xmlChar
13514 *
13515 * Creates a parser context for an XML in-memory document.
13516 *
13517 * Returns the new parser context or NULL
13518 */
13519xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013520xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013521 int len;
13522
13523 if (cur == NULL)
13524 return(NULL);
13525 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013526 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000013527}
13528
Daniel Veillard81273902003-09-30 00:43:48 +000013529#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013530/**
13531 * xmlSAXParseDoc:
13532 * @sax: the SAX handler block
13533 * @cur: a pointer to an array of xmlChar
13534 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13535 * documents
13536 *
13537 * parse an XML in-memory document and build a tree.
13538 * It use the given SAX function block to handle the parsing callback.
13539 * If sax is NULL, fallback to the default DOM tree building routines.
13540 *
13541 * Returns the resulting document tree
13542 */
13543
13544xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013545xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000013546 xmlDocPtr ret;
13547 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000013548 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013549
Daniel Veillard38936062004-11-04 17:45:11 +000013550 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013551
13552
13553 ctxt = xmlCreateDocParserCtxt(cur);
13554 if (ctxt == NULL) return(NULL);
13555 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000013556 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013557 ctxt->sax = sax;
13558 ctxt->userData = NULL;
13559 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013560 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013561
13562 xmlParseDocument(ctxt);
13563 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13564 else {
13565 ret = NULL;
13566 xmlFreeDoc(ctxt->myDoc);
13567 ctxt->myDoc = NULL;
13568 }
Daniel Veillard34099b42004-11-04 17:34:35 +000013569 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000013570 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000013571 xmlFreeParserCtxt(ctxt);
13572
13573 return(ret);
13574}
13575
13576/**
13577 * xmlParseDoc:
13578 * @cur: a pointer to an array of xmlChar
13579 *
13580 * parse an XML in-memory document and build a tree.
13581 *
13582 * Returns the resulting document tree
13583 */
13584
13585xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000013586xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013587 return(xmlSAXParseDoc(NULL, cur, 0));
13588}
Daniel Veillard81273902003-09-30 00:43:48 +000013589#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013590
Daniel Veillard81273902003-09-30 00:43:48 +000013591#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000013592/************************************************************************
13593 * *
13594 * Specific function to keep track of entities references *
13595 * and used by the XSLT debugger *
13596 * *
13597 ************************************************************************/
13598
13599static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
13600
13601/**
13602 * xmlAddEntityReference:
13603 * @ent : A valid entity
13604 * @firstNode : A valid first node for children of entity
13605 * @lastNode : A valid last node of children entity
13606 *
13607 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
13608 */
13609static void
13610xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
13611 xmlNodePtr lastNode)
13612{
13613 if (xmlEntityRefFunc != NULL) {
13614 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
13615 }
13616}
13617
13618
13619/**
13620 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000013621 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000013622 *
13623 * Set the function to call call back when a xml reference has been made
13624 */
13625void
13626xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
13627{
13628 xmlEntityRefFunc = func;
13629}
Daniel Veillard81273902003-09-30 00:43:48 +000013630#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013631
13632/************************************************************************
13633 * *
13634 * Miscellaneous *
13635 * *
13636 ************************************************************************/
13637
13638#ifdef LIBXML_XPATH_ENABLED
13639#include <libxml/xpath.h>
13640#endif
13641
Daniel Veillardffa3c742005-07-21 13:24:09 +000013642extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000013643static int xmlParserInitialized = 0;
13644
13645/**
13646 * xmlInitParser:
13647 *
13648 * Initialization function for the XML parser.
13649 * This is not reentrant. Call once before processing in case of
13650 * use in multithreaded programs.
13651 */
13652
13653void
13654xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000013655 if (xmlParserInitialized != 0)
13656 return;
Owen Taylor3473f882001-02-23 17:55:21 +000013657
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013658#ifdef LIBXML_THREAD_ENABLED
13659 __xmlGlobalInitMutexLock();
13660 if (xmlParserInitialized == 0) {
13661#endif
13662 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
13663 (xmlGenericError == NULL))
13664 initGenericErrorDefaultFunc(NULL);
13665 xmlInitGlobals();
13666 xmlInitThreads();
13667 xmlInitMemory();
13668 xmlInitCharEncodingHandlers();
13669 xmlDefaultSAXHandlerInit();
13670 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013671#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013672 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000013673#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013674#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013675 htmlInitAutoClose();
13676 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013677#endif
13678#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013679 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000013680#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000013681 xmlParserInitialized = 1;
13682#ifdef LIBXML_THREAD_ENABLED
13683 }
13684 __xmlGlobalInitMutexUnlock();
13685#endif
Owen Taylor3473f882001-02-23 17:55:21 +000013686}
13687
13688/**
13689 * xmlCleanupParser:
13690 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000013691 * This function name is somewhat misleading. It does not clean up
13692 * parser state, it cleans up memory allocated by the library itself.
13693 * It is a cleanup function for the XML library. It tries to reclaim all
13694 * related global memory allocated for the library processing.
13695 * It doesn't deallocate any document related memory. One should
13696 * call xmlCleanupParser() only when the process has finished using
13697 * the library and all XML/HTML documents built with it.
13698 * See also xmlInitParser() which has the opposite function of preparing
13699 * the library for operations.
Owen Taylor3473f882001-02-23 17:55:21 +000013700 */
13701
13702void
13703xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000013704 if (!xmlParserInitialized)
13705 return;
13706
Owen Taylor3473f882001-02-23 17:55:21 +000013707 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000013708#ifdef LIBXML_CATALOG_ENABLED
13709 xmlCatalogCleanup();
13710#endif
Daniel Veillard14412512005-01-21 23:53:26 +000013711 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000013712 xmlCleanupInputCallbacks();
13713#ifdef LIBXML_OUTPUT_ENABLED
13714 xmlCleanupOutputCallbacks();
13715#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013716#ifdef LIBXML_SCHEMAS_ENABLED
13717 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000013718 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013719#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000013720 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000013721 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000013722 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000013723 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000013724 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000013725}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013726
13727/************************************************************************
13728 * *
13729 * New set (2.6.0) of simpler and more flexible APIs *
13730 * *
13731 ************************************************************************/
13732
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is expanded; when it is NULL every non-NULL string is freed.
 *
 * The expansion is wrapped in do { } while (0) so that the macro behaves
 * as a single statement: it must be followed by a semicolon and can be
 * used safely as the body of an if/else.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
13744
13745/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013746 * xmlCtxtReset:
13747 * @ctxt: an XML parser context
13748 *
13749 * Reset a parser context
13750 */
13751void
13752xmlCtxtReset(xmlParserCtxtPtr ctxt)
13753{
13754 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013755 xmlDictPtr dict;
13756
13757 if (ctxt == NULL)
13758 return;
13759
13760 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013761
13762 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
13763 xmlFreeInputStream(input);
13764 }
13765 ctxt->inputNr = 0;
13766 ctxt->input = NULL;
13767
13768 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000013769 if (ctxt->spaceTab != NULL) {
13770 ctxt->spaceTab[0] = -1;
13771 ctxt->space = &ctxt->spaceTab[0];
13772 } else {
13773 ctxt->space = NULL;
13774 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013775
13776
13777 ctxt->nodeNr = 0;
13778 ctxt->node = NULL;
13779
13780 ctxt->nameNr = 0;
13781 ctxt->name = NULL;
13782
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013783 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013784 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013785 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013786 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000013787 DICT_FREE(ctxt->directory);
13788 ctxt->directory = NULL;
13789 DICT_FREE(ctxt->extSubURI);
13790 ctxt->extSubURI = NULL;
13791 DICT_FREE(ctxt->extSubSystem);
13792 ctxt->extSubSystem = NULL;
13793 if (ctxt->myDoc != NULL)
13794 xmlFreeDoc(ctxt->myDoc);
13795 ctxt->myDoc = NULL;
13796
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013797 ctxt->standalone = -1;
13798 ctxt->hasExternalSubset = 0;
13799 ctxt->hasPErefs = 0;
13800 ctxt->html = 0;
13801 ctxt->external = 0;
13802 ctxt->instate = XML_PARSER_START;
13803 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013804
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013805 ctxt->wellFormed = 1;
13806 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000013807 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013808 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013809#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013810 ctxt->vctxt.userData = ctxt;
13811 ctxt->vctxt.error = xmlParserValidityError;
13812 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000013813#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013814 ctxt->record_info = 0;
13815 ctxt->nbChars = 0;
13816 ctxt->checkIndex = 0;
13817 ctxt->inSubset = 0;
13818 ctxt->errNo = XML_ERR_OK;
13819 ctxt->depth = 0;
13820 ctxt->charset = XML_CHAR_ENCODING_UTF8;
13821 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000013822 ctxt->nbentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013823 xmlInitNodeInfoSeq(&ctxt->node_seq);
13824
13825 if (ctxt->attsDefault != NULL) {
13826 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
13827 ctxt->attsDefault = NULL;
13828 }
13829 if (ctxt->attsSpecial != NULL) {
13830 xmlHashFree(ctxt->attsSpecial, NULL);
13831 ctxt->attsSpecial = NULL;
13832 }
13833
Daniel Veillard4432df22003-09-28 18:58:27 +000013834#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013835 if (ctxt->catalogs != NULL)
13836 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000013837#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000013838 if (ctxt->lastError.code != XML_ERR_OK)
13839 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013840}
13841
13842/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013843 * xmlCtxtResetPush:
13844 * @ctxt: an XML parser context
13845 * @chunk: a pointer to an array of chars
13846 * @size: number of chars in the array
13847 * @filename: an optional file name or URI
13848 * @encoding: the document encoding, or NULL
13849 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013850 * Reset a push parser context
13851 *
13852 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013853 */
13854int
13855xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
13856 int size, const char *filename, const char *encoding)
13857{
13858 xmlParserInputPtr inputStream;
13859 xmlParserInputBufferPtr buf;
13860 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
13861
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000013862 if (ctxt == NULL)
13863 return(1);
13864
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013865 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
13866 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
13867
13868 buf = xmlAllocParserInputBuffer(enc);
13869 if (buf == NULL)
13870 return(1);
13871
13872 if (ctxt == NULL) {
13873 xmlFreeParserInputBuffer(buf);
13874 return(1);
13875 }
13876
13877 xmlCtxtReset(ctxt);
13878
13879 if (ctxt->pushTab == NULL) {
13880 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
13881 sizeof(xmlChar *));
13882 if (ctxt->pushTab == NULL) {
13883 xmlErrMemory(ctxt, NULL);
13884 xmlFreeParserInputBuffer(buf);
13885 return(1);
13886 }
13887 }
13888
13889 if (filename == NULL) {
13890 ctxt->directory = NULL;
13891 } else {
13892 ctxt->directory = xmlParserGetDirectory(filename);
13893 }
13894
13895 inputStream = xmlNewInputStream(ctxt);
13896 if (inputStream == NULL) {
13897 xmlFreeParserInputBuffer(buf);
13898 return(1);
13899 }
13900
13901 if (filename == NULL)
13902 inputStream->filename = NULL;
13903 else
13904 inputStream->filename = (char *)
13905 xmlCanonicPath((const xmlChar *) filename);
13906 inputStream->buf = buf;
13907 inputStream->base = inputStream->buf->buffer->content;
13908 inputStream->cur = inputStream->buf->buffer->content;
13909 inputStream->end =
13910 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
13911
13912 inputPush(ctxt, inputStream);
13913
13914 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
13915 (ctxt->input->buf != NULL)) {
13916 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
13917 int cur = ctxt->input->cur - ctxt->input->base;
13918
13919 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
13920
13921 ctxt->input->base = ctxt->input->buf->buffer->content + base;
13922 ctxt->input->cur = ctxt->input->base + cur;
13923 ctxt->input->end =
13924 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->
13925 use];
13926#ifdef DEBUG_PUSH
13927 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
13928#endif
13929 }
13930
13931 if (encoding != NULL) {
13932 xmlCharEncodingHandlerPtr hdlr;
13933
Daniel Veillard37334572008-07-31 08:20:02 +000013934 if (ctxt->encoding != NULL)
13935 xmlFree((xmlChar *) ctxt->encoding);
13936 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
13937
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013938 hdlr = xmlFindCharEncodingHandler(encoding);
13939 if (hdlr != NULL) {
13940 xmlSwitchToEncoding(ctxt, hdlr);
13941 } else {
13942 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
13943 "Unsupported encoding %s\n", BAD_CAST encoding);
13944 }
13945 } else if (enc != XML_CHAR_ENCODING_NONE) {
13946 xmlSwitchEncoding(ctxt, enc);
13947 }
13948
13949 return(0);
13950}
13951
Daniel Veillard37334572008-07-31 08:20:02 +000013952
Daniel Veillard9ba8e382003-10-28 21:31:45 +000013953/**
Daniel Veillard37334572008-07-31 08:20:02 +000013954 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013955 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013956 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000013957 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013958 *
13959 * Applies the options to the parser context
13960 *
13961 * Returns 0 in case of success, the set of unknown or unimplemented options
13962 * in case of error.
13963 */
Daniel Veillard37334572008-07-31 08:20:02 +000013964static int
13965xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013966{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013967 if (ctxt == NULL)
13968 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000013969 if (encoding != NULL) {
13970 if (ctxt->encoding != NULL)
13971 xmlFree((xmlChar *) ctxt->encoding);
13972 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
13973 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013974 if (options & XML_PARSE_RECOVER) {
13975 ctxt->recovery = 1;
13976 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013977 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013978 } else
13979 ctxt->recovery = 0;
13980 if (options & XML_PARSE_DTDLOAD) {
13981 ctxt->loadsubset = XML_DETECT_IDS;
13982 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013983 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013984 } else
13985 ctxt->loadsubset = 0;
13986 if (options & XML_PARSE_DTDATTR) {
13987 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
13988 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013989 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013990 }
13991 if (options & XML_PARSE_NOENT) {
13992 ctxt->replaceEntities = 1;
13993 /* ctxt->loadsubset |= XML_DETECT_IDS; */
13994 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000013995 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013996 } else
13997 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000013998 if (options & XML_PARSE_PEDANTIC) {
13999 ctxt->pedantic = 1;
14000 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014001 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014002 } else
14003 ctxt->pedantic = 0;
14004 if (options & XML_PARSE_NOBLANKS) {
14005 ctxt->keepBlanks = 0;
14006 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14007 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014008 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014009 } else
14010 ctxt->keepBlanks = 1;
14011 if (options & XML_PARSE_DTDVALID) {
14012 ctxt->validate = 1;
14013 if (options & XML_PARSE_NOWARNING)
14014 ctxt->vctxt.warning = NULL;
14015 if (options & XML_PARSE_NOERROR)
14016 ctxt->vctxt.error = NULL;
14017 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014018 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014019 } else
14020 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014021 if (options & XML_PARSE_NOWARNING) {
14022 ctxt->sax->warning = NULL;
14023 options -= XML_PARSE_NOWARNING;
14024 }
14025 if (options & XML_PARSE_NOERROR) {
14026 ctxt->sax->error = NULL;
14027 ctxt->sax->fatalError = NULL;
14028 options -= XML_PARSE_NOERROR;
14029 }
Daniel Veillard81273902003-09-30 00:43:48 +000014030#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014031 if (options & XML_PARSE_SAX1) {
14032 ctxt->sax->startElement = xmlSAX2StartElement;
14033 ctxt->sax->endElement = xmlSAX2EndElement;
14034 ctxt->sax->startElementNs = NULL;
14035 ctxt->sax->endElementNs = NULL;
14036 ctxt->sax->initialized = 1;
14037 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014038 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014039 }
Daniel Veillard81273902003-09-30 00:43:48 +000014040#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014041 if (options & XML_PARSE_NODICT) {
14042 ctxt->dictNames = 0;
14043 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014044 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014045 } else {
14046 ctxt->dictNames = 1;
14047 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014048 if (options & XML_PARSE_NOCDATA) {
14049 ctxt->sax->cdataBlock = NULL;
14050 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014051 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014052 }
14053 if (options & XML_PARSE_NSCLEAN) {
14054 ctxt->options |= XML_PARSE_NSCLEAN;
14055 options -= XML_PARSE_NSCLEAN;
14056 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014057 if (options & XML_PARSE_NONET) {
14058 ctxt->options |= XML_PARSE_NONET;
14059 options -= XML_PARSE_NONET;
14060 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014061 if (options & XML_PARSE_COMPACT) {
14062 ctxt->options |= XML_PARSE_COMPACT;
14063 options -= XML_PARSE_COMPACT;
14064 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014065 if (options & XML_PARSE_OLD10) {
14066 ctxt->options |= XML_PARSE_OLD10;
14067 options -= XML_PARSE_OLD10;
14068 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014069 if (options & XML_PARSE_NOBASEFIX) {
14070 ctxt->options |= XML_PARSE_NOBASEFIX;
14071 options -= XML_PARSE_NOBASEFIX;
14072 }
14073 if (options & XML_PARSE_HUGE) {
14074 ctxt->options |= XML_PARSE_HUGE;
14075 options -= XML_PARSE_HUGE;
14076 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014077 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014078 return (options);
14079}
14080
14081/**
Daniel Veillard37334572008-07-31 08:20:02 +000014082 * xmlCtxtUseOptions:
14083 * @ctxt: an XML parser context
14084 * @options: a combination of xmlParserOption
14085 *
14086 * Applies the options to the parser context
14087 *
14088 * Returns 0 in case of success, the set of unknown or unimplemented options
14089 * in case of error.
14090 */
14091int
14092xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14093{
14094 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14095}
14096
14097/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014098 * xmlDoRead:
14099 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014100 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014101 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014102 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014103 * @reuse: keep the context for reuse
14104 *
14105 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014106 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014107 * Returns the resulting document tree or NULL
14108 */
14109static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014110xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14111 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014112{
14113 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014114
14115 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014116 if (encoding != NULL) {
14117 xmlCharEncodingHandlerPtr hdlr;
14118
14119 hdlr = xmlFindCharEncodingHandler(encoding);
14120 if (hdlr != NULL)
14121 xmlSwitchToEncoding(ctxt, hdlr);
14122 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014123 if ((URL != NULL) && (ctxt->input != NULL) &&
14124 (ctxt->input->filename == NULL))
14125 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014126 xmlParseDocument(ctxt);
14127 if ((ctxt->wellFormed) || ctxt->recovery)
14128 ret = ctxt->myDoc;
14129 else {
14130 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014131 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014132 xmlFreeDoc(ctxt->myDoc);
14133 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014134 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014135 ctxt->myDoc = NULL;
14136 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014137 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014138 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014139
14140 return (ret);
14141}
14142
14143/**
14144 * xmlReadDoc:
14145 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014146 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014147 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014148 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014149 *
14150 * parse an XML in-memory document and build a tree.
14151 *
14152 * Returns the resulting document tree
14153 */
14154xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014155xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014156{
14157 xmlParserCtxtPtr ctxt;
14158
14159 if (cur == NULL)
14160 return (NULL);
14161
14162 ctxt = xmlCreateDocParserCtxt(cur);
14163 if (ctxt == NULL)
14164 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014165 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014166}
14167
14168/**
14169 * xmlReadFile:
14170 * @filename: a file or URL
14171 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014172 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014173 *
14174 * parse an XML file from the filesystem or the network.
14175 *
14176 * Returns the resulting document tree
14177 */
14178xmlDocPtr
14179xmlReadFile(const char *filename, const char *encoding, int options)
14180{
14181 xmlParserCtxtPtr ctxt;
14182
Daniel Veillard61b93382003-11-03 14:28:31 +000014183 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014184 if (ctxt == NULL)
14185 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014186 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014187}
14188
14189/**
14190 * xmlReadMemory:
14191 * @buffer: a pointer to a char array
14192 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014193 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014194 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014195 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014196 *
14197 * parse an XML in-memory document and build a tree.
14198 *
14199 * Returns the resulting document tree
14200 */
14201xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014202xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014203{
14204 xmlParserCtxtPtr ctxt;
14205
14206 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14207 if (ctxt == NULL)
14208 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014209 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014210}
14211
14212/**
14213 * xmlReadFd:
14214 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014215 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014216 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014217 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014218 *
14219 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014220 * NOTE that the file descriptor will not be closed when the
14221 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014222 *
14223 * Returns the resulting document tree
14224 */
14225xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014226xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014227{
14228 xmlParserCtxtPtr ctxt;
14229 xmlParserInputBufferPtr input;
14230 xmlParserInputPtr stream;
14231
14232 if (fd < 0)
14233 return (NULL);
14234
14235 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14236 if (input == NULL)
14237 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014238 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014239 ctxt = xmlNewParserCtxt();
14240 if (ctxt == NULL) {
14241 xmlFreeParserInputBuffer(input);
14242 return (NULL);
14243 }
14244 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14245 if (stream == NULL) {
14246 xmlFreeParserInputBuffer(input);
14247 xmlFreeParserCtxt(ctxt);
14248 return (NULL);
14249 }
14250 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014251 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014252}
14253
14254/**
14255 * xmlReadIO:
14256 * @ioread: an I/O read function
14257 * @ioclose: an I/O close function
14258 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014259 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014260 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014261 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014262 *
14263 * parse an XML document from I/O functions and source and build a tree.
14264 *
14265 * Returns the resulting document tree
14266 */
14267xmlDocPtr
14268xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014269 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014270{
14271 xmlParserCtxtPtr ctxt;
14272 xmlParserInputBufferPtr input;
14273 xmlParserInputPtr stream;
14274
14275 if (ioread == NULL)
14276 return (NULL);
14277
14278 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14279 XML_CHAR_ENCODING_NONE);
14280 if (input == NULL)
14281 return (NULL);
14282 ctxt = xmlNewParserCtxt();
14283 if (ctxt == NULL) {
14284 xmlFreeParserInputBuffer(input);
14285 return (NULL);
14286 }
14287 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14288 if (stream == NULL) {
14289 xmlFreeParserInputBuffer(input);
14290 xmlFreeParserCtxt(ctxt);
14291 return (NULL);
14292 }
14293 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014294 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014295}
14296
14297/**
14298 * xmlCtxtReadDoc:
14299 * @ctxt: an XML parser context
14300 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014301 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014302 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014303 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014304 *
14305 * parse an XML in-memory document and build a tree.
14306 * This reuses the existing @ctxt parser context
14307 *
14308 * Returns the resulting document tree
14309 */
14310xmlDocPtr
14311xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014312 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014313{
14314 xmlParserInputPtr stream;
14315
14316 if (cur == NULL)
14317 return (NULL);
14318 if (ctxt == NULL)
14319 return (NULL);
14320
14321 xmlCtxtReset(ctxt);
14322
14323 stream = xmlNewStringInputStream(ctxt, cur);
14324 if (stream == NULL) {
14325 return (NULL);
14326 }
14327 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014328 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014329}
14330
14331/**
14332 * xmlCtxtReadFile:
14333 * @ctxt: an XML parser context
14334 * @filename: a file or URL
14335 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014336 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014337 *
14338 * parse an XML file from the filesystem or the network.
14339 * This reuses the existing @ctxt parser context
14340 *
14341 * Returns the resulting document tree
14342 */
14343xmlDocPtr
14344xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14345 const char *encoding, int options)
14346{
14347 xmlParserInputPtr stream;
14348
14349 if (filename == NULL)
14350 return (NULL);
14351 if (ctxt == NULL)
14352 return (NULL);
14353
14354 xmlCtxtReset(ctxt);
14355
Daniel Veillard29614c72004-11-26 10:47:26 +000014356 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014357 if (stream == NULL) {
14358 return (NULL);
14359 }
14360 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014361 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014362}
14363
14364/**
14365 * xmlCtxtReadMemory:
14366 * @ctxt: an XML parser context
14367 * @buffer: a pointer to a char array
14368 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014369 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014370 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014371 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014372 *
14373 * parse an XML in-memory document and build a tree.
14374 * This reuses the existing @ctxt parser context
14375 *
14376 * Returns the resulting document tree
14377 */
14378xmlDocPtr
14379xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014380 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014381{
14382 xmlParserInputBufferPtr input;
14383 xmlParserInputPtr stream;
14384
14385 if (ctxt == NULL)
14386 return (NULL);
14387 if (buffer == NULL)
14388 return (NULL);
14389
14390 xmlCtxtReset(ctxt);
14391
14392 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14393 if (input == NULL) {
14394 return(NULL);
14395 }
14396
14397 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14398 if (stream == NULL) {
14399 xmlFreeParserInputBuffer(input);
14400 return(NULL);
14401 }
14402
14403 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014404 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014405}
14406
14407/**
14408 * xmlCtxtReadFd:
14409 * @ctxt: an XML parser context
14410 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014411 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014412 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014413 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014414 *
14415 * parse an XML from a file descriptor and build a tree.
14416 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014417 * NOTE that the file descriptor will not be closed when the
14418 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014419 *
14420 * Returns the resulting document tree
14421 */
14422xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014423xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14424 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014425{
14426 xmlParserInputBufferPtr input;
14427 xmlParserInputPtr stream;
14428
14429 if (fd < 0)
14430 return (NULL);
14431 if (ctxt == NULL)
14432 return (NULL);
14433
14434 xmlCtxtReset(ctxt);
14435
14436
14437 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14438 if (input == NULL)
14439 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014440 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014441 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14442 if (stream == NULL) {
14443 xmlFreeParserInputBuffer(input);
14444 return (NULL);
14445 }
14446 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014447 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014448}
14449
14450/**
14451 * xmlCtxtReadIO:
14452 * @ctxt: an XML parser context
14453 * @ioread: an I/O read function
14454 * @ioclose: an I/O close function
14455 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014456 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014457 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014458 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014459 *
14460 * parse an XML document from I/O functions and source and build a tree.
14461 * This reuses the existing @ctxt parser context
14462 *
14463 * Returns the resulting document tree
14464 */
14465xmlDocPtr
14466xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
14467 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000014468 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014469 const char *encoding, int options)
14470{
14471 xmlParserInputBufferPtr input;
14472 xmlParserInputPtr stream;
14473
14474 if (ioread == NULL)
14475 return (NULL);
14476 if (ctxt == NULL)
14477 return (NULL);
14478
14479 xmlCtxtReset(ctxt);
14480
14481 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14482 XML_CHAR_ENCODING_NONE);
14483 if (input == NULL)
14484 return (NULL);
14485 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14486 if (stream == NULL) {
14487 xmlFreeParserInputBuffer(input);
14488 return (NULL);
14489 }
14490 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014491 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014492}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000014493
14494#define bottom_parser
14495#include "elfgcchack.h"