blob: 78b69c1f6ff06fac5e1d71cfd705d4a01c3d7b64 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser; namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their related
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and are called
 * either from the SAX callbacks or as standalone functions using a
 * preparsed document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
Daniel Veillard459eeb92012-07-17 16:19:17 +080043#include <limits.h>
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <string.h>
Aleksey Sanine7acf432003-10-02 20:05:27 +000045#include <stdarg.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000047#include <libxml/threads.h>
48#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000049#include <libxml/tree.h>
50#include <libxml/parser.h>
51#include <libxml/parserInternals.h>
52#include <libxml/valid.h>
53#include <libxml/entities.h>
54#include <libxml/xmlerror.h>
55#include <libxml/encoding.h>
56#include <libxml/xmlIO.h>
57#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000058#ifdef LIBXML_CATALOG_ENABLED
59#include <libxml/catalog.h>
60#endif
William M. Brack1d8c9b22004-12-25 10:14:57 +000061#ifdef LIBXML_SCHEMAS_ENABLED
62#include <libxml/xmlschemastypes.h>
63#include <libxml/relaxng.h>
64#endif
Owen Taylor3473f882001-02-23 17:55:21 +000065#ifdef HAVE_CTYPE_H
66#include <ctype.h>
67#endif
68#ifdef HAVE_STDLIB_H
69#include <stdlib.h>
70#endif
71#ifdef HAVE_SYS_STAT_H
72#include <sys/stat.h>
73#endif
74#ifdef HAVE_FCNTL_H
75#include <fcntl.h>
76#endif
77#ifdef HAVE_UNISTD_H
78#include <unistd.h>
79#endif
80#ifdef HAVE_ZLIB_H
81#include <zlib.h>
82#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +020083#ifdef HAVE_LZMA_H
84#include <lzma.h>
85#endif
Owen Taylor3473f882001-02-23 17:55:21 +000086
Daniel Veillard768eb3b2012-07-16 14:19:49 +080087#include "buf.h"
88#include "enc.h"
89
Daniel Veillard0161e632008-08-28 15:36:32 +000090static void
91xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info);
92
Rob Richards9c0aa472009-03-26 18:10:19 +000093static xmlParserCtxtPtr
94xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
95 const xmlChar *base, xmlParserCtxtPtr pctx);
96
Daniel Veillard0161e632008-08-28 15:36:32 +000097/************************************************************************
98 * *
99 * Arbitrary limits set in the parser. See XML_PARSE_HUGE *
100 * *
101 ************************************************************************/
102
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * XML_PARSER_NON_LINEAR is the threshold at which the ratio of parsed
 * entity replacements over the size in bytes of the input indicates an
 * exponential behaviour. A value of 10 corresponds to at least 3 entity
 * replacements per byte of input.
 */
#define XML_PARSER_NON_LINEAR 10
113
114/*
115 * xmlParserEntityCheck
116 *
117 * Function to check non-linear entity expansion behaviour
118 * This is here to detect and stop exponential linear entity expansion
119 * This is not a limitation of the parser but a safety
120 * boundary feature. It can be disabled with the XML_PARSE_HUGE
121 * parser option.
122 */
123static int
Daniel Veillard459eeb92012-07-17 16:19:17 +0800124xmlParserEntityCheck(xmlParserCtxtPtr ctxt, size_t size,
Daniel Veillard0161e632008-08-28 15:36:32 +0000125 xmlEntityPtr ent)
126{
Daniel Veillard459eeb92012-07-17 16:19:17 +0800127 size_t consumed = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +0000128
129 if ((ctxt == NULL) || (ctxt->options & XML_PARSE_HUGE))
130 return (0);
131 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
132 return (1);
133 if (size != 0) {
134 /*
135 * Do the check based on the replacement size of the entity
136 */
137 if (size < XML_PARSER_BIG_ENTITY)
138 return(0);
139
140 /*
141 * A limit on the amount of text data reasonably used
142 */
143 if (ctxt->input != NULL) {
144 consumed = ctxt->input->consumed +
145 (ctxt->input->cur - ctxt->input->base);
146 }
147 consumed += ctxt->sizeentities;
148
149 if ((size < XML_PARSER_NON_LINEAR * consumed) &&
150 (ctxt->nbentities * 3 < XML_PARSER_NON_LINEAR * consumed))
151 return (0);
152 } else if (ent != NULL) {
153 /*
154 * use the number of parsed entities in the replacement
155 */
156 size = ent->checked;
157
158 /*
159 * The amount of data parsed counting entities size only once
160 */
161 if (ctxt->input != NULL) {
162 consumed = ctxt->input->consumed +
163 (ctxt->input->cur - ctxt->input->base);
164 }
165 consumed += ctxt->sizeentities;
166
167 /*
168 * Check the density of entities for the amount of data
169 * knowing an entity reference will take at least 3 bytes
170 */
171 if (size * 3 < consumed * XML_PARSER_NON_LINEAR)
172 return (0);
173 } else {
174 /*
175 * strange we got no data for checking just return
176 */
177 return (0);
178 }
179
180 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
181 return (1);
182}
183
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary depth limit for the XML documents that we allow to be
 * processed. This is not a limitation of the parser but a safety
 * boundary feature. It can be disabled with the XML_PARSE_HUGE
 * parser option.
 */
unsigned int xmlParserMaxDepth = 256;
Owen Taylor3473f882001-02-23 17:55:21 +0000193
Daniel Veillard0fb18932003-09-07 09:14:37 +0000194
Daniel Veillard0161e632008-08-28 15:36:32 +0000195
#define SAX2 1

/* Buffer sizes used by the parser. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
200
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specs.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", "xml-model", NULL };
210
Daniel Veillarda07050d2003-10-19 14:46:32 +0000211
Owen Taylor3473f882001-02-23 17:55:21 +0000212/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Daniel Veillard8ed10722009-08-20 19:17:36 +0200213static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
214 const xmlChar **str);
Owen Taylor3473f882001-02-23 17:55:21 +0000215
Daniel Veillard7d515752003-09-26 19:12:37 +0000216static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000217xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
218 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000219 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000220 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000221
Daniel Veillard37334572008-07-31 08:20:02 +0000222static int
223xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options,
224 const char *encoding);
Daniel Veillard81273902003-09-30 00:43:48 +0000225#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +0000226static void
227xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
228 xmlNodePtr lastNode);
Daniel Veillard81273902003-09-30 00:43:48 +0000229#endif /* LIBXML_LEGACY_ENABLED */
Daniel Veillard8107a222002-01-13 14:10:10 +0000230
Daniel Veillard7d515752003-09-26 19:12:37 +0000231static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +0000232xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
233 const xmlChar *string, void *user_data, xmlNodePtr *lst);
Daniel Veillarde57ec792003-09-10 10:50:59 +0000234
Daniel Veillard8bf64ae2008-03-24 20:45:21 +0000235static int
236xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
237
Daniel Veillarde57ec792003-09-10 10:50:59 +0000238/************************************************************************
239 * *
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000240 * Some factorized error routines *
241 * *
242 ************************************************************************/
243
244/**
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000245 * xmlErrAttributeDup:
246 * @ctxt: an XML parser context
247 * @prefix: the attribute prefix
248 * @localname: the attribute localname
249 *
250 * Handle a redefinition of attribute error
251 */
252static void
253xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
254 const xmlChar * localname)
255{
Daniel Veillard157fee02003-10-31 10:36:03 +0000256 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
257 (ctxt->instate == XML_PARSER_EOF))
258 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000259 if (ctxt != NULL)
260 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard76d36452009-09-07 11:19:33 +0200261
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000262 if (prefix == NULL)
Daniel Veillard659e71e2003-10-10 14:10:40 +0000263 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200264 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000265 (const char *) localname, NULL, NULL, 0, 0,
266 "Attribute %s redefined\n", localname);
Daniel Veillard2b8c4a12003-10-02 22:28:19 +0000267 else
Daniel Veillard659e71e2003-10-10 14:10:40 +0000268 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
Daniel Veillard76d36452009-09-07 11:19:33 +0200269 XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000270 (const char *) prefix, (const char *) localname,
271 NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
272 localname);
Daniel Veillard30e76072006-03-09 14:13:55 +0000273 if (ctxt != NULL) {
274 ctxt->wellFormed = 0;
275 if (ctxt->recovery == 0)
276 ctxt->disableSAX = 1;
277 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000278}
279
280/**
281 * xmlFatalErr:
282 * @ctxt: an XML parser context
283 * @error: the error number
284 * @extra: extra information string
285 *
286 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
287 */
288static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000289xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000290{
291 const char *errmsg;
292
Daniel Veillard157fee02003-10-31 10:36:03 +0000293 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
294 (ctxt->instate == XML_PARSER_EOF))
295 return;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000296 switch (error) {
297 case XML_ERR_INVALID_HEX_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000298 errmsg = "CharRef: invalid hexadecimal value\n";
299 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000300 case XML_ERR_INVALID_DEC_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000301 errmsg = "CharRef: invalid decimal value\n";
302 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000303 case XML_ERR_INVALID_CHARREF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000304 errmsg = "CharRef: invalid value\n";
305 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000306 case XML_ERR_INTERNAL_ERROR:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000307 errmsg = "internal error";
308 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000309 case XML_ERR_PEREF_AT_EOF:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000310 errmsg = "PEReference at end of document\n";
311 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000312 case XML_ERR_PEREF_IN_PROLOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000313 errmsg = "PEReference in prolog\n";
314 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000315 case XML_ERR_PEREF_IN_EPILOG:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000316 errmsg = "PEReference in epilog\n";
317 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000318 case XML_ERR_PEREF_NO_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000319 errmsg = "PEReference: no name\n";
320 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000321 case XML_ERR_PEREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000322 errmsg = "PEReference: expecting ';'\n";
323 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000324 case XML_ERR_ENTITY_LOOP:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000325 errmsg = "Detected an entity reference loop\n";
326 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000327 case XML_ERR_ENTITY_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000328 errmsg = "EntityValue: \" or ' expected\n";
329 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000330 case XML_ERR_ENTITY_PE_INTERNAL:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000331 errmsg = "PEReferences forbidden in internal subset\n";
332 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000333 case XML_ERR_ENTITY_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000334 errmsg = "EntityValue: \" or ' expected\n";
335 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000336 case XML_ERR_ATTRIBUTE_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000337 errmsg = "AttValue: \" or ' expected\n";
338 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000339 case XML_ERR_LT_IN_ATTRIBUTE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000340 errmsg = "Unescaped '<' not allowed in attributes values\n";
341 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000342 case XML_ERR_LITERAL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000343 errmsg = "SystemLiteral \" or ' expected\n";
344 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000345 case XML_ERR_LITERAL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000346 errmsg = "Unfinished System or Public ID \" or ' expected\n";
347 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000348 case XML_ERR_MISPLACED_CDATA_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000349 errmsg = "Sequence ']]>' not allowed in content\n";
350 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000351 case XML_ERR_URI_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000352 errmsg = "SYSTEM or PUBLIC, the URI is missing\n";
353 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000354 case XML_ERR_PUBID_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000355 errmsg = "PUBLIC, the Public Identifier is missing\n";
356 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000357 case XML_ERR_HYPHEN_IN_COMMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000358 errmsg = "Comment must not contain '--' (double-hyphen)\n";
359 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000360 case XML_ERR_PI_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000361 errmsg = "xmlParsePI : no target name\n";
362 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000363 case XML_ERR_RESERVED_XML_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000364 errmsg = "Invalid PI name\n";
365 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000366 case XML_ERR_NOTATION_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000367 errmsg = "NOTATION: Name expected here\n";
368 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000369 case XML_ERR_NOTATION_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000370 errmsg = "'>' required to close NOTATION declaration\n";
371 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000372 case XML_ERR_VALUE_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000373 errmsg = "Entity value required\n";
374 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000375 case XML_ERR_URI_FRAGMENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000376 errmsg = "Fragment not allowed";
377 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000378 case XML_ERR_ATTLIST_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000379 errmsg = "'(' required to start ATTLIST enumeration\n";
380 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000381 case XML_ERR_NMTOKEN_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000382 errmsg = "NmToken expected in ATTLIST enumeration\n";
383 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000384 case XML_ERR_ATTLIST_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000385 errmsg = "')' required to finish ATTLIST enumeration\n";
386 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000387 case XML_ERR_MIXED_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000388 errmsg = "MixedContentDecl : '|' or ')*' expected\n";
389 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000390 case XML_ERR_PCDATA_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000391 errmsg = "MixedContentDecl : '#PCDATA' expected\n";
392 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000393 case XML_ERR_ELEMCONTENT_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000394 errmsg = "ContentDecl : Name or '(' expected\n";
395 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000396 case XML_ERR_ELEMCONTENT_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000397 errmsg = "ContentDecl : ',' '|' or ')' expected\n";
398 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000399 case XML_ERR_PEREF_IN_INT_SUBSET:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000400 errmsg =
401 "PEReference: forbidden within markup decl in internal subset\n";
402 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000403 case XML_ERR_GT_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000404 errmsg = "expected '>'\n";
405 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000406 case XML_ERR_CONDSEC_INVALID:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000407 errmsg = "XML conditional section '[' expected\n";
408 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000409 case XML_ERR_EXT_SUBSET_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000410 errmsg = "Content error in the external subset\n";
411 break;
412 case XML_ERR_CONDSEC_INVALID_KEYWORD:
413 errmsg =
414 "conditional section INCLUDE or IGNORE keyword expected\n";
415 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000416 case XML_ERR_CONDSEC_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000417 errmsg = "XML conditional section not closed\n";
418 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000419 case XML_ERR_XMLDECL_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000420 errmsg = "Text declaration '<?xml' required\n";
421 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000422 case XML_ERR_XMLDECL_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000423 errmsg = "parsing XML declaration: '?>' expected\n";
424 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000425 case XML_ERR_EXT_ENTITY_STANDALONE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000426 errmsg = "external parsed entities cannot be standalone\n";
427 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000428 case XML_ERR_ENTITYREF_SEMICOL_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000429 errmsg = "EntityRef: expecting ';'\n";
430 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000431 case XML_ERR_DOCTYPE_NOT_FINISHED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000432 errmsg = "DOCTYPE improperly terminated\n";
433 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000434 case XML_ERR_LTSLASH_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000435 errmsg = "EndTag: '</' not found\n";
436 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000437 case XML_ERR_EQUAL_REQUIRED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000438 errmsg = "expected '='\n";
439 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000440 case XML_ERR_STRING_NOT_CLOSED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000441 errmsg = "String not closed expecting \" or '\n";
442 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000443 case XML_ERR_STRING_NOT_STARTED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000444 errmsg = "String not started expecting ' or \"\n";
445 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000446 case XML_ERR_ENCODING_NAME:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000447 errmsg = "Invalid XML encoding name\n";
448 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000449 case XML_ERR_STANDALONE_VALUE:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000450 errmsg = "standalone accepts only 'yes' or 'no'\n";
451 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000452 case XML_ERR_DOCUMENT_EMPTY:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000453 errmsg = "Document is empty\n";
454 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000455 case XML_ERR_DOCUMENT_END:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000456 errmsg = "Extra content at the end of the document\n";
457 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000458 case XML_ERR_NOT_WELL_BALANCED:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000459 errmsg = "chunk is not well balanced\n";
460 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000461 case XML_ERR_EXTRA_CONTENT:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000462 errmsg = "extra content at the end of well balanced chunk\n";
463 break;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000464 case XML_ERR_VERSION_MISSING:
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000465 errmsg = "Malformed declaration expecting version\n";
466 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000467#if 0
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000468 case:
469 errmsg = "\n";
470 break;
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000471#endif
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000472 default:
473 errmsg = "Unregistered error message\n";
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000474 }
Daniel Veillard30e76072006-03-09 14:13:55 +0000475 if (ctxt != NULL)
476 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000477 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000478 XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, errmsg,
479 info);
Daniel Veillard30e76072006-03-09 14:13:55 +0000480 if (ctxt != NULL) {
481 ctxt->wellFormed = 0;
482 if (ctxt->recovery == 0)
483 ctxt->disableSAX = 1;
484 }
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000485}
486
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000487/**
488 * xmlFatalErrMsg:
489 * @ctxt: an XML parser context
490 * @error: the error number
491 * @msg: the error message
492 *
493 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
494 */
495static void
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000496xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
497 const char *msg)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000498{
Daniel Veillard157fee02003-10-31 10:36:03 +0000499 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
500 (ctxt->instate == XML_PARSER_EOF))
501 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000502 if (ctxt != NULL)
503 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000504 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
Daniel Veillardbccae2d2009-06-04 11:22:45 +0200505 XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
Daniel Veillard30e76072006-03-09 14:13:55 +0000506 if (ctxt != NULL) {
507 ctxt->wellFormed = 0;
508 if (ctxt->recovery == 0)
509 ctxt->disableSAX = 1;
510 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000511}
512
513/**
Daniel Veillard24eb9782003-10-04 21:08:09 +0000514 * xmlWarningMsg:
515 * @ctxt: an XML parser context
516 * @error: the error number
517 * @msg: the error message
518 * @str1: extra data
519 * @str2: extra data
520 *
521 * Handle a warning.
522 */
523static void
524xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
525 const char *msg, const xmlChar *str1, const xmlChar *str2)
526{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000527 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard34e3f642008-07-29 09:02:27 +0000528
Daniel Veillard157fee02003-10-31 10:36:03 +0000529 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
530 (ctxt->instate == XML_PARSER_EOF))
531 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000532 if ((ctxt != NULL) && (ctxt->sax != NULL) &&
533 (ctxt->sax->initialized == XML_SAX2_MAGIC))
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000534 schannel = ctxt->sax->serror;
Daniel Veillardd44b9362009-09-07 12:15:08 +0200535 if (ctxt != NULL) {
536 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000537 (ctxt->sax) ? ctxt->sax->warning : NULL,
538 ctxt->userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000539 ctxt, NULL, XML_FROM_PARSER, error,
540 XML_ERR_WARNING, NULL, 0,
541 (const char *) str1, (const char *) str2, NULL, 0, 0,
542 msg, (const char *) str1, (const char *) str2);
Daniel Veillardd44b9362009-09-07 12:15:08 +0200543 } else {
544 __xmlRaiseError(schannel, NULL, NULL,
545 ctxt, NULL, XML_FROM_PARSER, error,
546 XML_ERR_WARNING, NULL, 0,
547 (const char *) str1, (const char *) str2, NULL, 0, 0,
548 msg, (const char *) str1, (const char *) str2);
549 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000550}
551
552/**
553 * xmlValidityError:
554 * @ctxt: an XML parser context
555 * @error: the error number
556 * @msg: the error message
557 * @str1: extra data
558 *
Daniel Veillardf88d8cf2003-12-08 10:25:02 +0000559 * Handle a validity error.
Daniel Veillard24eb9782003-10-04 21:08:09 +0000560 */
561static void
562xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000563 const char *msg, const xmlChar *str1, const xmlChar *str2)
Daniel Veillard24eb9782003-10-04 21:08:09 +0000564{
Daniel Veillard9bcc7c52003-10-11 10:57:05 +0000565 xmlStructuredErrorFunc schannel = NULL;
Daniel Veillard157fee02003-10-31 10:36:03 +0000566
567 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
568 (ctxt->instate == XML_PARSER_EOF))
569 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000570 if (ctxt != NULL) {
571 ctxt->errNo = error;
572 if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
573 schannel = ctxt->sax->serror;
574 }
Daniel Veillard76d36452009-09-07 11:19:33 +0200575 if (ctxt != NULL) {
576 __xmlRaiseError(schannel,
Daniel Veillard659e71e2003-10-10 14:10:40 +0000577 ctxt->vctxt.error, ctxt->vctxt.userData,
Daniel Veillard24eb9782003-10-04 21:08:09 +0000578 ctxt, NULL, XML_FROM_DTD, error,
579 XML_ERR_ERROR, NULL, 0, (const char *) str1,
Daniel Veillardae0765b2008-07-31 19:54:59 +0000580 (const char *) str2, NULL, 0, 0,
581 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000582 ctxt->valid = 0;
Daniel Veillard76d36452009-09-07 11:19:33 +0200583 } else {
584 __xmlRaiseError(schannel, NULL, NULL,
585 ctxt, NULL, XML_FROM_DTD, error,
586 XML_ERR_ERROR, NULL, 0, (const char *) str1,
587 (const char *) str2, NULL, 0, 0,
588 msg, (const char *) str1, (const char *) str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000589 }
Daniel Veillard24eb9782003-10-04 21:08:09 +0000590}
591
592/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000593 * xmlFatalErrMsgInt:
594 * @ctxt: an XML parser context
595 * @error: the error number
596 * @msg: the error message
597 * @val: an integer value
598 *
599 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
600 */
601static void
602xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000603 const char *msg, int val)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000604{
Daniel Veillard157fee02003-10-31 10:36:03 +0000605 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
606 (ctxt->instate == XML_PARSER_EOF))
607 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000608 if (ctxt != NULL)
609 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000610 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000611 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
612 NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000613 if (ctxt != NULL) {
614 ctxt->wellFormed = 0;
615 if (ctxt->recovery == 0)
616 ctxt->disableSAX = 1;
617 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000618}
619
620/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000621 * xmlFatalErrMsgStrIntStr:
622 * @ctxt: an XML parser context
623 * @error: the error number
624 * @msg: the error message
625 * @str1: an string info
626 * @val: an integer value
627 * @str2: an string info
628 *
629 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
630 */
631static void
632xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
633 const char *msg, const xmlChar *str1, int val,
634 const xmlChar *str2)
635{
Daniel Veillard157fee02003-10-31 10:36:03 +0000636 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
637 (ctxt->instate == XML_PARSER_EOF))
638 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000639 if (ctxt != NULL)
640 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000641 __xmlRaiseError(NULL, NULL, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000642 ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
643 NULL, 0, (const char *) str1, (const char *) str2,
644 NULL, val, 0, msg, str1, val, str2);
Daniel Veillard30e76072006-03-09 14:13:55 +0000645 if (ctxt != NULL) {
646 ctxt->wellFormed = 0;
647 if (ctxt->recovery == 0)
648 ctxt->disableSAX = 1;
649 }
Daniel Veillardf403d292003-10-05 13:51:35 +0000650}
651
652/**
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000653 * xmlFatalErrMsgStr:
654 * @ctxt: an XML parser context
655 * @error: the error number
656 * @msg: the error message
657 * @val: a string value
658 *
659 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
660 */
661static void
662xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000663 const char *msg, const xmlChar * val)
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000664{
Daniel Veillard157fee02003-10-31 10:36:03 +0000665 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
666 (ctxt->instate == XML_PARSER_EOF))
667 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000668 if (ctxt != NULL)
669 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000670 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000671 XML_FROM_PARSER, error, XML_ERR_FATAL,
672 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
673 val);
Daniel Veillard30e76072006-03-09 14:13:55 +0000674 if (ctxt != NULL) {
675 ctxt->wellFormed = 0;
676 if (ctxt->recovery == 0)
677 ctxt->disableSAX = 1;
678 }
Daniel Veillardbc92eca2003-09-15 09:48:06 +0000679}
680
681/**
Daniel Veillardf403d292003-10-05 13:51:35 +0000682 * xmlErrMsgStr:
683 * @ctxt: an XML parser context
684 * @error: the error number
685 * @msg: the error message
686 * @val: a string value
687 *
688 * Handle a non fatal parser error
689 */
690static void
691xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
692 const char *msg, const xmlChar * val)
693{
Daniel Veillard157fee02003-10-31 10:36:03 +0000694 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
695 (ctxt->instate == XML_PARSER_EOF))
696 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000697 if (ctxt != NULL)
698 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000699 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
Daniel Veillardf403d292003-10-05 13:51:35 +0000700 XML_FROM_PARSER, error, XML_ERR_ERROR,
701 NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
702 val);
703}
704
705/**
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000706 * xmlNsErr:
707 * @ctxt: an XML parser context
708 * @error: the error number
709 * @msg: the message
710 * @info1: extra information string
711 * @info2: extra information string
712 *
713 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
714 */
715static void
716xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
717 const char *msg,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000718 const xmlChar * info1, const xmlChar * info2,
719 const xmlChar * info3)
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000720{
Daniel Veillard157fee02003-10-31 10:36:03 +0000721 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
722 (ctxt->instate == XML_PARSER_EOF))
723 return;
Daniel Veillard30e76072006-03-09 14:13:55 +0000724 if (ctxt != NULL)
725 ctxt->errNo = error;
Daniel Veillard659e71e2003-10-10 14:10:40 +0000726 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
Daniel Veillardbb5abab2003-10-03 22:21:51 +0000727 XML_ERR_ERROR, NULL, 0, (const char *) info1,
728 (const char *) info2, (const char *) info3, 0, 0, msg,
729 info1, info2, info3);
Daniel Veillard30e76072006-03-09 14:13:55 +0000730 if (ctxt != NULL)
731 ctxt->nsWellFormed = 0;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +0000732}
733
Daniel Veillard37334572008-07-31 08:20:02 +0000734/**
735 * xmlNsWarn
736 * @ctxt: an XML parser context
737 * @error: the error number
738 * @msg: the message
739 * @info1: extra information string
740 * @info2: extra information string
741 *
Daniel Veillard288bb622012-05-07 15:01:29 +0800742 * Handle a namespace warning error
Daniel Veillard37334572008-07-31 08:20:02 +0000743 */
744static void
745xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
746 const char *msg,
747 const xmlChar * info1, const xmlChar * info2,
748 const xmlChar * info3)
749{
750 if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
751 (ctxt->instate == XML_PARSER_EOF))
752 return;
Daniel Veillard37334572008-07-31 08:20:02 +0000753 __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
754 XML_ERR_WARNING, NULL, 0, (const char *) info1,
755 (const char *) info2, (const char *) info3, 0, 0, msg,
756 info1, info2, info3);
757}
758
Daniel Veillard1afc9f32003-09-13 12:44:05 +0000759/************************************************************************
760 * *
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000761 * Library wide options *
762 * *
763 ************************************************************************/
764
765/**
766 * xmlHasFeature:
767 * @feature: the feature to be examined
768 *
769 * Examines if the library has been compiled with a given feature.
770 *
771 * Returns a non-zero value if the feature exist, otherwise zero.
772 * Returns zero (0) if the feature does not exist or an unknown
773 * unknown feature is requested, non-zero otherwise.
774 */
775int
776xmlHasFeature(xmlFeature feature)
777{
778 switch (feature) {
Daniel Veillard602434d2005-09-12 09:20:31 +0000779 case XML_WITH_THREAD:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000780#ifdef LIBXML_THREAD_ENABLED
781 return(1);
782#else
783 return(0);
784#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000785 case XML_WITH_TREE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000786#ifdef LIBXML_TREE_ENABLED
787 return(1);
788#else
789 return(0);
790#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000791 case XML_WITH_OUTPUT:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000792#ifdef LIBXML_OUTPUT_ENABLED
793 return(1);
794#else
795 return(0);
796#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000797 case XML_WITH_PUSH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000798#ifdef LIBXML_PUSH_ENABLED
799 return(1);
800#else
801 return(0);
802#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000803 case XML_WITH_READER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000804#ifdef LIBXML_READER_ENABLED
805 return(1);
806#else
807 return(0);
808#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000809 case XML_WITH_PATTERN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000810#ifdef LIBXML_PATTERN_ENABLED
811 return(1);
812#else
813 return(0);
814#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000815 case XML_WITH_WRITER:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000816#ifdef LIBXML_WRITER_ENABLED
817 return(1);
818#else
819 return(0);
820#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000821 case XML_WITH_SAX1:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000822#ifdef LIBXML_SAX1_ENABLED
823 return(1);
824#else
825 return(0);
826#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000827 case XML_WITH_FTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000828#ifdef LIBXML_FTP_ENABLED
829 return(1);
830#else
831 return(0);
832#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000833 case XML_WITH_HTTP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000834#ifdef LIBXML_HTTP_ENABLED
835 return(1);
836#else
837 return(0);
838#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000839 case XML_WITH_VALID:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000840#ifdef LIBXML_VALID_ENABLED
841 return(1);
842#else
843 return(0);
844#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000845 case XML_WITH_HTML:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000846#ifdef LIBXML_HTML_ENABLED
847 return(1);
848#else
849 return(0);
850#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000851 case XML_WITH_LEGACY:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000852#ifdef LIBXML_LEGACY_ENABLED
853 return(1);
854#else
855 return(0);
856#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000857 case XML_WITH_C14N:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000858#ifdef LIBXML_C14N_ENABLED
859 return(1);
860#else
861 return(0);
862#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000863 case XML_WITH_CATALOG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000864#ifdef LIBXML_CATALOG_ENABLED
865 return(1);
866#else
867 return(0);
868#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000869 case XML_WITH_XPATH:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000870#ifdef LIBXML_XPATH_ENABLED
871 return(1);
872#else
873 return(0);
874#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000875 case XML_WITH_XPTR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000876#ifdef LIBXML_XPTR_ENABLED
877 return(1);
878#else
879 return(0);
880#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000881 case XML_WITH_XINCLUDE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000882#ifdef LIBXML_XINCLUDE_ENABLED
883 return(1);
884#else
885 return(0);
886#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000887 case XML_WITH_ICONV:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000888#ifdef LIBXML_ICONV_ENABLED
889 return(1);
890#else
891 return(0);
892#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000893 case XML_WITH_ISO8859X:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000894#ifdef LIBXML_ISO8859X_ENABLED
895 return(1);
896#else
897 return(0);
898#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000899 case XML_WITH_UNICODE:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000900#ifdef LIBXML_UNICODE_ENABLED
901 return(1);
902#else
903 return(0);
904#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000905 case XML_WITH_REGEXP:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000906#ifdef LIBXML_REGEXP_ENABLED
907 return(1);
908#else
909 return(0);
910#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000911 case XML_WITH_AUTOMATA:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000912#ifdef LIBXML_AUTOMATA_ENABLED
913 return(1);
914#else
915 return(0);
916#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000917 case XML_WITH_EXPR:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000918#ifdef LIBXML_EXPR_ENABLED
919 return(1);
920#else
921 return(0);
922#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000923 case XML_WITH_SCHEMAS:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000924#ifdef LIBXML_SCHEMAS_ENABLED
925 return(1);
926#else
927 return(0);
928#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000929 case XML_WITH_SCHEMATRON:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000930#ifdef LIBXML_SCHEMATRON_ENABLED
931 return(1);
932#else
933 return(0);
934#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000935 case XML_WITH_MODULES:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000936#ifdef LIBXML_MODULES_ENABLED
937 return(1);
938#else
939 return(0);
940#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000941 case XML_WITH_DEBUG:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000942#ifdef LIBXML_DEBUG_ENABLED
943 return(1);
944#else
945 return(0);
946#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000947 case XML_WITH_DEBUG_MEM:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000948#ifdef DEBUG_MEMORY_LOCATION
949 return(1);
950#else
951 return(0);
952#endif
Daniel Veillard602434d2005-09-12 09:20:31 +0000953 case XML_WITH_DEBUG_RUN:
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000954#ifdef LIBXML_DEBUG_RUNTIME
955 return(1);
956#else
957 return(0);
Daniel Veillard34e3f642008-07-29 09:02:27 +0000958#endif
Daniel Veillard75acfee2006-07-13 06:29:56 +0000959 case XML_WITH_ZLIB:
960#ifdef LIBXML_ZLIB_ENABLED
961 return(1);
962#else
963 return(0);
964#endif
Anders F Bjorklundeae52612011-09-18 16:59:13 +0200965 case XML_WITH_LZMA:
966#ifdef LIBXML_LZMA_ENABLED
967 return(1);
968#else
969 return(0);
970#endif
Giuseppe Iuculano48f7dcb2010-11-04 17:42:42 +0100971 case XML_WITH_ICU:
972#ifdef LIBXML_ICU_ENABLED
973 return(1);
974#else
975 return(0);
976#endif
Daniel Veillard0bcc7f62005-09-04 21:39:03 +0000977 default:
978 break;
979 }
980 return(0);
981}
982
983/************************************************************************
984 * *
Daniel Veillarde57ec792003-09-10 10:50:59 +0000985 * SAX2 defaulted attributes handling *
986 * *
987 ************************************************************************/
988
989/**
990 * xmlDetectSAX2:
991 * @ctxt: an XML parser context
992 *
993 * Do the SAX2 detection and specific intialization
994 */
995static void
996xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
997 if (ctxt == NULL) return;
Daniel Veillard81273902003-09-30 00:43:48 +0000998#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +0000999 if ((ctxt->sax) && (ctxt->sax->initialized == XML_SAX2_MAGIC) &&
1000 ((ctxt->sax->startElementNs != NULL) ||
1001 (ctxt->sax->endElementNs != NULL))) ctxt->sax2 = 1;
Daniel Veillard81273902003-09-30 00:43:48 +00001002#else
1003 ctxt->sax2 = 1;
1004#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001005
1006 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
1007 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
1008 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
William M. Brack9f797ab2004-07-28 07:40:12 +00001009 if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
1010 (ctxt->str_xml_ns == NULL)) {
1011 xmlErrMemory(ctxt, NULL);
1012 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001013}
1014
Daniel Veillarde57ec792003-09-10 10:50:59 +00001015typedef struct _xmlDefAttrs xmlDefAttrs;
1016typedef xmlDefAttrs *xmlDefAttrsPtr;
1017struct _xmlDefAttrs {
1018 int nbAttrs; /* number of defaulted attributes on that element */
1019 int maxAttrs; /* the size of the array */
Daniel Veillardae0765b2008-07-31 19:54:59 +00001020 const xmlChar *values[5]; /* array of localname/prefix/values/external */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001021};
Daniel Veillarde57ec792003-09-10 10:50:59 +00001022
1023/**
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001024 * xmlAttrNormalizeSpace:
1025 * @src: the source string
1026 * @dst: the target string
1027 *
1028 * Normalize the space in non CDATA attribute values:
1029 * If the attribute type is not CDATA, then the XML processor MUST further
1030 * process the normalized attribute value by discarding any leading and
1031 * trailing space (#x20) characters, and by replacing sequences of space
1032 * (#x20) characters by a single space (#x20) character.
1033 * Note that the size of dst need to be at least src, and if one doesn't need
1034 * to preserve dst (and it doesn't come from a dictionary or read-only) then
1035 * passing src as dst is just fine.
1036 *
1037 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1038 * is needed.
1039 */
1040static xmlChar *
1041xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
1042{
1043 if ((src == NULL) || (dst == NULL))
1044 return(NULL);
1045
1046 while (*src == 0x20) src++;
1047 while (*src != 0) {
1048 if (*src == 0x20) {
1049 while (*src == 0x20) src++;
1050 if (*src != 0)
1051 *dst++ = 0x20;
1052 } else {
1053 *dst++ = *src++;
1054 }
1055 }
1056 *dst = 0;
1057 if (dst == src)
1058 return(NULL);
1059 return(dst);
1060}
1061
1062/**
1063 * xmlAttrNormalizeSpace2:
1064 * @src: the source string
1065 *
1066 * Normalize the space in non CDATA attribute values, a slightly more complex
1067 * front end to avoid allocation problems when running on attribute values
1068 * coming from the input.
1069 *
1070 * Returns a pointer to the normalized value (dst) or NULL if no conversion
1071 * is needed.
1072 */
1073static const xmlChar *
Daniel Veillardae0765b2008-07-31 19:54:59 +00001074xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001075{
1076 int i;
1077 int remove_head = 0;
1078 int need_realloc = 0;
1079 const xmlChar *cur;
1080
1081 if ((ctxt == NULL) || (src == NULL) || (len == NULL))
1082 return(NULL);
1083 i = *len;
1084 if (i <= 0)
1085 return(NULL);
1086
1087 cur = src;
1088 while (*cur == 0x20) {
1089 cur++;
1090 remove_head++;
1091 }
1092 while (*cur != 0) {
1093 if (*cur == 0x20) {
1094 cur++;
1095 if ((*cur == 0x20) || (*cur == 0)) {
1096 need_realloc = 1;
1097 break;
1098 }
1099 } else
1100 cur++;
1101 }
1102 if (need_realloc) {
1103 xmlChar *ret;
1104
1105 ret = xmlStrndup(src + remove_head, i - remove_head + 1);
1106 if (ret == NULL) {
1107 xmlErrMemory(ctxt, NULL);
1108 return(NULL);
1109 }
1110 xmlAttrNormalizeSpace(ret, ret);
1111 *len = (int) strlen((const char *)ret);
1112 return(ret);
1113 } else if (remove_head) {
1114 *len -= remove_head;
Daniel Veillardae0765b2008-07-31 19:54:59 +00001115 memmove(src, src + remove_head, 1 + *len);
1116 return(src);
Daniel Veillard97c9ce22008-03-25 16:52:41 +00001117 }
1118 return(NULL);
1119}
1120
1121/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001122 * xmlAddDefAttrs:
1123 * @ctxt: an XML parser context
1124 * @fullname: the element fullname
1125 * @fullattr: the attribute fullname
1126 * @value: the attribute value
1127 *
1128 * Add a defaulted attribute for an element
1129 */
1130static void
1131xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1132 const xmlChar *fullname,
1133 const xmlChar *fullattr,
1134 const xmlChar *value) {
1135 xmlDefAttrsPtr defaults;
1136 int len;
1137 const xmlChar *name;
1138 const xmlChar *prefix;
1139
Daniel Veillard6a31b832008-03-26 14:06:44 +00001140 /*
1141 * Allows to detect attribute redefinitions
1142 */
1143 if (ctxt->attsSpecial != NULL) {
1144 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1145 return;
1146 }
1147
Daniel Veillarde57ec792003-09-10 10:50:59 +00001148 if (ctxt->attsDefault == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001149 ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001150 if (ctxt->attsDefault == NULL)
1151 goto mem_error;
1152 }
1153
1154 /*
Daniel Veillard079f6a72004-09-23 13:15:03 +00001155 * split the element name into prefix:localname , the string found
1156 * are within the DTD and then not associated to namespace names.
Daniel Veillarde57ec792003-09-10 10:50:59 +00001157 */
1158 name = xmlSplitQName3(fullname, &len);
1159 if (name == NULL) {
1160 name = xmlDictLookup(ctxt->dict, fullname, -1);
1161 prefix = NULL;
1162 } else {
1163 name = xmlDictLookup(ctxt->dict, name, -1);
1164 prefix = xmlDictLookup(ctxt->dict, fullname, len);
1165 }
1166
1167 /*
1168 * make sure there is some storage
1169 */
1170 defaults = xmlHashLookup2(ctxt->attsDefault, name, prefix);
1171 if (defaults == NULL) {
1172 defaults = (xmlDefAttrsPtr) xmlMalloc(sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001173 (4 * 5) * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001174 if (defaults == NULL)
1175 goto mem_error;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001176 defaults->nbAttrs = 0;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001177 defaults->maxAttrs = 4;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001178 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1179 defaults, NULL) < 0) {
1180 xmlFree(defaults);
1181 goto mem_error;
1182 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001183 } else if (defaults->nbAttrs >= defaults->maxAttrs) {
Daniel Veillard079f6a72004-09-23 13:15:03 +00001184 xmlDefAttrsPtr temp;
1185
1186 temp = (xmlDefAttrsPtr) xmlRealloc(defaults, sizeof(xmlDefAttrs) +
Daniel Veillardae0765b2008-07-31 19:54:59 +00001187 (2 * defaults->maxAttrs * 5) * sizeof(const xmlChar *));
Daniel Veillard079f6a72004-09-23 13:15:03 +00001188 if (temp == NULL)
Daniel Veillarde57ec792003-09-10 10:50:59 +00001189 goto mem_error;
Daniel Veillard079f6a72004-09-23 13:15:03 +00001190 defaults = temp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001191 defaults->maxAttrs *= 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00001192 if (xmlHashUpdateEntry2(ctxt->attsDefault, name, prefix,
1193 defaults, NULL) < 0) {
1194 xmlFree(defaults);
1195 goto mem_error;
1196 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001197 }
1198
1199 /*
Daniel Veillard8874b942005-08-25 13:19:21 +00001200 * Split the element name into prefix:localname , the string found
Daniel Veillarde57ec792003-09-10 10:50:59 +00001201 * are within the DTD and hen not associated to namespace names.
1202 */
1203 name = xmlSplitQName3(fullattr, &len);
1204 if (name == NULL) {
1205 name = xmlDictLookup(ctxt->dict, fullattr, -1);
1206 prefix = NULL;
1207 } else {
1208 name = xmlDictLookup(ctxt->dict, name, -1);
1209 prefix = xmlDictLookup(ctxt->dict, fullattr, len);
1210 }
1211
Daniel Veillardae0765b2008-07-31 19:54:59 +00001212 defaults->values[5 * defaults->nbAttrs] = name;
1213 defaults->values[5 * defaults->nbAttrs + 1] = prefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001214 /* intern the string and precompute the end */
1215 len = xmlStrlen(value);
1216 value = xmlDictLookup(ctxt->dict, value, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00001217 defaults->values[5 * defaults->nbAttrs + 2] = value;
1218 defaults->values[5 * defaults->nbAttrs + 3] = value + len;
1219 if (ctxt->external)
1220 defaults->values[5 * defaults->nbAttrs + 4] = BAD_CAST "external";
1221 else
1222 defaults->values[5 * defaults->nbAttrs + 4] = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001223 defaults->nbAttrs++;
1224
1225 return;
1226
1227mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001228 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001229 return;
1230}
1231
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001232/**
1233 * xmlAddSpecialAttr:
1234 * @ctxt: an XML parser context
1235 * @fullname: the element fullname
1236 * @fullattr: the attribute fullname
1237 * @type: the attribute type
1238 *
Daniel Veillardac4118d2008-01-11 05:27:32 +00001239 * Register this attribute type
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001240 */
1241static void
1242xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1243 const xmlChar *fullname,
1244 const xmlChar *fullattr,
1245 int type)
1246{
1247 if (ctxt->attsSpecial == NULL) {
Daniel Veillard316a5c32005-01-23 22:56:39 +00001248 ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001249 if (ctxt->attsSpecial == NULL)
1250 goto mem_error;
1251 }
1252
Daniel Veillardac4118d2008-01-11 05:27:32 +00001253 if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1254 return;
1255
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001256 xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1257 (void *) (long) type);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001258 return;
1259
1260mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001261 xmlErrMemory(ctxt, NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00001262 return;
1263}
1264
Daniel Veillard4432df22003-09-28 18:58:27 +00001265/**
Daniel Veillardac4118d2008-01-11 05:27:32 +00001266 * xmlCleanSpecialAttrCallback:
1267 *
1268 * Removes CDATA attributes from the special attribute table
1269 */
1270static void
1271xmlCleanSpecialAttrCallback(void *payload, void *data,
1272 const xmlChar *fullname, const xmlChar *fullattr,
1273 const xmlChar *unused ATTRIBUTE_UNUSED) {
1274 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1275
Daniel Veillardb3edafd2008-01-11 08:00:57 +00001276 if (((long) payload) == XML_ATTRIBUTE_CDATA) {
Daniel Veillardac4118d2008-01-11 05:27:32 +00001277 xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1278 }
1279}
1280
1281/**
1282 * xmlCleanSpecialAttr:
1283 * @ctxt: an XML parser context
1284 *
1285 * Trim the list of attributes defined to remove all those of type
1286 * CDATA as they are not special. This call should be done when finishing
1287 * to parse the DTD and before starting to parse the document root.
1288 */
1289static void
1290xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1291{
1292 if (ctxt->attsSpecial == NULL)
1293 return;
1294
1295 xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1296
1297 if (xmlHashSize(ctxt->attsSpecial) == 0) {
1298 xmlHashFree(ctxt->attsSpecial, NULL);
1299 ctxt->attsSpecial = NULL;
1300 }
1301 return;
1302}
1303
1304/**
Daniel Veillard4432df22003-09-28 18:58:27 +00001305 * xmlCheckLanguageID:
1306 * @lang: pointer to the string value
1307 *
1308 * Checks that the value conforms to the LanguageID production:
1309 *
1310 * NOTE: this is somewhat deprecated, those productions were removed from
1311 * the XML Second edition.
1312 *
1313 * [33] LanguageID ::= Langcode ('-' Subcode)*
1314 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
1315 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
1316 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
1317 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
1318 * [38] Subcode ::= ([a-z] | [A-Z])+
1319 *
Daniel Veillard60587d62010-11-04 15:16:27 +01001320 * The current REC reference the sucessors of RFC 1766, currently 5646
1321 *
1322 * http://www.rfc-editor.org/rfc/rfc5646.txt
1323 * langtag = language
1324 * ["-" script]
1325 * ["-" region]
1326 * *("-" variant)
1327 * *("-" extension)
1328 * ["-" privateuse]
1329 * language = 2*3ALPHA ; shortest ISO 639 code
1330 * ["-" extlang] ; sometimes followed by
1331 * ; extended language subtags
1332 * / 4ALPHA ; or reserved for future use
1333 * / 5*8ALPHA ; or registered language subtag
1334 *
1335 * extlang = 3ALPHA ; selected ISO 639 codes
1336 * *2("-" 3ALPHA) ; permanently reserved
1337 *
1338 * script = 4ALPHA ; ISO 15924 code
1339 *
1340 * region = 2ALPHA ; ISO 3166-1 code
1341 * / 3DIGIT ; UN M.49 code
1342 *
1343 * variant = 5*8alphanum ; registered variants
1344 * / (DIGIT 3alphanum)
1345 *
1346 * extension = singleton 1*("-" (2*8alphanum))
1347 *
1348 * ; Single alphanumerics
1349 * ; "x" reserved for private use
1350 * singleton = DIGIT ; 0 - 9
1351 * / %x41-57 ; A - W
1352 * / %x59-5A ; Y - Z
1353 * / %x61-77 ; a - w
1354 * / %x79-7A ; y - z
1355 *
1356 * it sounds right to still allow Irregular i-xxx IANA and user codes too
1357 * The parser below doesn't try to cope with extension or privateuse
1358 * that could be added but that's not interoperable anyway
1359 *
Daniel Veillard4432df22003-09-28 18:58:27 +00001360 * Returns 1 if correct 0 otherwise
1361 **/
1362int
1363xmlCheckLanguageID(const xmlChar * lang)
1364{
Daniel Veillard60587d62010-11-04 15:16:27 +01001365 const xmlChar *cur = lang, *nxt;
Daniel Veillard4432df22003-09-28 18:58:27 +00001366
1367 if (cur == NULL)
1368 return (0);
1369 if (((cur[0] == 'i') && (cur[1] == '-')) ||
Daniel Veillard60587d62010-11-04 15:16:27 +01001370 ((cur[0] == 'I') && (cur[1] == '-')) ||
1371 ((cur[0] == 'x') && (cur[1] == '-')) ||
1372 ((cur[0] == 'X') && (cur[1] == '-'))) {
Daniel Veillard4432df22003-09-28 18:58:27 +00001373 /*
Daniel Veillard60587d62010-11-04 15:16:27 +01001374 * Still allow IANA code and user code which were coming
1375 * from the previous version of the XML-1.0 specification
1376 * it's deprecated but we should not fail
Daniel Veillard4432df22003-09-28 18:58:27 +00001377 */
1378 cur += 2;
Daniel Veillard60587d62010-11-04 15:16:27 +01001379 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
Daniel Veillard4432df22003-09-28 18:58:27 +00001380 ((cur[0] >= 'a') && (cur[0] <= 'z')))
1381 cur++;
Daniel Veillard60587d62010-11-04 15:16:27 +01001382 return(cur[0] == 0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001383 }
Daniel Veillard60587d62010-11-04 15:16:27 +01001384 nxt = cur;
1385 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1386 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1387 nxt++;
1388 if (nxt - cur >= 4) {
1389 /*
1390 * Reserved
1391 */
1392 if ((nxt - cur > 8) || (nxt[0] != 0))
1393 return(0);
1394 return(1);
1395 }
1396 if (nxt - cur < 2)
1397 return(0);
1398 /* we got an ISO 639 code */
1399 if (nxt[0] == 0)
1400 return(1);
1401 if (nxt[0] != '-')
1402 return(0);
1403
1404 nxt++;
1405 cur = nxt;
1406 /* now we can have extlang or script or region or variant */
1407 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1408 goto region_m49;
1409
1410 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1411 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1412 nxt++;
1413 if (nxt - cur == 4)
1414 goto script;
1415 if (nxt - cur == 2)
1416 goto region;
1417 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1418 goto variant;
1419 if (nxt - cur != 3)
1420 return(0);
1421 /* we parsed an extlang */
1422 if (nxt[0] == 0)
1423 return(1);
1424 if (nxt[0] != '-')
1425 return(0);
1426
1427 nxt++;
1428 cur = nxt;
1429 /* now we can have script or region or variant */
1430 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1431 goto region_m49;
1432
1433 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1434 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1435 nxt++;
1436 if (nxt - cur == 2)
1437 goto region;
1438 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1439 goto variant;
1440 if (nxt - cur != 4)
1441 return(0);
1442 /* we parsed a script */
1443script:
1444 if (nxt[0] == 0)
1445 return(1);
1446 if (nxt[0] != '-')
1447 return(0);
1448
1449 nxt++;
1450 cur = nxt;
1451 /* now we can have region or variant */
1452 if ((nxt[0] >= '0') && (nxt[0] <= '9'))
1453 goto region_m49;
1454
1455 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1456 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1457 nxt++;
1458
1459 if ((nxt - cur >= 5) && (nxt - cur <= 8))
1460 goto variant;
1461 if (nxt - cur != 2)
1462 return(0);
1463 /* we parsed a region */
1464region:
1465 if (nxt[0] == 0)
1466 return(1);
1467 if (nxt[0] != '-')
1468 return(0);
1469
1470 nxt++;
1471 cur = nxt;
1472 /* now we can just have a variant */
1473 while (((nxt[0] >= 'A') && (nxt[0] <= 'Z')) ||
1474 ((nxt[0] >= 'a') && (nxt[0] <= 'z')))
1475 nxt++;
1476
1477 if ((nxt - cur < 5) || (nxt - cur > 8))
1478 return(0);
1479
1480 /* we parsed a variant */
1481variant:
1482 if (nxt[0] == 0)
1483 return(1);
1484 if (nxt[0] != '-')
1485 return(0);
1486 /* extensions and private use subtags not checked */
Daniel Veillard4432df22003-09-28 18:58:27 +00001487 return (1);
Daniel Veillard60587d62010-11-04 15:16:27 +01001488
1489region_m49:
1490 if (((nxt[1] >= '0') && (nxt[1] <= '9')) &&
1491 ((nxt[2] >= '0') && (nxt[2] <= '9'))) {
1492 nxt += 3;
1493 goto region;
1494 }
1495 return(0);
Daniel Veillard4432df22003-09-28 18:58:27 +00001496}
1497
Owen Taylor3473f882001-02-23 17:55:21 +00001498/************************************************************************
1499 * *
Daniel Veillard0161e632008-08-28 15:36:32 +00001500 * Parser stacks related functions and macros *
Owen Taylor3473f882001-02-23 17:55:21 +00001501 * *
1502 ************************************************************************/
1503
Daniel Veillard8ed10722009-08-20 19:17:36 +02001504static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1505 const xmlChar ** str);
Owen Taylor3473f882001-02-23 17:55:21 +00001506
Daniel Veillard0fb18932003-09-07 09:14:37 +00001507#ifdef SAX2
1508/**
1509 * nsPush:
1510 * @ctxt: an XML parser context
1511 * @prefix: the namespace prefix or NULL
1512 * @URL: the namespace name
1513 *
1514 * Pushes a new parser namespace on top of the ns stack
1515 *
William M. Brack7b9154b2003-09-27 19:23:50 +00001516 * Returns -1 in case of error, -2 if the namespace should be discarded
1517 * and the index in the stack otherwise.
Daniel Veillard0fb18932003-09-07 09:14:37 +00001518 */
1519static int
1520nsPush(xmlParserCtxtPtr ctxt, const xmlChar *prefix, const xmlChar *URL)
1521{
Daniel Veillarddca8cc72003-09-26 13:53:14 +00001522 if (ctxt->options & XML_PARSE_NSCLEAN) {
1523 int i;
1524 for (i = 0;i < ctxt->nsNr;i += 2) {
1525 if (ctxt->nsTab[i] == prefix) {
1526 /* in scope */
1527 if (ctxt->nsTab[i + 1] == URL)
1528 return(-2);
1529 /* out of scope keep it */
1530 break;
1531 }
1532 }
1533 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00001534 if ((ctxt->nsMax == 0) || (ctxt->nsTab == NULL)) {
1535 ctxt->nsMax = 10;
1536 ctxt->nsNr = 0;
1537 ctxt->nsTab = (const xmlChar **)
1538 xmlMalloc(ctxt->nsMax * sizeof(xmlChar *));
1539 if (ctxt->nsTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001540 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001541 ctxt->nsMax = 0;
1542 return (-1);
1543 }
1544 } else if (ctxt->nsNr >= ctxt->nsMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001545 const xmlChar ** tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001546 ctxt->nsMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001547 tmp = (const xmlChar **) xmlRealloc((char *) ctxt->nsTab,
1548 ctxt->nsMax * sizeof(ctxt->nsTab[0]));
1549 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001550 xmlErrMemory(ctxt, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001551 ctxt->nsMax /= 2;
1552 return (-1);
1553 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001554 ctxt->nsTab = tmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001555 }
1556 ctxt->nsTab[ctxt->nsNr++] = prefix;
1557 ctxt->nsTab[ctxt->nsNr++] = URL;
1558 return (ctxt->nsNr);
1559}
1560/**
1561 * nsPop:
1562 * @ctxt: an XML parser context
1563 * @nr: the number to pop
1564 *
1565 * Pops the top @nr parser prefix/namespace from the ns stack
1566 *
1567 * Returns the number of namespaces removed
1568 */
1569static int
1570nsPop(xmlParserCtxtPtr ctxt, int nr)
1571{
1572 int i;
1573
1574 if (ctxt->nsTab == NULL) return(0);
1575 if (ctxt->nsNr < nr) {
1576 xmlGenericError(xmlGenericErrorContext, "Pbm popping %d NS\n", nr);
1577 nr = ctxt->nsNr;
1578 }
1579 if (ctxt->nsNr <= 0)
1580 return (0);
Daniel Veillard34e3f642008-07-29 09:02:27 +00001581
Daniel Veillard0fb18932003-09-07 09:14:37 +00001582 for (i = 0;i < nr;i++) {
1583 ctxt->nsNr--;
1584 ctxt->nsTab[ctxt->nsNr] = NULL;
1585 }
1586 return(nr);
1587}
1588#endif
1589
1590static int
1591xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1592 const xmlChar **atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001593 int *attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001594 int maxatts;
1595
1596 if (ctxt->atts == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001597 maxatts = 55; /* allow for 10 attrs by default */
Daniel Veillard0fb18932003-09-07 09:14:37 +00001598 atts = (const xmlChar **)
1599 xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001600 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001601 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001602 attallocs = (int *) xmlMalloc((maxatts / 5) * sizeof(int));
1603 if (attallocs == NULL) goto mem_error;
1604 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001605 ctxt->maxatts = maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001606 } else if (nr + 5 > ctxt->maxatts) {
1607 maxatts = (nr + 5) * 2;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001608 atts = (const xmlChar **) xmlRealloc((void *) ctxt->atts,
1609 maxatts * sizeof(const xmlChar *));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001610 if (atts == NULL) goto mem_error;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001611 ctxt->atts = atts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001612 attallocs = (int *) xmlRealloc((void *) ctxt->attallocs,
1613 (maxatts / 5) * sizeof(int));
1614 if (attallocs == NULL) goto mem_error;
1615 ctxt->attallocs = attallocs;
Daniel Veillard0fb18932003-09-07 09:14:37 +00001616 ctxt->maxatts = maxatts;
1617 }
1618 return(ctxt->maxatts);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001619mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001620 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001621 return(-1);
Daniel Veillard0fb18932003-09-07 09:14:37 +00001622}
1623
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001624/**
1625 * inputPush:
1626 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001627 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001628 *
1629 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001630 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001631 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001632 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001633int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001634inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1635{
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001636 if ((ctxt == NULL) || (value == NULL))
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001637 return(-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001638 if (ctxt->inputNr >= ctxt->inputMax) {
1639 ctxt->inputMax *= 2;
1640 ctxt->inputTab =
1641 (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1642 ctxt->inputMax *
1643 sizeof(ctxt->inputTab[0]));
1644 if (ctxt->inputTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001645 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001646 xmlFreeInputStream(value);
1647 ctxt->inputMax /= 2;
1648 value = NULL;
1649 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001650 }
1651 }
1652 ctxt->inputTab[ctxt->inputNr] = value;
1653 ctxt->input = value;
1654 return (ctxt->inputNr++);
1655}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001656/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001657 * inputPop:
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001658 * @ctxt: an XML parser context
1659 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001660 * Pops the top parser input from the input stack
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001661 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001662 * Returns the input just removed
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001663 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001664xmlParserInputPtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001665inputPop(xmlParserCtxtPtr ctxt)
1666{
1667 xmlParserInputPtr ret;
1668
Daniel Veillard36e5cd52004-11-02 14:52:23 +00001669 if (ctxt == NULL)
1670 return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001671 if (ctxt->inputNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001672 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001673 ctxt->inputNr--;
1674 if (ctxt->inputNr > 0)
1675 ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1676 else
1677 ctxt->input = NULL;
1678 ret = ctxt->inputTab[ctxt->inputNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001679 ctxt->inputTab[ctxt->inputNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001680 return (ret);
1681}
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001682/**
1683 * nodePush:
1684 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +00001685 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001686 *
1687 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +00001688 *
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001689 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +00001690 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001691int
Daniel Veillard1c732d22002-11-30 11:22:59 +00001692nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1693{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001694 if (ctxt == NULL) return(0);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001695 if (ctxt->nodeNr >= ctxt->nodeMax) {
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001696 xmlNodePtr *tmp;
1697
1698 tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1699 ctxt->nodeMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001700 sizeof(ctxt->nodeTab[0]));
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001701 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001702 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001703 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001704 }
Daniel Veillardd0cf7f62004-11-09 16:17:02 +00001705 ctxt->nodeTab = tmp;
1706 ctxt->nodeMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001707 }
Daniel Veillard8915c152008-08-26 13:05:34 +00001708 if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1709 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00001710 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard8915c152008-08-26 13:05:34 +00001711 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
Daniel Veillard4aede2e2003-10-17 12:43:59 +00001712 xmlParserMaxDepth);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001713 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001714 return(-1);
Daniel Veillard3b2e4e12003-02-03 08:52:58 +00001715 }
Daniel Veillard1c732d22002-11-30 11:22:59 +00001716 ctxt->nodeTab[ctxt->nodeNr] = value;
1717 ctxt->node = value;
1718 return (ctxt->nodeNr++);
1719}
Daniel Veillard8915c152008-08-26 13:05:34 +00001720
Daniel Veillard1c732d22002-11-30 11:22:59 +00001721/**
1722 * nodePop:
1723 * @ctxt: an XML parser context
1724 *
1725 * Pops the top element node from the node stack
1726 *
1727 * Returns the node just removed
Owen Taylor3473f882001-02-23 17:55:21 +00001728 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001729xmlNodePtr
Daniel Veillard1c732d22002-11-30 11:22:59 +00001730nodePop(xmlParserCtxtPtr ctxt)
1731{
1732 xmlNodePtr ret;
1733
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001734 if (ctxt == NULL) return(NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001735 if (ctxt->nodeNr <= 0)
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001736 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001737 ctxt->nodeNr--;
1738 if (ctxt->nodeNr > 0)
1739 ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
1740 else
1741 ctxt->node = NULL;
1742 ret = ctxt->nodeTab[ctxt->nodeNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001743 ctxt->nodeTab[ctxt->nodeNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001744 return (ret);
1745}
Daniel Veillarda2351322004-06-27 12:08:10 +00001746
1747#ifdef LIBXML_PUSH_ENABLED
Daniel Veillard1c732d22002-11-30 11:22:59 +00001748/**
Daniel Veillarde57ec792003-09-10 10:50:59 +00001749 * nameNsPush:
1750 * @ctxt: an XML parser context
1751 * @value: the element name
1752 * @prefix: the element prefix
1753 * @URI: the element namespace name
1754 *
1755 * Pushes a new element name/prefix/URL on top of the name stack
1756 *
1757 * Returns -1 in case of error, the index in the stack otherwise
1758 */
1759static int
1760nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
1761 const xmlChar *prefix, const xmlChar *URI, int nsNr)
1762{
1763 if (ctxt->nameNr >= ctxt->nameMax) {
1764 const xmlChar * *tmp;
1765 void **tmp2;
1766 ctxt->nameMax *= 2;
1767 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
1768 ctxt->nameMax *
1769 sizeof(ctxt->nameTab[0]));
1770 if (tmp == NULL) {
1771 ctxt->nameMax /= 2;
1772 goto mem_error;
1773 }
1774 ctxt->nameTab = tmp;
1775 tmp2 = (void **) xmlRealloc((void * *)ctxt->pushTab,
1776 ctxt->nameMax * 3 *
1777 sizeof(ctxt->pushTab[0]));
1778 if (tmp2 == NULL) {
1779 ctxt->nameMax /= 2;
1780 goto mem_error;
1781 }
1782 ctxt->pushTab = tmp2;
1783 }
1784 ctxt->nameTab[ctxt->nameNr] = value;
1785 ctxt->name = value;
1786 ctxt->pushTab[ctxt->nameNr * 3] = (void *) prefix;
1787 ctxt->pushTab[ctxt->nameNr * 3 + 1] = (void *) URI;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +00001788 ctxt->pushTab[ctxt->nameNr * 3 + 2] = (void *) (long) nsNr;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001789 return (ctxt->nameNr++);
1790mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001791 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001792 return (-1);
1793}
1794/**
1795 * nameNsPop:
1796 * @ctxt: an XML parser context
1797 *
1798 * Pops the top element/prefix/URI name from the name stack
1799 *
1800 * Returns the name just removed
1801 */
1802static const xmlChar *
1803nameNsPop(xmlParserCtxtPtr ctxt)
1804{
1805 const xmlChar *ret;
1806
1807 if (ctxt->nameNr <= 0)
Daniel Veillard24505b02005-07-28 23:49:35 +00001808 return (NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001809 ctxt->nameNr--;
1810 if (ctxt->nameNr > 0)
1811 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1812 else
1813 ctxt->name = NULL;
1814 ret = ctxt->nameTab[ctxt->nameNr];
1815 ctxt->nameTab[ctxt->nameNr] = NULL;
1816 return (ret);
1817}
Daniel Veillarda2351322004-06-27 12:08:10 +00001818#endif /* LIBXML_PUSH_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +00001819
1820/**
Daniel Veillard1c732d22002-11-30 11:22:59 +00001821 * namePush:
1822 * @ctxt: an XML parser context
1823 * @value: the element name
1824 *
1825 * Pushes a new element name on top of the name stack
1826 *
Daniel Veillarde57ec792003-09-10 10:50:59 +00001827 * Returns -1 in case of error, the index in the stack otherwise
Daniel Veillard1c732d22002-11-30 11:22:59 +00001828 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001829int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001830namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
Daniel Veillard1c732d22002-11-30 11:22:59 +00001831{
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001832 if (ctxt == NULL) return (-1);
1833
Daniel Veillard1c732d22002-11-30 11:22:59 +00001834 if (ctxt->nameNr >= ctxt->nameMax) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001835 const xmlChar * *tmp;
Daniel Veillarde57ec792003-09-10 10:50:59 +00001836 tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001837 ctxt->nameMax * 2 *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001838 sizeof(ctxt->nameTab[0]));
Daniel Veillarde57ec792003-09-10 10:50:59 +00001839 if (tmp == NULL) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00001840 goto mem_error;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001841 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00001842 ctxt->nameTab = tmp;
Xia Xinfeng5825ebb2011-11-10 13:50:22 +08001843 ctxt->nameMax *= 2;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001844 }
1845 ctxt->nameTab[ctxt->nameNr] = value;
1846 ctxt->name = value;
1847 return (ctxt->nameNr++);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001848mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001849 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00001850 return (-1);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001851}
1852/**
1853 * namePop:
1854 * @ctxt: an XML parser context
1855 *
1856 * Pops the top element name from the name stack
1857 *
1858 * Returns the name just removed
1859 */
Daniel Veillard7a5e0dd2004-09-17 08:45:25 +00001860const xmlChar *
Daniel Veillard1c732d22002-11-30 11:22:59 +00001861namePop(xmlParserCtxtPtr ctxt)
1862{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00001863 const xmlChar *ret;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001864
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00001865 if ((ctxt == NULL) || (ctxt->nameNr <= 0))
1866 return (NULL);
Daniel Veillard1c732d22002-11-30 11:22:59 +00001867 ctxt->nameNr--;
1868 if (ctxt->nameNr > 0)
1869 ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
1870 else
1871 ctxt->name = NULL;
1872 ret = ctxt->nameTab[ctxt->nameNr];
Daniel Veillard24505b02005-07-28 23:49:35 +00001873 ctxt->nameTab[ctxt->nameNr] = NULL;
Daniel Veillard1c732d22002-11-30 11:22:59 +00001874 return (ret);
1875}
Owen Taylor3473f882001-02-23 17:55:21 +00001876
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001877static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001878 if (ctxt->spaceNr >= ctxt->spaceMax) {
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001879 int *tmp;
1880
Owen Taylor3473f882001-02-23 17:55:21 +00001881 ctxt->spaceMax *= 2;
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001882 tmp = (int *) xmlRealloc(ctxt->spaceTab,
1883 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
1884 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00001885 xmlErrMemory(ctxt, NULL);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00001886 ctxt->spaceMax /=2;
1887 return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00001888 }
Daniel Veillarda4ba09d2008-04-03 06:24:04 +00001889 ctxt->spaceTab = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00001890 }
1891 ctxt->spaceTab[ctxt->spaceNr] = val;
1892 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
1893 return(ctxt->spaceNr++);
1894}
1895
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001896static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +00001897 int ret;
1898 if (ctxt->spaceNr <= 0) return(0);
1899 ctxt->spaceNr--;
1900 if (ctxt->spaceNr > 0)
1901 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
1902 else
Daniel Veillarddbcbbd22006-06-18 19:55:20 +00001903 ctxt->space = &ctxt->spaceTab[0];
Owen Taylor3473f882001-02-23 17:55:21 +00001904 ret = ctxt->spaceTab[ctxt->spaceNr];
1905 ctxt->spaceTab[ctxt->spaceNr] = -1;
1906 return(ret);
1907}
1908
1909/*
1910 * Macros for accessing the content. Those should be used only by the parser,
1911 * and not exported.
1912 *
1913 * Dirty macros, i.e. one often need to make assumption on the context to
1914 * use them
1915 *
1916 * CUR_PTR return the current pointer to the xmlChar to be parsed.
1917 * To be used with extreme caution since operations consuming
1918 * characters may move the input buffer to a different location !
 * CUR returns the current xmlChar value, i.e. an 8 bit value.
 * This should be used internally by the parser
 * only to compare to ASCII values, otherwise it would break when
 * running with UTF-8 encoding.
1923 * RAW same as CUR but in the input buffer, bypass any token
1924 * extraction that may have been done
 * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
 * to compare on ASCII based substring.
1927 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
Daniel Veillard77a90a72003-03-22 00:04:05 +00001928 * strings without newlines within the parser.
 * NEXT1 Skip 1 xmlChar, and must also be used only to skip 1 non-newline ASCII
 * defined char within the parser.
Owen Taylor3473f882001-02-23 17:55:21 +00001931 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
1932 *
1933 * NEXT Skip to the next character, this does the proper decoding
1934 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillard77a90a72003-03-22 00:04:05 +00001935 * NEXTL(l) Skip the current unicode character of l xmlChars long.
Owen Taylor3473f882001-02-23 17:55:21 +00001936 * CUR_CHAR(l) returns the current unicode character (int), set l
1937 * to the number of xmlChars used for the encoding [0-5].
1938 * CUR_SCHAR same but operate on a string instead of the context
1939 * COPY_BUF copy the current unicode char to the target buffer, increment
1940 * the index
1941 * GROW, SHRINK handling of input buffers
1942 */
1943
/*
 * Direct accessors on the current input of the parser context; they all
 * expect a variable named ctxt to be in scope at the point of use.
 */
#define RAW      (*(ctxt->input->cur))     /* byte at the current position */
#define CUR      (*(ctxt->input->cur))     /* same expansion as RAW */
#define NXT(val) (ctxt->input->cur[(val)]) /* byte (val) positions ahead */
#define CUR_PTR  ctxt->input->cur          /* the current position itself */
1948
/*
 * CMPn(s, c1, ..., cn): true when the first n bytes at s are exactly
 * c1 ... cn. The bytes are read as unsigned char and compared left to
 * right, stopping at the first mismatch.
 *
 * Every argument is parenthesized in the expansion so that expressions
 * such as "ptr + 1" can be passed safely, and the cast keeps the const
 * qualifier so read-only buffers can be checked without discarding it.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((const unsigned char *) (s))[ 0 ] == (c1) && \
    ((const unsigned char *) (s))[ 1 ] == (c2) && \
    ((const unsigned char *) (s))[ 2 ] == (c3) && \
    ((const unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && \
    ((const unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && \
    ((const unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && \
    ((const unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && \
    ((const unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((const unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((const unsigned char *) (s))[ 9 ] == (c10) )
1966
/*
 * SKIP(val): advance the current input by (val) bytes, adding (val) to
 * both the character count and the column. Afterwards a '%' at the new
 * position is handed to xmlParserHandlePEReference(), and an input which
 * is exhausted and cannot be grown is popped.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    ctxt->input->col += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)
1974
/*
 * SKIPL(val): like SKIP but line aware. Advances the current input one
 * byte at a time for (val) bytes; a '\n' increments the line and resets
 * the column to 1, any other byte increments the column. Afterwards a '%'
 * at the new position is handed to xmlParserHandlePEReference(), and an
 * input which is exhausted and cannot be grown is popped.
 *
 * (val) is parenthesized in the loop test so that any expression,
 * including a conditional one, can be used as the count. It is
 * re-evaluated on every iteration.
 */
#define SKIPL(val) do {                                                 \
    int skipl;                                                          \
    for(skipl=0; skipl < (val); skipl++) {                              \
        if (*(ctxt->input->cur) == '\n') {                              \
            ctxt->input->line++; ctxt->input->col = 1;                  \
        } else ctxt->input->col++;                                      \
        ctxt->nbChars++;                                                \
        ctxt->input->cur++;                                             \
    }                                                                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)
1989
Daniel Veillarda880b122003-04-21 21:36:41 +00001990#define SHRINK if ((ctxt->progressive == 0) && \
Daniel Veillard6155d8a2003-08-19 15:01:28 +00001991 (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
1992 (ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00001993 xmlSHRINK (ctxt);
1994
1995static void xmlSHRINK (xmlParserCtxtPtr ctxt) {
1996 xmlParserInputShrink(ctxt->input);
1997 if ((*ctxt->input->cur == 0) &&
1998 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1999 xmlPopInput(ctxt);
Daniel Veillard48b2f892001-02-25 16:11:03 +00002000 }
Owen Taylor3473f882001-02-23 17:55:21 +00002001
Daniel Veillarda880b122003-04-21 21:36:41 +00002002#define GROW if ((ctxt->progressive == 0) && \
2003 (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK)) \
Daniel Veillard46de64e2002-05-29 08:21:33 +00002004 xmlGROW (ctxt);
2005
2006static void xmlGROW (xmlParserCtxtPtr ctxt) {
2007 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillard59df7832010-02-02 10:24:01 +01002008 if ((ctxt->input->cur != NULL) && (*ctxt->input->cur == 0) &&
Daniel Veillard46de64e2002-05-29 08:21:33 +00002009 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2010 xmlPopInput(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +00002011}
Owen Taylor3473f882001-02-23 17:55:21 +00002012
/* SKIP_BLANKS: shorthand for xmlSkipBlankChars() on the current context */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

/* NEXT: shorthand for xmlNextChar() on the current context */
#define NEXT xmlNextChar(ctxt)

/*
 * NEXT1: step over exactly one byte, counting it as one character and
 * one column, and read more input when the new current byte is 0.
 * No line accounting is done.
 */
#define NEXT1 {                                                         \
        ctxt->nbChars++;                                                \
        ctxt->input->col++;                                             \
        ctxt->input->cur++;                                             \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }
2024
/*
 * NEXTL(l): step over the current character, which is (l) bytes long.
 * A '\n' increments the line and resets the column to 1, any other
 * character increments the column. Afterwards a '%' at the new position
 * is handed to xmlParserHandlePEReference().
 *
 * (l) is parenthesized in the expansion so that any expression can be
 * passed as the length.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->input->cur += (l);                                            \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)
2032
/* CUR_CHAR(l): xmlCurrentChar() on the current context, length stored in l */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
/* CUR_SCHAR(s, l): xmlStringCurrentChar() on string s, length stored in l */
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF(l,b,i,v): append character v to buffer b at index i and
 * advance i. A character of length 1 is stored directly as one xmlChar;
 * any other length goes through xmlCopyCharMultiByte(), whose return
 * value is added to i.
 *
 * The arguments are parenthesized and the body is wrapped in
 * do { } while (0) so the macro behaves as a single statement whatever
 * expressions are passed; it must be followed by a semicolon.
 */
#define COPY_BUF(l,b,i,v) do {                                          \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v));                   \
  } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +00002039
2040/**
2041 * xmlSkipBlankChars:
2042 * @ctxt: the XML parser context
2043 *
2044 * skip all blanks character found at that point in the input streams.
2045 * It pops up finished entities in the process if allowable at that point.
2046 *
2047 * Returns the number of space chars skipped
2048 */
2049
2050int
2051xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002052 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002053
2054 /*
2055 * It's Okay to use CUR/NEXT here since all the blanks are on
2056 * the ASCII range.
2057 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002058 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
2059 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002060 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +00002061 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +00002062 */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002063 cur = ctxt->input->cur;
William M. Brack76e95df2003-10-18 16:20:14 +00002064 while (IS_BLANK_CH(*cur)) {
Daniel Veillard02141ea2001-04-30 11:46:40 +00002065 if (*cur == '\n') {
2066 ctxt->input->line++; ctxt->input->col = 1;
2067 }
2068 cur++;
2069 res++;
2070 if (*cur == 0) {
2071 ctxt->input->cur = cur;
2072 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
2073 cur = ctxt->input->cur;
2074 }
2075 }
2076 ctxt->input->cur = cur;
2077 } else {
2078 int cur;
2079 do {
2080 cur = CUR;
Daniel Veillard7da92702005-01-23 20:15:53 +00002081 while (IS_BLANK_CH(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillard02141ea2001-04-30 11:46:40 +00002082 NEXT;
2083 cur = CUR;
2084 res++;
2085 }
2086 while ((cur == 0) && (ctxt->inputNr > 1) &&
2087 (ctxt->instate != XML_PARSER_COMMENT)) {
2088 xmlPopInput(ctxt);
2089 cur = CUR;
2090 }
2091 /*
2092 * Need to handle support of entities branching here
2093 */
2094 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
2095 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
2096 }
Owen Taylor3473f882001-02-23 17:55:21 +00002097 return(res);
2098}
2099
2100/************************************************************************
2101 * *
2102 * Commodity functions to handle entities *
2103 * *
2104 ************************************************************************/
2105
2106/**
2107 * xmlPopInput:
2108 * @ctxt: an XML parser context
2109 *
2110 * xmlPopInput: the current input pointed by ctxt->input came to an end
2111 * pop it and return the next char.
2112 *
2113 * Returns the current xmlChar in the parser context
2114 */
2115xmlChar
2116xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002117 if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002118 if (xmlParserDebugEntities)
2119 xmlGenericError(xmlGenericErrorContext,
2120 "Popping input %d\n", ctxt->inputNr);
2121 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +00002122 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +00002123 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
2124 return(xmlPopInput(ctxt));
2125 return(CUR);
2126}
2127
2128/**
2129 * xmlPushInput:
2130 * @ctxt: an XML parser context
2131 * @input: an XML parser input fragment (entity, XML fragment ...).
2132 *
2133 * xmlPushInput: switch to a new input stream which is stacked on top
2134 * of the previous one(s).
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002135 * Returns -1 in case of error or the index in the input stack
Owen Taylor3473f882001-02-23 17:55:21 +00002136 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002137int
Owen Taylor3473f882001-02-23 17:55:21 +00002138xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002139 int ret;
2140 if (input == NULL) return(-1);
Owen Taylor3473f882001-02-23 17:55:21 +00002141
2142 if (xmlParserDebugEntities) {
2143 if ((ctxt->input != NULL) && (ctxt->input->filename))
2144 xmlGenericError(xmlGenericErrorContext,
2145 "%s(%d): ", ctxt->input->filename,
2146 ctxt->input->line);
2147 xmlGenericError(xmlGenericErrorContext,
2148 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2149 }
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002150 ret = inputPush(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00002151 GROW;
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002152 return(ret);
Owen Taylor3473f882001-02-23 17:55:21 +00002153}
2154
2155/**
2156 * xmlParseCharRef:
2157 * @ctxt: an XML parser context
2158 *
2159 * parse Reference declarations
2160 *
2161 * [66] CharRef ::= '&#' [0-9]+ ';' |
2162 * '&#x' [0-9a-fA-F]+ ';'
2163 *
2164 * [ WFC: Legal Character ]
2165 * Characters referred to using character references must match the
2166 * production for Char.
2167 *
2168 * Returns the value parsed (as an int), 0 in case of error
2169 */
2170int
2171xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +00002172 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002173 int count = 0;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002174 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002175
Owen Taylor3473f882001-02-23 17:55:21 +00002176 /*
2177 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2178 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00002179 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +00002180 (NXT(2) == 'x')) {
2181 SKIP(3);
2182 GROW;
2183 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002184 if (count++ > 20) {
2185 count = 0;
2186 GROW;
2187 }
2188 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002189 val = val * 16 + (CUR - '0');
2190 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2191 val = val * 16 + (CUR - 'a') + 10;
2192 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2193 val = val * 16 + (CUR - 'A') + 10;
2194 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002195 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002196 val = 0;
2197 break;
2198 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002199 if (val > 0x10FFFF)
2200 outofrange = val;
2201
Owen Taylor3473f882001-02-23 17:55:21 +00002202 NEXT;
2203 count++;
2204 }
2205 if (RAW == ';') {
2206 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002207 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002208 ctxt->nbChars ++;
2209 ctxt->input->cur++;
2210 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002211 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002212 SKIP(2);
2213 GROW;
2214 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002215 if (count++ > 20) {
2216 count = 0;
2217 GROW;
2218 }
2219 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +00002220 val = val * 10 + (CUR - '0');
2221 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002222 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002223 val = 0;
2224 break;
2225 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002226 if (val > 0x10FFFF)
2227 outofrange = val;
2228
Owen Taylor3473f882001-02-23 17:55:21 +00002229 NEXT;
2230 count++;
2231 }
2232 if (RAW == ';') {
2233 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
Daniel Veillard77a90a72003-03-22 00:04:05 +00002234 ctxt->input->col++;
Owen Taylor3473f882001-02-23 17:55:21 +00002235 ctxt->nbChars ++;
2236 ctxt->input->cur++;
2237 }
2238 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002239 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002240 }
2241
2242 /*
2243 * [ WFC: Legal Character ]
2244 * Characters referred to using character references must match the
2245 * production for Char.
2246 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002247 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002248 return(val);
2249 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002250 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2251 "xmlParseCharRef: invalid xmlChar value %d\n",
2252 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002253 }
2254 return(0);
2255}
2256
2257/**
2258 * xmlParseStringCharRef:
2259 * @ctxt: an XML parser context
2260 * @str: a pointer to an index in the string
2261 *
2262 * parse Reference declarations, variant parsing from a string rather
 * than an input flow.
2264 *
2265 * [66] CharRef ::= '&#' [0-9]+ ';' |
2266 * '&#x' [0-9a-fA-F]+ ';'
2267 *
2268 * [ WFC: Legal Character ]
2269 * Characters referred to using character references must match the
2270 * production for Char.
2271 *
2272 * Returns the value parsed (as an int), 0 in case of error, str will be
2273 * updated to the current value of the index
2274 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00002275static int
Owen Taylor3473f882001-02-23 17:55:21 +00002276xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2277 const xmlChar *ptr;
2278 xmlChar cur;
Daniel Veillard37fd3072004-06-03 11:22:31 +00002279 unsigned int val = 0;
2280 unsigned int outofrange = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002281
2282 if ((str == NULL) || (*str == NULL)) return(0);
2283 ptr = *str;
2284 cur = *ptr;
2285 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2286 ptr += 3;
2287 cur = *ptr;
2288 while (cur != ';') { /* Non input consuming loop */
2289 if ((cur >= '0') && (cur <= '9'))
2290 val = val * 16 + (cur - '0');
2291 else if ((cur >= 'a') && (cur <= 'f'))
2292 val = val * 16 + (cur - 'a') + 10;
2293 else if ((cur >= 'A') && (cur <= 'F'))
2294 val = val * 16 + (cur - 'A') + 10;
2295 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002296 xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002297 val = 0;
2298 break;
2299 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002300 if (val > 0x10FFFF)
2301 outofrange = val;
2302
Owen Taylor3473f882001-02-23 17:55:21 +00002303 ptr++;
2304 cur = *ptr;
2305 }
2306 if (cur == ';')
2307 ptr++;
2308 } else if ((cur == '&') && (ptr[1] == '#')){
2309 ptr += 2;
2310 cur = *ptr;
2311 while (cur != ';') { /* Non input consuming loops */
2312 if ((cur >= '0') && (cur <= '9'))
2313 val = val * 10 + (cur - '0');
2314 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002315 xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002316 val = 0;
2317 break;
2318 }
Daniel Veillard37fd3072004-06-03 11:22:31 +00002319 if (val > 0x10FFFF)
2320 outofrange = val;
2321
Owen Taylor3473f882001-02-23 17:55:21 +00002322 ptr++;
2323 cur = *ptr;
2324 }
2325 if (cur == ';')
2326 ptr++;
2327 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002328 xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002329 return(0);
2330 }
2331 *str = ptr;
2332
2333 /*
2334 * [ WFC: Legal Character ]
2335 * Characters referred to using character references must match the
2336 * production for Char.
2337 */
Daniel Veillard37fd3072004-06-03 11:22:31 +00002338 if ((IS_CHAR(val) && (outofrange == 0))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002339 return(val);
2340 } else {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00002341 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2342 "xmlParseStringCharRef: invalid xmlChar value %d\n",
2343 val);
Owen Taylor3473f882001-02-23 17:55:21 +00002344 }
2345 return(0);
2346}
2347
2348/**
Daniel Veillardf5582f12002-06-11 10:08:16 +00002349 * xmlNewBlanksWrapperInputStream:
2350 * @ctxt: an XML parser context
2351 * @entity: an Entity pointer
2352 *
2353 * Create a new input stream for wrapping
2354 * blanks around a PEReference
2355 *
2356 * Returns the new input stream or NULL
2357 */
2358
2359static void deallocblankswrapper (xmlChar *str) {xmlFree(str);}
2360
Daniel Veillardf4862f02002-09-10 11:13:43 +00002361static xmlParserInputPtr
Daniel Veillardf5582f12002-06-11 10:08:16 +00002362xmlNewBlanksWrapperInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
2363 xmlParserInputPtr input;
2364 xmlChar *buffer;
2365 size_t length;
2366 if (entity == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002367 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2368 "xmlNewBlanksWrapperInputStream entity\n");
Daniel Veillardf5582f12002-06-11 10:08:16 +00002369 return(NULL);
2370 }
2371 if (xmlParserDebugEntities)
2372 xmlGenericError(xmlGenericErrorContext,
2373 "new blanks wrapper for entity: %s\n", entity->name);
2374 input = xmlNewInputStream(ctxt);
2375 if (input == NULL) {
2376 return(NULL);
2377 }
2378 length = xmlStrlen(entity->name) + 5;
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002379 buffer = xmlMallocAtomic(length);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002380 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002381 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002382 xmlFree(input);
Daniel Veillardf5582f12002-06-11 10:08:16 +00002383 return(NULL);
2384 }
2385 buffer [0] = ' ';
2386 buffer [1] = '%';
2387 buffer [length-3] = ';';
2388 buffer [length-2] = ' ';
2389 buffer [length-1] = 0;
2390 memcpy(buffer + 2, entity->name, length - 5);
2391 input->free = deallocblankswrapper;
2392 input->base = buffer;
2393 input->cur = buffer;
2394 input->length = length;
2395 input->end = &buffer[length];
2396 return(input);
2397}
2398
2399/**
Owen Taylor3473f882001-02-23 17:55:21 +00002400 * xmlParserHandlePEReference:
2401 * @ctxt: the parser context
2402 *
2403 * [69] PEReference ::= '%' Name ';'
2404 *
2405 * [ WFC: No Recursion ]
2406 * A parsed entity must not contain a recursive
2407 * reference to itself, either directly or indirectly.
2408 *
2409 * [ WFC: Entity Declared ]
2410 * In a document without any DTD, a document with only an internal DTD
2411 * subset which contains no parameter entity references, or a document
2412 * with "standalone='yes'", ... ... The declaration of a parameter
2413 * entity must precede any reference to it...
2414 *
2415 * [ VC: Entity Declared ]
2416 * In a document with an external subset or external parameter entities
2417 * with "standalone='no'", ... ... The declaration of a parameter entity
2418 * must precede any reference to it...
2419 *
2420 * [ WFC: In DTD ]
2421 * Parameter-entity references may only appear in the DTD.
2422 * NOTE: misleading but this is handled.
2423 *
2424 * A PEReference may have been detected in the current input stream
2425 * the handling is done accordingly to
2426 * http://www.w3.org/TR/REC-xml#entproc
2427 * i.e.
2428 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002429 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +00002430 */
2431void
2432xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00002433 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00002434 xmlEntityPtr entity = NULL;
2435 xmlParserInputPtr input;
2436
Owen Taylor3473f882001-02-23 17:55:21 +00002437 if (RAW != '%') return;
2438 switch(ctxt->instate) {
2439 case XML_PARSER_CDATA_SECTION:
2440 return;
2441 case XML_PARSER_COMMENT:
2442 return;
2443 case XML_PARSER_START_TAG:
2444 return;
2445 case XML_PARSER_END_TAG:
2446 return;
2447 case XML_PARSER_EOF:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002448 xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002449 return;
2450 case XML_PARSER_PROLOG:
2451 case XML_PARSER_START:
2452 case XML_PARSER_MISC:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002453 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002454 return;
2455 case XML_PARSER_ENTITY_DECL:
2456 case XML_PARSER_CONTENT:
2457 case XML_PARSER_ATTRIBUTE_VALUE:
2458 case XML_PARSER_PI:
2459 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002460 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +00002461 /* we just ignore it there */
2462 return;
2463 case XML_PARSER_EPILOG:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002464 xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002465 return;
2466 case XML_PARSER_ENTITY_VALUE:
2467 /*
2468 * NOTE: in the case of entity values, we don't do the
2469 * substitution here since we need the literal
2470 * entity value to be able to save the internal
2471 * subset of the document.
2472 * This will be handled by xmlStringDecodeEntities
2473 */
2474 return;
2475 case XML_PARSER_DTD:
2476 /*
2477 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2478 * In the internal DTD subset, parameter-entity references
2479 * can occur only where markup declarations can occur, not
2480 * within markup declarations.
2481 * In that case this is handled in xmlParseMarkupDecl
2482 */
2483 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2484 return;
William M. Brack76e95df2003-10-18 16:20:14 +00002485 if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
Daniel Veillardf5582f12002-06-11 10:08:16 +00002486 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002487 break;
2488 case XML_PARSER_IGNORE:
2489 return;
2490 }
2491
2492 NEXT;
2493 name = xmlParseName(ctxt);
2494 if (xmlParserDebugEntities)
2495 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002496 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002497 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002498 xmlFatalErr(ctxt, XML_ERR_PEREF_NO_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002499 } else {
2500 if (RAW == ';') {
2501 NEXT;
2502 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2503 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2504 if (entity == NULL) {
2505
2506 /*
2507 * [ WFC: Entity Declared ]
2508 * In a document without any DTD, a document with only an
2509 * internal DTD subset which contains no parameter entity
2510 * references, or a document with "standalone='yes'", ...
2511 * ... The declaration of a parameter entity must precede
2512 * any reference to it...
2513 */
2514 if ((ctxt->standalone == 1) ||
2515 ((ctxt->hasExternalSubset == 0) &&
2516 (ctxt->hasPErefs == 0))) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002517 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
Owen Taylor3473f882001-02-23 17:55:21 +00002518 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00002519 } else {
2520 /*
2521 * [ VC: Entity Declared ]
2522 * In a document with an external subset or external
2523 * parameter entities with "standalone='no'", ...
2524 * ... The declaration of a parameter entity must precede
2525 * any reference to it...
2526 */
Daniel Veillard24eb9782003-10-04 21:08:09 +00002527 if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
2528 xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
2529 "PEReference: %%%s; not found\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00002530 name, NULL);
Daniel Veillard24eb9782003-10-04 21:08:09 +00002531 } else
2532 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
2533 "PEReference: %%%s; not found\n",
2534 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002535 ctxt->valid = 0;
2536 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00002537 } else if (ctxt->input->free != deallocblankswrapper) {
2538 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002539 if (xmlPushInput(ctxt, input) < 0)
2540 return;
Owen Taylor3473f882001-02-23 17:55:21 +00002541 } else {
2542 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2543 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +00002544 xmlChar start[4];
2545 xmlCharEncoding enc;
2546
Owen Taylor3473f882001-02-23 17:55:21 +00002547 /*
2548 * handle the extra spaces added before and after
2549 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002550 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +00002551 */
2552 input = xmlNewEntityInputStream(ctxt, entity);
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002553 if (xmlPushInput(ctxt, input) < 0)
2554 return;
Daniel Veillard87a764e2001-06-20 17:41:10 +00002555
2556 /*
2557 * Get the 4 first bytes and decode the charset
2558 * if enc != XML_CHAR_ENCODING_NONE
2559 * plug some encoding conversion routines.
William M. Bracka0c48ad2004-04-16 15:58:29 +00002560 * Note that, since we may have some non-UTF8
2561 * encoding (like UTF16, bug 135229), the 'length'
2562 * is not known, but we can calculate based upon
2563 * the amount of data in the buffer.
Daniel Veillard87a764e2001-06-20 17:41:10 +00002564 */
2565 GROW
William M. Bracka0c48ad2004-04-16 15:58:29 +00002566 if ((ctxt->input->end - ctxt->input->cur)>=4) {
Daniel Veillarde059b892002-06-13 15:32:10 +00002567 start[0] = RAW;
2568 start[1] = NXT(1);
2569 start[2] = NXT(2);
2570 start[3] = NXT(3);
2571 enc = xmlDetectCharEncoding(start, 4);
2572 if (enc != XML_CHAR_ENCODING_NONE) {
2573 xmlSwitchEncoding(ctxt, enc);
2574 }
Daniel Veillard87a764e2001-06-20 17:41:10 +00002575 }
2576
Owen Taylor3473f882001-02-23 17:55:21 +00002577 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
Daniel Veillarda07050d2003-10-19 14:46:32 +00002578 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l' )) &&
2579 (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +00002580 xmlParseTextDecl(ctxt);
2581 }
Owen Taylor3473f882001-02-23 17:55:21 +00002582 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00002583 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
2584 "PEReference: %s is not a parameter entity\n",
2585 name);
Owen Taylor3473f882001-02-23 17:55:21 +00002586 }
2587 }
2588 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002589 xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002590 }
Owen Taylor3473f882001-02-23 17:55:21 +00002591 }
2592}
2593
/*
 * Macro used to grow the current buffer.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures
 *
 * The new size is twice the current one plus n. The expansion jumps to
 * mem_error, leaving buffer and buffer##_size untouched, when the size
 * computation wraps around or when the reallocation fails.
 * n is parenthesized in the expansion so that any expression can be
 * passed; buffer must be a plain identifier since it is token-pasted.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2608
2609/**
Daniel Veillard7a02cfe2003-09-25 12:18:34 +00002610 * xmlStringLenDecodeEntities:
Owen Taylor3473f882001-02-23 17:55:21 +00002611 * @ctxt: the parser context
2612 * @str: the input string
Daniel Veillarde57ec792003-09-10 10:50:59 +00002613 * @len: the string length
Owen Taylor3473f882001-02-23 17:55:21 +00002614 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2615 * @end: an end marker xmlChar, 0 if none
2616 * @end2: an end marker xmlChar, 0 if none
2617 * @end3: an end marker xmlChar, 0 if none
2618 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002619 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +00002620 *
2621 * [67] Reference ::= EntityRef | CharRef
2622 *
2623 * [69] PEReference ::= '%' Name ';'
2624 *
2625 * Returns A newly allocated string with the substitution done. The caller
2626 * must deallocate it !
2627 */
2628xmlChar *
Daniel Veillarde57ec792003-09-10 10:50:59 +00002629xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2630 int what, xmlChar end, xmlChar end2, xmlChar end3) {
Owen Taylor3473f882001-02-23 17:55:21 +00002631 xmlChar *buffer = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002632 size_t buffer_size = 0;
2633 size_t nbchars = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002634
2635 xmlChar *current = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00002636 xmlChar *rep = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +00002637 const xmlChar *last;
Owen Taylor3473f882001-02-23 17:55:21 +00002638 xmlEntityPtr ent;
2639 int c,l;
Owen Taylor3473f882001-02-23 17:55:21 +00002640
Daniel Veillarda82b1822004-11-08 16:24:57 +00002641 if ((ctxt == NULL) || (str == NULL) || (len < 0))
Owen Taylor3473f882001-02-23 17:55:21 +00002642 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002643 last = str + len;
Owen Taylor3473f882001-02-23 17:55:21 +00002644
Daniel Veillard0161e632008-08-28 15:36:32 +00002645 if (((ctxt->depth > 40) &&
2646 ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2647 (ctxt->depth > 1024)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002648 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002649 return(NULL);
2650 }
2651
2652 /*
2653 * allocate a translation buffer.
2654 */
2655 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002656 buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002657 if (buffer == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00002658
2659 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002660 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002661 * we are operating on already parsed values.
2662 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00002663 if (str < last)
2664 c = CUR_SCHAR(str, l);
2665 else
2666 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002667 while ((c != 0) && (c != end) && /* non input consuming loop */
2668 (c != end2) && (c != end3)) {
2669
2670 if (c == 0) break;
2671 if ((c == '&') && (str[1] == '#')) {
2672 int val = xmlParseStringCharRef(ctxt, &str);
2673 if (val != 0) {
2674 COPY_BUF(0,buffer,nbchars,val);
2675 }
Daniel Veillard459eeb92012-07-17 16:19:17 +08002676 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002677 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002678 }
Owen Taylor3473f882001-02-23 17:55:21 +00002679 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2680 if (xmlParserDebugEntities)
2681 xmlGenericError(xmlGenericErrorContext,
2682 "String decoding Entity Reference: %.30s\n",
2683 str);
2684 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillard8915c152008-08-26 13:05:34 +00002685 if ((ctxt->lastError.code == XML_ERR_ENTITY_LOOP) ||
2686 (ctxt->lastError.code == XML_ERR_INTERNAL_ERROR))
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002687 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002688 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002689 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002690 if ((ent != NULL) &&
2691 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2692 if (ent->content != NULL) {
2693 COPY_BUF(0,buffer,nbchars,ent->content[0]);
Daniel Veillard459eeb92012-07-17 16:19:17 +08002694 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002695 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Daniel Veillardbedc9772005-09-28 21:42:15 +00002696 }
Owen Taylor3473f882001-02-23 17:55:21 +00002697 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00002698 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2699 "predefined entity has no content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002700 }
2701 } else if ((ent != NULL) && (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002702 ctxt->depth++;
2703 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2704 0, 0, 0);
2705 ctxt->depth--;
Daniel Veillard0161e632008-08-28 15:36:32 +00002706
Owen Taylor3473f882001-02-23 17:55:21 +00002707 if (rep != NULL) {
2708 current = rep;
2709 while (*current != 0) { /* non input consuming loop */
2710 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002711 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002712 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2713 goto int_error;
2714 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002715 }
2716 }
2717 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002718 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002719 }
2720 } else if (ent != NULL) {
2721 int i = xmlStrlen(ent->name);
2722 const xmlChar *cur = ent->name;
2723
2724 buffer[nbchars++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08002725 if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard5bd3c062011-12-16 18:53:35 +08002726 growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002727 }
2728 for (;i > 0;i--)
2729 buffer[nbchars++] = *cur++;
2730 buffer[nbchars++] = ';';
2731 }
2732 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2733 if (xmlParserDebugEntities)
2734 xmlGenericError(xmlGenericErrorContext,
2735 "String decoding PE Reference: %.30s\n", str);
2736 ent = xmlParseStringPEReference(ctxt, &str);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002737 if (ctxt->lastError.code == XML_ERR_ENTITY_LOOP)
2738 goto int_error;
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002739 if (ent != NULL)
Daniel Veillardf4f4e482008-08-25 08:57:48 +00002740 ctxt->nbentities += ent->checked;
Owen Taylor3473f882001-02-23 17:55:21 +00002741 if (ent != NULL) {
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002742 if (ent->content == NULL) {
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00002743 xmlLoadEntityContent(ctxt, ent);
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002744 }
Owen Taylor3473f882001-02-23 17:55:21 +00002745 ctxt->depth++;
2746 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2747 0, 0, 0);
2748 ctxt->depth--;
2749 if (rep != NULL) {
2750 current = rep;
2751 while (*current != 0) { /* non input consuming loop */
2752 buffer[nbchars++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002753 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00002754 if (xmlParserEntityCheck(ctxt, nbchars, ent))
2755 goto int_error;
2756 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002757 }
2758 }
2759 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00002760 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002761 }
2762 }
2763 } else {
2764 COPY_BUF(l,buffer,nbchars,c);
2765 str += l;
Daniel Veillard459eeb92012-07-17 16:19:17 +08002766 if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2767 growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
Owen Taylor3473f882001-02-23 17:55:21 +00002768 }
2769 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00002770 if (str < last)
2771 c = CUR_SCHAR(str, l);
2772 else
2773 c = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002774 }
Daniel Veillard13cee4e2009-09-05 14:52:55 +02002775 buffer[nbchars] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002776 return(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002777
2778mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002779 xmlErrMemory(ctxt, NULL);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00002780int_error:
Daniel Veillard68b6e022008-03-31 09:26:00 +00002781 if (rep != NULL)
2782 xmlFree(rep);
2783 if (buffer != NULL)
2784 xmlFree(buffer);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00002785 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002786}
2787
Daniel Veillarde57ec792003-09-10 10:50:59 +00002788/**
2789 * xmlStringDecodeEntities:
2790 * @ctxt: the parser context
2791 * @str: the input string
2792 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2793 * @end: an end marker xmlChar, 0 if none
2794 * @end2: an end marker xmlChar, 0 if none
2795 * @end3: an end marker xmlChar, 0 if none
2796 *
2797 * Takes a entity string content and process to do the adequate substitutions.
2798 *
2799 * [67] Reference ::= EntityRef | CharRef
2800 *
2801 * [69] PEReference ::= '%' Name ';'
2802 *
2803 * Returns A newly allocated string with the substitution done. The caller
2804 * must deallocate it !
2805 */
2806xmlChar *
2807xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2808 xmlChar end, xmlChar end2, xmlChar end3) {
Daniel Veillarda82b1822004-11-08 16:24:57 +00002809 if ((ctxt == NULL) || (str == NULL)) return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00002810 return(xmlStringLenDecodeEntities(ctxt, str, xmlStrlen(str), what,
2811 end, end2, end3));
2812}
Owen Taylor3473f882001-02-23 17:55:21 +00002813
2814/************************************************************************
2815 * *
Owen Taylor3473f882001-02-23 17:55:21 +00002816 * Commodity functions, cleanup needed ? *
2817 * *
2818 ************************************************************************/
2819
2820/**
2821 * areBlanks:
2822 * @ctxt: an XML parser context
2823 * @str: a xmlChar *
2824 * @len: the size of @str
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002825 * @blank_chars: we know the chars are blanks
Owen Taylor3473f882001-02-23 17:55:21 +00002826 *
2827 * Is this a sequence of blank chars that one can ignore ?
2828 *
2829 * Returns 1 if ignorable 0 otherwise.
2830 */
2831
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002832static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2833 int blank_chars) {
Owen Taylor3473f882001-02-23 17:55:21 +00002834 int i, ret;
2835 xmlNodePtr lastChild;
2836
Daniel Veillard05c13a22001-09-09 08:38:09 +00002837 /*
2838 * Don't spend time trying to differentiate them, the same callback is
2839 * used !
2840 */
2841 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00002842 return(0);
2843
Owen Taylor3473f882001-02-23 17:55:21 +00002844 /*
2845 * Check for xml:space value.
2846 */
Daniel Veillard1114d002006-10-12 16:24:35 +00002847 if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
2848 (*(ctxt->space) == -2))
Owen Taylor3473f882001-02-23 17:55:21 +00002849 return(0);
2850
2851 /*
2852 * Check that the string is made of blanks
2853 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00002854 if (blank_chars == 0) {
2855 for (i = 0;i < len;i++)
2856 if (!(IS_BLANK_CH(str[i]))) return(0);
2857 }
Owen Taylor3473f882001-02-23 17:55:21 +00002858
2859 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002860 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00002861 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00002862 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002863 if (ctxt->myDoc != NULL) {
2864 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
2865 if (ret == 0) return(1);
2866 if (ret == 1) return(0);
2867 }
2868
2869 /*
2870 * Otherwise, heuristic :-\
2871 */
Daniel Veillardabac41e2005-07-06 15:17:38 +00002872 if ((RAW != '<') && (RAW != 0xD)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002873 if ((ctxt->node->children == NULL) &&
2874 (RAW == '<') && (NXT(1) == '/')) return(0);
2875
2876 lastChild = xmlGetLastChild(ctxt->node);
2877 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00002878 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
2879 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00002880 } else if (xmlNodeIsText(lastChild))
2881 return(0);
2882 else if ((ctxt->node->children != NULL) &&
2883 (xmlNodeIsText(ctxt->node->children)))
2884 return(0);
2885 return(1);
2886}
2887
Owen Taylor3473f882001-02-23 17:55:21 +00002888/************************************************************************
2889 * *
2890 * Extra stuff for namespace support *
2891 * Relates to http://www.w3.org/TR/WD-xml-names *
2892 * *
2893 ************************************************************************/
2894
2895/**
2896 * xmlSplitQName:
2897 * @ctxt: an XML parser context
2898 * @name: an XML parser context
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002899 * @prefix: a xmlChar **
Owen Taylor3473f882001-02-23 17:55:21 +00002900 *
2901 * parse an UTF8 encoded XML qualified name string
2902 *
2903 * [NS 5] QName ::= (Prefix ':')? LocalPart
2904 *
2905 * [NS 6] Prefix ::= NCName
2906 *
2907 * [NS 7] LocalPart ::= NCName
2908 *
2909 * Returns the local part, and prefix is updated
2910 * to get the Prefix if any.
2911 */
2912
2913xmlChar *
2914xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
2915 xmlChar buf[XML_MAX_NAMELEN + 5];
2916 xmlChar *buffer = NULL;
2917 int len = 0;
2918 int max = XML_MAX_NAMELEN;
2919 xmlChar *ret = NULL;
2920 const xmlChar *cur = name;
2921 int c;
2922
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00002923 if (prefix == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002924 *prefix = NULL;
2925
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00002926 if (cur == NULL) return(NULL);
2927
Owen Taylor3473f882001-02-23 17:55:21 +00002928#ifndef XML_XML_NAMESPACE
2929 /* xml: prefix is not really a namespace */
2930 if ((cur[0] == 'x') && (cur[1] == 'm') &&
2931 (cur[2] == 'l') && (cur[3] == ':'))
2932 return(xmlStrdup(name));
2933#endif
2934
Daniel Veillard597bc482003-07-24 16:08:28 +00002935 /* nasty but well=formed */
Owen Taylor3473f882001-02-23 17:55:21 +00002936 if (cur[0] == ':')
2937 return(xmlStrdup(name));
2938
2939 c = *cur++;
2940 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
2941 buf[len++] = c;
2942 c = *cur++;
2943 }
2944 if (len >= max) {
2945 /*
2946 * Okay someone managed to make a huge name, so he's ready to pay
2947 * for the processing speed.
2948 */
2949 max = len * 2;
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002950
Daniel Veillard3c908dc2003-04-19 00:07:51 +00002951 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00002952 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002953 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002954 return(NULL);
2955 }
2956 memcpy(buffer, buf, len);
2957 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
2958 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00002959 xmlChar *tmp;
2960
Owen Taylor3473f882001-02-23 17:55:21 +00002961 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00002962 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00002963 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00002964 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00002965 xmlFree(buffer);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00002966 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00002967 return(NULL);
2968 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00002969 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00002970 }
2971 buffer[len++] = c;
2972 c = *cur++;
2973 }
2974 buffer[len] = 0;
2975 }
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00002976
Daniel Veillard597bc482003-07-24 16:08:28 +00002977 if ((c == ':') && (*cur == 0)) {
Daniel Veillard02a49632006-10-13 12:42:31 +00002978 if (buffer != NULL)
2979 xmlFree(buffer);
2980 *prefix = NULL;
Daniel Veillard597bc482003-07-24 16:08:28 +00002981 return(xmlStrdup(name));
Daniel Veillard02a49632006-10-13 12:42:31 +00002982 }
Daniel Veillard597bc482003-07-24 16:08:28 +00002983
Owen Taylor3473f882001-02-23 17:55:21 +00002984 if (buffer == NULL)
2985 ret = xmlStrndup(buf, len);
2986 else {
2987 ret = buffer;
2988 buffer = NULL;
2989 max = XML_MAX_NAMELEN;
2990 }
2991
2992
2993 if (c == ':') {
Daniel Veillardbb284f42002-10-16 18:02:47 +00002994 c = *cur;
Owen Taylor3473f882001-02-23 17:55:21 +00002995 *prefix = ret;
Daniel Veillard597bc482003-07-24 16:08:28 +00002996 if (c == 0) {
Daniel Veillard8d73bcb2003-08-04 01:06:15 +00002997 return(xmlStrndup(BAD_CAST "", 0));
Daniel Veillard597bc482003-07-24 16:08:28 +00002998 }
Owen Taylor3473f882001-02-23 17:55:21 +00002999 len = 0;
3000
Daniel Veillardbb284f42002-10-16 18:02:47 +00003001 /*
3002 * Check that the first character is proper to start
3003 * a new name
3004 */
3005 if (!(((c >= 0x61) && (c <= 0x7A)) ||
3006 ((c >= 0x41) && (c <= 0x5A)) ||
3007 (c == '_') || (c == ':'))) {
3008 int l;
3009 int first = CUR_SCHAR(cur, l);
3010
3011 if (!IS_LETTER(first) && (first != '_')) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003012 xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
Daniel Veillardbb284f42002-10-16 18:02:47 +00003013 "Name %s is not XML Namespace compliant\n",
Daniel Veillardbc92eca2003-09-15 09:48:06 +00003014 name);
Daniel Veillardbb284f42002-10-16 18:02:47 +00003015 }
3016 }
3017 cur++;
3018
Owen Taylor3473f882001-02-23 17:55:21 +00003019 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3020 buf[len++] = c;
3021 c = *cur++;
3022 }
3023 if (len >= max) {
3024 /*
3025 * Okay someone managed to make a huge name, so he's ready to pay
3026 * for the processing speed.
3027 */
3028 max = len * 2;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003029
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003030 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003031 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003032 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003033 return(NULL);
3034 }
3035 memcpy(buffer, buf, len);
3036 while (c != 0) { /* tested bigname2.xml */
3037 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003038 xmlChar *tmp;
3039
Owen Taylor3473f882001-02-23 17:55:21 +00003040 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003041 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003042 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003043 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003044 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003045 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003046 return(NULL);
3047 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003048 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003049 }
3050 buffer[len++] = c;
3051 c = *cur++;
3052 }
3053 buffer[len] = 0;
3054 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003055
Owen Taylor3473f882001-02-23 17:55:21 +00003056 if (buffer == NULL)
3057 ret = xmlStrndup(buf, len);
3058 else {
3059 ret = buffer;
3060 }
3061 }
3062
3063 return(ret);
3064}
3065
3066/************************************************************************
3067 * *
3068 * The parser itself *
3069 * Relates to http://www.w3.org/TR/REC-xml *
3070 * *
3071 ************************************************************************/
3072
Daniel Veillard34e3f642008-07-29 09:02:27 +00003073/************************************************************************
3074 * *
3075 * Routines to parse Name, NCName and NmToken *
3076 * *
3077 ************************************************************************/
#ifdef DEBUG
/*
 * Debug-only call counters: each one is incremented on entry to the
 * name-parsing routine it is named after (see the matching
 * "#ifdef DEBUG" sections in the functions below).
 */
static unsigned long nbParseName = 0;
static unsigned long nbParseNmToken = 0;
static unsigned long nbParseNCName = 0;
static unsigned long nbParseNCNameComplex = 0;
static unsigned long nbParseNameComplex = 0;
static unsigned long nbParseStringName = 0;
#endif
3086
Daniel Veillard34e3f642008-07-29 09:02:27 +00003087/*
3088 * The two following functions are related to the change of accepted
3089 * characters for Name and NmToken in the Revision 5 of XML-1.0
3090 * They correspond to the modified production [4] and the new production [4a]
3091 * changes in that revision. Also note that the macros used for the
3092 * productions Letter, Digit, CombiningChar and Extender are not needed
3093 * anymore.
3094 * We still keep compatibility to pre-revision5 parsing semantic if the
3095 * new XML_PARSE_OLD10 option is given to the parser.
3096 */
3097static int
3098xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3099 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3100 /*
3101 * Use the new checks of production [4] [4a] amd [5] of the
3102 * Update 5 of XML-1.0
3103 */
3104 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3105 (((c >= 'a') && (c <= 'z')) ||
3106 ((c >= 'A') && (c <= 'Z')) ||
3107 (c == '_') || (c == ':') ||
3108 ((c >= 0xC0) && (c <= 0xD6)) ||
3109 ((c >= 0xD8) && (c <= 0xF6)) ||
3110 ((c >= 0xF8) && (c <= 0x2FF)) ||
3111 ((c >= 0x370) && (c <= 0x37D)) ||
3112 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3113 ((c >= 0x200C) && (c <= 0x200D)) ||
3114 ((c >= 0x2070) && (c <= 0x218F)) ||
3115 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3116 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3117 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3118 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3119 ((c >= 0x10000) && (c <= 0xEFFFF))))
3120 return(1);
3121 } else {
3122 if (IS_LETTER(c) || (c == '_') || (c == ':'))
3123 return(1);
3124 }
3125 return(0);
3126}
3127
3128static int
3129xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3130 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3131 /*
3132 * Use the new checks of production [4] [4a] amd [5] of the
3133 * Update 5 of XML-1.0
3134 */
3135 if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3136 (((c >= 'a') && (c <= 'z')) ||
3137 ((c >= 'A') && (c <= 'Z')) ||
3138 ((c >= '0') && (c <= '9')) || /* !start */
3139 (c == '_') || (c == ':') ||
3140 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3141 ((c >= 0xC0) && (c <= 0xD6)) ||
3142 ((c >= 0xD8) && (c <= 0xF6)) ||
3143 ((c >= 0xF8) && (c <= 0x2FF)) ||
3144 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3145 ((c >= 0x370) && (c <= 0x37D)) ||
3146 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3147 ((c >= 0x200C) && (c <= 0x200D)) ||
3148 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3149 ((c >= 0x2070) && (c <= 0x218F)) ||
3150 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3151 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3152 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3153 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3154 ((c >= 0x10000) && (c <= 0xEFFFF))))
3155 return(1);
3156 } else {
3157 if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3158 (c == '.') || (c == '-') ||
3159 (c == '_') || (c == ':') ||
3160 (IS_COMBINING(c)) ||
3161 (IS_EXTENDER(c)))
3162 return(1);
3163 }
3164 return(0);
3165}
3166
/*
 * Forward declaration of a file-local attribute value parser; its
 * definition is not part of this section of the file.
 */
static xmlChar * xmlParseAttValueInternal(xmlParserCtxtPtr ctxt,
              int *len, int *alloc, int normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003169
Daniel Veillard34e3f642008-07-29 09:02:27 +00003170static const xmlChar *
3171xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3172 int len = 0, l;
3173 int c;
3174 int count = 0;
3175
Daniel Veillardc6561462009-03-25 10:22:31 +00003176#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003177 nbParseNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003178#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003179
3180 /*
3181 * Handler for more complex cases
3182 */
3183 GROW;
3184 c = CUR_CHAR(l);
3185 if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3186 /*
3187 * Use the new checks of production [4] [4a] amd [5] of the
3188 * Update 5 of XML-1.0
3189 */
3190 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3191 (!(((c >= 'a') && (c <= 'z')) ||
3192 ((c >= 'A') && (c <= 'Z')) ||
3193 (c == '_') || (c == ':') ||
3194 ((c >= 0xC0) && (c <= 0xD6)) ||
3195 ((c >= 0xD8) && (c <= 0xF6)) ||
3196 ((c >= 0xF8) && (c <= 0x2FF)) ||
3197 ((c >= 0x370) && (c <= 0x37D)) ||
3198 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3199 ((c >= 0x200C) && (c <= 0x200D)) ||
3200 ((c >= 0x2070) && (c <= 0x218F)) ||
3201 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3202 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3203 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3204 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3205 ((c >= 0x10000) && (c <= 0xEFFFF))))) {
3206 return(NULL);
3207 }
3208 len += l;
3209 NEXTL(l);
3210 c = CUR_CHAR(l);
3211 while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3212 (((c >= 'a') && (c <= 'z')) ||
3213 ((c >= 'A') && (c <= 'Z')) ||
3214 ((c >= '0') && (c <= '9')) || /* !start */
3215 (c == '_') || (c == ':') ||
3216 (c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3217 ((c >= 0xC0) && (c <= 0xD6)) ||
3218 ((c >= 0xD8) && (c <= 0xF6)) ||
3219 ((c >= 0xF8) && (c <= 0x2FF)) ||
3220 ((c >= 0x300) && (c <= 0x36F)) || /* !start */
3221 ((c >= 0x370) && (c <= 0x37D)) ||
3222 ((c >= 0x37F) && (c <= 0x1FFF)) ||
3223 ((c >= 0x200C) && (c <= 0x200D)) ||
3224 ((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3225 ((c >= 0x2070) && (c <= 0x218F)) ||
3226 ((c >= 0x2C00) && (c <= 0x2FEF)) ||
3227 ((c >= 0x3001) && (c <= 0xD7FF)) ||
3228 ((c >= 0xF900) && (c <= 0xFDCF)) ||
3229 ((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3230 ((c >= 0x10000) && (c <= 0xEFFFF))
3231 )) {
3232 if (count++ > 100) {
3233 count = 0;
3234 GROW;
3235 }
3236 len += l;
3237 NEXTL(l);
3238 c = CUR_CHAR(l);
3239 }
3240 } else {
3241 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3242 (!IS_LETTER(c) && (c != '_') &&
3243 (c != ':'))) {
3244 return(NULL);
3245 }
3246 len += l;
3247 NEXTL(l);
3248 c = CUR_CHAR(l);
3249
3250 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3251 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3252 (c == '.') || (c == '-') ||
3253 (c == '_') || (c == ':') ||
3254 (IS_COMBINING(c)) ||
3255 (IS_EXTENDER(c)))) {
3256 if (count++ > 100) {
3257 count = 0;
3258 GROW;
3259 }
3260 len += l;
3261 NEXTL(l);
3262 c = CUR_CHAR(l);
3263 }
3264 }
3265 if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3266 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3267 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3268}
3269
Owen Taylor3473f882001-02-23 17:55:21 +00003270/**
3271 * xmlParseName:
3272 * @ctxt: an XML parser context
3273 *
3274 * parse an XML name.
3275 *
3276 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3277 * CombiningChar | Extender
3278 *
3279 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3280 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003281 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003282 *
3283 * Returns the Name parsed or NULL
3284 */
3285
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003286const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003287xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003288 const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003289 const xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00003290 int count = 0;
3291
3292 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00003293
Daniel Veillardc6561462009-03-25 10:22:31 +00003294#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003295 nbParseName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003296#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003297
Daniel Veillard48b2f892001-02-25 16:11:03 +00003298 /*
3299 * Accelerator for simple ASCII names
3300 */
3301 in = ctxt->input->cur;
3302 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3303 ((*in >= 0x41) && (*in <= 0x5A)) ||
3304 (*in == '_') || (*in == ':')) {
3305 in++;
3306 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3307 ((*in >= 0x41) && (*in <= 0x5A)) ||
3308 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00003309 (*in == '_') || (*in == '-') ||
3310 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00003311 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003312 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00003313 count = in - ctxt->input->cur;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003314 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003315 ctxt->input->cur = in;
Daniel Veillard77a90a72003-03-22 00:04:05 +00003316 ctxt->nbChars += count;
3317 ctxt->input->col += count;
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003318 if (ret == NULL)
3319 xmlErrMemory(ctxt, NULL);
Daniel Veillard48b2f892001-02-25 16:11:03 +00003320 return(ret);
3321 }
3322 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00003323 /* accelerator for special cases */
Daniel Veillard2f362242001-03-02 17:36:21 +00003324 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00003325}
Daniel Veillard48b2f892001-02-25 16:11:03 +00003326
Daniel Veillard34e3f642008-07-29 09:02:27 +00003327static const xmlChar *
3328xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3329 int len = 0, l;
3330 int c;
3331 int count = 0;
3332
Daniel Veillardc6561462009-03-25 10:22:31 +00003333#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003334 nbParseNCNameComplex++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003335#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003336
3337 /*
3338 * Handler for more complex cases
3339 */
3340 GROW;
3341 c = CUR_CHAR(l);
3342 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3343 (!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3344 return(NULL);
3345 }
3346
3347 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3348 (xmlIsNameChar(ctxt, c) && (c != ':'))) {
3349 if (count++ > 100) {
3350 count = 0;
3351 GROW;
3352 }
3353 len += l;
3354 NEXTL(l);
3355 c = CUR_CHAR(l);
3356 }
3357 return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3358}
3359
3360/**
3361 * xmlParseNCName:
3362 * @ctxt: an XML parser context
3363 * @len: lenght of the string parsed
3364 *
3365 * parse an XML name.
3366 *
3367 * [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3368 * CombiningChar | Extender
3369 *
3370 * [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3371 *
3372 * Returns the Name parsed or NULL
3373 */
3374
3375static const xmlChar *
3376xmlParseNCName(xmlParserCtxtPtr ctxt) {
3377 const xmlChar *in;
3378 const xmlChar *ret;
3379 int count = 0;
3380
Daniel Veillardc6561462009-03-25 10:22:31 +00003381#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003382 nbParseNCName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003383#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003384
3385 /*
3386 * Accelerator for simple ASCII names
3387 */
3388 in = ctxt->input->cur;
3389 if (((*in >= 0x61) && (*in <= 0x7A)) ||
3390 ((*in >= 0x41) && (*in <= 0x5A)) ||
3391 (*in == '_')) {
3392 in++;
3393 while (((*in >= 0x61) && (*in <= 0x7A)) ||
3394 ((*in >= 0x41) && (*in <= 0x5A)) ||
3395 ((*in >= 0x30) && (*in <= 0x39)) ||
3396 (*in == '_') || (*in == '-') ||
3397 (*in == '.'))
3398 in++;
3399 if ((*in > 0) && (*in < 0x80)) {
3400 count = in - ctxt->input->cur;
3401 ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3402 ctxt->input->cur = in;
3403 ctxt->nbChars += count;
3404 ctxt->input->col += count;
3405 if (ret == NULL) {
3406 xmlErrMemory(ctxt, NULL);
3407 }
3408 return(ret);
3409 }
3410 }
3411 return(xmlParseNCNameComplex(ctxt));
3412}
3413
Daniel Veillard46de64e2002-05-29 08:21:33 +00003414/**
3415 * xmlParseNameAndCompare:
3416 * @ctxt: an XML parser context
3417 *
3418 * parse an XML name and compares for match
3419 * (specialized for endtag parsing)
3420 *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003421 * Returns NULL for an illegal name, (xmlChar*) 1 for success
3422 * and the name for mismatch
3423 */
3424
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003425static const xmlChar *
Daniel Veillard46de64e2002-05-29 08:21:33 +00003426xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00003427 register const xmlChar *cmp = other;
3428 register const xmlChar *in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003429 const xmlChar *ret;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003430
3431 GROW;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003432
Daniel Veillard46de64e2002-05-29 08:21:33 +00003433 in = ctxt->input->cur;
3434 while (*in != 0 && *in == *cmp) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003435 ++in;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003436 ++cmp;
Aleksey Sanin8fdc32a2005-01-05 15:37:55 +00003437 ctxt->input->col++;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003438 }
William M. Brack76e95df2003-10-18 16:20:14 +00003439 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard34e3f642008-07-29 09:02:27 +00003440 /* success */
Daniel Veillard46de64e2002-05-29 08:21:33 +00003441 ctxt->input->cur = in;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003442 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003443 }
3444 /* failure (or end of input buffer), check with full function */
3445 ret = xmlParseName (ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00003446 /* strings coming from the dictionnary direct compare possible */
3447 if (ret == other) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00003448 return (const xmlChar*) 1;
Daniel Veillard46de64e2002-05-29 08:21:33 +00003449 }
3450 return ret;
3451}
3452
Owen Taylor3473f882001-02-23 17:55:21 +00003453/**
3454 * xmlParseStringName:
3455 * @ctxt: an XML parser context
3456 * @str: a pointer to the string pointer (IN/OUT)
3457 *
3458 * parse an XML name.
3459 *
3460 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3461 * CombiningChar | Extender
3462 *
3463 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3464 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003465 * [6] Names ::= Name (#x20 Name)*
Owen Taylor3473f882001-02-23 17:55:21 +00003466 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003467 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00003468 * is updated to the current location in the string.
3469 */
3470
Daniel Veillard56a4cb82001-03-24 17:00:36 +00003471static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00003472xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3473 xmlChar buf[XML_MAX_NAMELEN + 5];
3474 const xmlChar *cur = *str;
3475 int len = 0, l;
3476 int c;
3477
Daniel Veillardc6561462009-03-25 10:22:31 +00003478#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003479 nbParseStringName++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003480#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003481
Owen Taylor3473f882001-02-23 17:55:21 +00003482 c = CUR_SCHAR(cur, l);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003483 if (!xmlIsNameStartChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003484 return(NULL);
3485 }
3486
Daniel Veillard34e3f642008-07-29 09:02:27 +00003487 COPY_BUF(l,buf,len,c);
3488 cur += l;
3489 c = CUR_SCHAR(cur, l);
3490 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003491 COPY_BUF(l,buf,len,c);
3492 cur += l;
3493 c = CUR_SCHAR(cur, l);
3494 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3495 /*
3496 * Okay someone managed to make a huge name, so he's ready to pay
3497 * for the processing speed.
3498 */
3499 xmlChar *buffer;
3500 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003501
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003502 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003503 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003504 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003505 return(NULL);
3506 }
3507 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003508 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003509 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003510 xmlChar *tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003511 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003512 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003513 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003514 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003515 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003516 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003517 return(NULL);
3518 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003519 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003520 }
3521 COPY_BUF(l,buffer,len,c);
3522 cur += l;
3523 c = CUR_SCHAR(cur, l);
3524 }
3525 buffer[len] = 0;
3526 *str = cur;
3527 return(buffer);
3528 }
3529 }
3530 *str = cur;
3531 return(xmlStrndup(buf, len));
3532}
3533
3534/**
3535 * xmlParseNmtoken:
3536 * @ctxt: an XML parser context
Daniel Veillard34e3f642008-07-29 09:02:27 +00003537 *
Owen Taylor3473f882001-02-23 17:55:21 +00003538 * parse an XML Nmtoken.
3539 *
3540 * [7] Nmtoken ::= (NameChar)+
3541 *
Daniel Veillard807b4de2004-09-26 14:42:56 +00003542 * [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
Owen Taylor3473f882001-02-23 17:55:21 +00003543 *
3544 * Returns the Nmtoken parsed or NULL
3545 */
3546
3547xmlChar *
3548xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3549 xmlChar buf[XML_MAX_NAMELEN + 5];
3550 int len = 0, l;
3551 int c;
3552 int count = 0;
3553
Daniel Veillardc6561462009-03-25 10:22:31 +00003554#ifdef DEBUG
Daniel Veillard34e3f642008-07-29 09:02:27 +00003555 nbParseNmToken++;
Daniel Veillardc6561462009-03-25 10:22:31 +00003556#endif
Daniel Veillard34e3f642008-07-29 09:02:27 +00003557
Owen Taylor3473f882001-02-23 17:55:21 +00003558 GROW;
3559 c = CUR_CHAR(l);
3560
Daniel Veillard34e3f642008-07-29 09:02:27 +00003561 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003562 if (count++ > 100) {
3563 count = 0;
3564 GROW;
3565 }
3566 COPY_BUF(l,buf,len,c);
3567 NEXTL(l);
3568 c = CUR_CHAR(l);
3569 if (len >= XML_MAX_NAMELEN) {
3570 /*
3571 * Okay someone managed to make a huge token, so he's ready to pay
3572 * for the processing speed.
3573 */
3574 xmlChar *buffer;
3575 int max = len * 2;
Daniel Veillard34e3f642008-07-29 09:02:27 +00003576
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003577 buffer = (xmlChar *) xmlMallocAtomic(max * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003578 if (buffer == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003579 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003580 return(NULL);
3581 }
3582 memcpy(buffer, buf, len);
Daniel Veillard34e3f642008-07-29 09:02:27 +00003583 while (xmlIsNameChar(ctxt, c)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003584 if (count++ > 100) {
3585 count = 0;
3586 GROW;
3587 }
3588 if (len + 10 > max) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003589 xmlChar *tmp;
3590
Owen Taylor3473f882001-02-23 17:55:21 +00003591 max *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003592 tmp = (xmlChar *) xmlRealloc(buffer,
Owen Taylor3473f882001-02-23 17:55:21 +00003593 max * sizeof(xmlChar));
Daniel Veillard2248ff12004-09-22 23:05:14 +00003594 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003595 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003596 xmlFree(buffer);
Owen Taylor3473f882001-02-23 17:55:21 +00003597 return(NULL);
3598 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003599 buffer = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003600 }
3601 COPY_BUF(l,buffer,len,c);
3602 NEXTL(l);
3603 c = CUR_CHAR(l);
3604 }
3605 buffer[len] = 0;
3606 return(buffer);
3607 }
3608 }
3609 if (len == 0)
3610 return(NULL);
3611 return(xmlStrndup(buf, len));
3612}
3613
3614/**
3615 * xmlParseEntityValue:
3616 * @ctxt: an XML parser context
3617 * @orig: if non-NULL store a copy of the original entity value
3618 *
3619 * parse a value for ENTITY declarations
3620 *
3621 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3622 * "'" ([^%&'] | PEReference | Reference)* "'"
3623 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003624 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00003625 */
3626
3627xmlChar *
3628xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3629 xmlChar *buf = NULL;
3630 int len = 0;
3631 int size = XML_PARSER_BUFFER_SIZE;
3632 int c, l;
3633 xmlChar stop;
3634 xmlChar *ret = NULL;
3635 const xmlChar *cur = NULL;
3636 xmlParserInputPtr input;
3637
3638 if (RAW == '"') stop = '"';
3639 else if (RAW == '\'') stop = '\'';
3640 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003641 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003642 return(NULL);
3643 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00003644 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00003645 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003646 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003647 return(NULL);
3648 }
3649
3650 /*
3651 * The content of the entity definition is copied in a buffer.
3652 */
3653
3654 ctxt->instate = XML_PARSER_ENTITY_VALUE;
3655 input = ctxt->input;
3656 GROW;
3657 NEXT;
3658 c = CUR_CHAR(l);
3659 /*
3660 * NOTE: 4.4.5 Included in Literal
3661 * When a parameter entity reference appears in a literal entity
3662 * value, ... a single or double quote character in the replacement
3663 * text is always treated as a normal data character and will not
3664 * terminate the literal.
3665 * In practice it means we stop the loop only when back at parsing
3666 * the initial entity and the quote is found
3667 */
William M. Brack871611b2003-10-18 04:53:14 +00003668 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00003669 (ctxt->input != input))) {
3670 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00003671 xmlChar *tmp;
3672
Owen Taylor3473f882001-02-23 17:55:21 +00003673 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00003674 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3675 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003676 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00003677 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00003678 return(NULL);
3679 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00003680 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00003681 }
3682 COPY_BUF(l,buf,len,c);
3683 NEXTL(l);
3684 /*
3685 * Pop-up of finished entities.
3686 */
3687 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
3688 xmlPopInput(ctxt);
3689
3690 GROW;
3691 c = CUR_CHAR(l);
3692 if (c == 0) {
3693 GROW;
3694 c = CUR_CHAR(l);
3695 }
3696 }
3697 buf[len] = 0;
3698
3699 /*
3700 * Raise problem w.r.t. '&' and '%' being used in non-entities
3701 * reference constructs. Note Charref will be handled in
3702 * xmlStringDecodeEntities()
3703 */
3704 cur = buf;
3705 while (*cur != 0) { /* non input consuming */
3706 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3707 xmlChar *name;
3708 xmlChar tmp = *cur;
3709
3710 cur++;
3711 name = xmlParseStringName(ctxt, &cur);
3712 if ((name == NULL) || (*cur != ';')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003713 xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00003714 "EntityValue: '%c' forbidden except for entities references\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00003715 tmp);
Owen Taylor3473f882001-02-23 17:55:21 +00003716 }
Daniel Veillard5151c062001-10-23 13:10:19 +00003717 if ((tmp == '%') && (ctxt->inSubset == 1) &&
3718 (ctxt->inputNr == 1)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003719 xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003720 }
3721 if (name != NULL)
3722 xmlFree(name);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00003723 if (*cur == 0)
3724 break;
Owen Taylor3473f882001-02-23 17:55:21 +00003725 }
3726 cur++;
3727 }
3728
3729 /*
3730 * Then PEReference entities are substituted.
3731 */
3732 if (c != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003733 xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003734 xmlFree(buf);
3735 } else {
3736 NEXT;
3737 /*
3738 * NOTE: 4.4.7 Bypassed
3739 * When a general entity reference appears in the EntityValue in
3740 * an entity declaration, it is bypassed and left as is.
3741 * so XML_SUBSTITUTE_REF is not set here.
3742 */
3743 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
3744 0, 0, 0);
3745 if (orig != NULL)
3746 *orig = buf;
3747 else
3748 xmlFree(buf);
3749 }
3750
3751 return(ret);
3752}
3753
3754/**
Daniel Veillard01c13b52002-12-10 15:19:08 +00003755 * xmlParseAttValueComplex:
3756 * @ctxt: an XML parser context
 * @attlen: the resulting attribute len
 * @normalize: whether to apply the inner normalization
Daniel Veillard01c13b52002-12-10 15:19:08 +00003759 *
3760 * parse a value for an attribute, this is the fallback function
3761 * of xmlParseAttValue() when the attribute parsing requires handling
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003762 * of non-ASCII characters, or normalization compaction.
Daniel Veillard01c13b52002-12-10 15:19:08 +00003763 *
3764 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
3765 */
Daniel Veillard0fb18932003-09-07 09:14:37 +00003766static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003767xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
Daniel Veillarde72c7562002-05-31 09:47:30 +00003768 xmlChar limit = 0;
3769 xmlChar *buf = NULL;
Daniel Veillard68b6e022008-03-31 09:26:00 +00003770 xmlChar *rep = NULL;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003771 size_t len = 0;
3772 size_t buf_size = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003773 int c, l, in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003774 xmlChar *current = NULL;
3775 xmlEntityPtr ent;
3776
Owen Taylor3473f882001-02-23 17:55:21 +00003777 if (NXT(0) == '"') {
3778 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3779 limit = '"';
3780 NEXT;
3781 } else if (NXT(0) == '\'') {
3782 limit = '\'';
3783 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
3784 NEXT;
3785 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003786 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003787 return(NULL);
3788 }
Daniel Veillard68b6e022008-03-31 09:26:00 +00003789
Owen Taylor3473f882001-02-23 17:55:21 +00003790 /*
3791 * allocate a translation buffer.
3792 */
3793 buf_size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003794 buf = (xmlChar *) xmlMallocAtomic(buf_size);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003795 if (buf == NULL) goto mem_error;
Owen Taylor3473f882001-02-23 17:55:21 +00003796
3797 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00003798 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00003799 */
3800 c = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00003801 while ((NXT(0) != limit) && /* checked */
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003802 (IS_CHAR(c)) && (c != '<')) {
Owen Taylor3473f882001-02-23 17:55:21 +00003803 if (c == 0) break;
Daniel Veillardfdc91562002-07-01 21:52:03 +00003804 if (c == '&') {
Daniel Veillard62998c02003-09-15 12:56:36 +00003805 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003806 if (NXT(1) == '#') {
3807 int val = xmlParseCharRef(ctxt);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003808
Owen Taylor3473f882001-02-23 17:55:21 +00003809 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00003810 if (ctxt->replaceEntities) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003811 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003812 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003813 }
3814 buf[len++] = '&';
3815 } else {
3816 /*
3817 * The reparsing will be done in xmlStringGetNodeList()
3818 * called by the attribute() function in SAX.c
3819 */
Daniel Veillard459eeb92012-07-17 16:19:17 +08003820 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003821 growBuffer(buf, 10);
Daniel Veillard319a7422001-09-11 09:27:09 +00003822 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003823 buf[len++] = '&';
3824 buf[len++] = '#';
3825 buf[len++] = '3';
3826 buf[len++] = '8';
3827 buf[len++] = ';';
Owen Taylor3473f882001-02-23 17:55:21 +00003828 }
Daniel Veillarddc171602008-03-26 17:41:38 +00003829 } else if (val != 0) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003830 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003831 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003832 }
Owen Taylor3473f882001-02-23 17:55:21 +00003833 len += xmlCopyChar(0, &buf[len], val);
3834 }
3835 } else {
3836 ent = xmlParseEntityRef(ctxt);
Daniel Veillardcba68392008-08-29 12:43:40 +00003837 ctxt->nbentities++;
3838 if (ent != NULL)
3839 ctxt->nbentities += ent->owner;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003840 if ((ent != NULL) &&
3841 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003842 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003843 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003844 }
3845 if ((ctxt->replaceEntities == 0) &&
3846 (ent->content[0] == '&')) {
3847 buf[len++] = '&';
3848 buf[len++] = '#';
3849 buf[len++] = '3';
3850 buf[len++] = '8';
3851 buf[len++] = ';';
3852 } else {
3853 buf[len++] = ent->content[0];
3854 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003855 } else if ((ent != NULL) &&
3856 (ctxt->replaceEntities != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003857 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
3858 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003859 XML_SUBSTITUTE_REF,
3860 0, 0, 0);
Owen Taylor3473f882001-02-23 17:55:21 +00003861 if (rep != NULL) {
3862 current = rep;
3863 while (*current != 0) { /* non input consuming */
Daniel Veillard283d5022009-08-25 17:18:39 +02003864 if ((*current == 0xD) || (*current == 0xA) ||
3865 (*current == 0x9)) {
3866 buf[len++] = 0x20;
3867 current++;
3868 } else
3869 buf[len++] = *current++;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003870 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003871 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003872 }
3873 }
3874 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003875 rep = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00003876 }
3877 } else {
Daniel Veillard459eeb92012-07-17 16:19:17 +08003878 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003879 growBuffer(buf, 10);
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00003880 }
Owen Taylor3473f882001-02-23 17:55:21 +00003881 if (ent->content != NULL)
3882 buf[len++] = ent->content[0];
3883 }
3884 } else if (ent != NULL) {
3885 int i = xmlStrlen(ent->name);
3886 const xmlChar *cur = ent->name;
3887
3888 /*
3889 * This may look absurd but is needed to detect
3890 * entities problems
3891 */
3892 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
3893 (ent->content != NULL)) {
Owen Taylor3473f882001-02-23 17:55:21 +00003894 rep = xmlStringDecodeEntities(ctxt, ent->content,
Daniel Veillard68b6e022008-03-31 09:26:00 +00003895 XML_SUBSTITUTE_REF, 0, 0, 0);
3896 if (rep != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00003897 xmlFree(rep);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003898 rep = NULL;
3899 }
Owen Taylor3473f882001-02-23 17:55:21 +00003900 }
3901
3902 /*
3903 * Just output the reference
3904 */
3905 buf[len++] = '&';
Daniel Veillard459eeb92012-07-17 16:19:17 +08003906 while (len + i + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003907 growBuffer(buf, i + 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003908 }
3909 for (;i > 0;i--)
3910 buf[len++] = *cur++;
3911 buf[len++] = ';';
3912 }
3913 }
3914 } else {
3915 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003916 if ((len != 0) || (!normalize)) {
3917 if ((!normalize) || (!in_space)) {
3918 COPY_BUF(l,buf,len,0x20);
Daniel Veillard459eeb92012-07-17 16:19:17 +08003919 while (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003920 growBuffer(buf, 10);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003921 }
3922 }
3923 in_space = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00003924 }
3925 } else {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003926 in_space = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003927 COPY_BUF(l,buf,len,c);
Daniel Veillard459eeb92012-07-17 16:19:17 +08003928 if (len + 10 > buf_size) {
Daniel Veillard0161e632008-08-28 15:36:32 +00003929 growBuffer(buf, 10);
Owen Taylor3473f882001-02-23 17:55:21 +00003930 }
3931 }
3932 NEXTL(l);
3933 }
3934 GROW;
3935 c = CUR_CHAR(l);
3936 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003937 if ((in_space) && (normalize)) {
3938 while (buf[len - 1] == 0x20) len--;
3939 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00003940 buf[len] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00003941 if (RAW == '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003942 xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003943 } else if (RAW != limit) {
Daniel Veillardb9e5acc2007-06-12 13:43:00 +00003944 if ((c != 0) && (!IS_CHAR(c))) {
3945 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
3946 "invalid character in attribute value\n");
3947 } else {
3948 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3949 "AttValue: ' expected\n");
3950 }
Owen Taylor3473f882001-02-23 17:55:21 +00003951 } else
3952 NEXT;
Daniel Veillard459eeb92012-07-17 16:19:17 +08003953
3954 /*
3955 * There we potentially risk an overflow, don't allow attribute value of
3956 * lenght more than INT_MAX it is a very reasonnable assumption !
3957 */
3958 if (len >= INT_MAX) {
3959 xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
3960 "AttValue lenght too long\n");
3961 goto mem_error;
3962 }
3963
3964 if (attlen != NULL) *attlen = (int) len;
Owen Taylor3473f882001-02-23 17:55:21 +00003965 return(buf);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003966
3967mem_error:
Daniel Veillard1afc9f32003-09-13 12:44:05 +00003968 xmlErrMemory(ctxt, NULL);
Daniel Veillard68b6e022008-03-31 09:26:00 +00003969 if (buf != NULL)
3970 xmlFree(buf);
3971 if (rep != NULL)
3972 xmlFree(rep);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00003973 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00003974}
3975
3976/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00003977 * xmlParseAttValue:
3978 * @ctxt: an XML parser context
3979 *
3980 * parse a value for an attribute
3981 * Note: the parser won't do substitution of entities here, this
3982 * will be handled later in xmlStringGetNodeList
3983 *
3984 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
3985 * "'" ([^<&'] | Reference)* "'"
3986 *
3987 * 3.3.3 Attribute-Value Normalization:
3988 * Before the value of an attribute is passed to the application or
3989 * checked for validity, the XML processor must normalize it as follows:
3990 * - a character reference is processed by appending the referenced
3991 * character to the attribute value
3992 * - an entity reference is processed by recursively processing the
3993 * replacement text of the entity
3994 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
3995 * appending #x20 to the normalized value, except that only a single
3996 * #x20 is appended for a "#xD#xA" sequence that is part of an external
3997 * parsed entity or the literal entity value of an internal parsed entity
3998 * - other characters are processed by appending them to the normalized value
3999 * If the declared value is not CDATA, then the XML processor must further
4000 * process the normalized attribute value by discarding any leading and
4001 * trailing space (#x20) characters, and by replacing sequences of space
4002 * (#x20) characters by a single space (#x20) character.
4003 * All attributes for which no declaration has been read should be treated
4004 * by a non-validating parser as if declared CDATA.
4005 *
4006 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4007 */
4008
4009
4010xmlChar *
4011xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard2a4fb5a2004-11-08 14:02:18 +00004012 if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00004013 return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
Daniel Veillard0fb18932003-09-07 09:14:37 +00004014}
4015
4016/**
Owen Taylor3473f882001-02-23 17:55:21 +00004017 * xmlParseSystemLiteral:
4018 * @ctxt: an XML parser context
4019 *
4020 * parse an XML Literal
4021 *
4022 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4023 *
4024 * Returns the SystemLiteral parsed or NULL
4025 */
4026
4027xmlChar *
4028xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4029 xmlChar *buf = NULL;
4030 int len = 0;
4031 int size = XML_PARSER_BUFFER_SIZE;
4032 int cur, l;
4033 xmlChar stop;
4034 int state = ctxt->instate;
4035 int count = 0;
4036
4037 SHRINK;
4038 if (RAW == '"') {
4039 NEXT;
4040 stop = '"';
4041 } else if (RAW == '\'') {
4042 NEXT;
4043 stop = '\'';
4044 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004045 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004046 return(NULL);
4047 }
4048
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004049 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004050 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004051 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004052 return(NULL);
4053 }
4054 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4055 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004056 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004057 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004058 xmlChar *tmp;
4059
Owen Taylor3473f882001-02-23 17:55:21 +00004060 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004061 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4062 if (tmp == NULL) {
4063 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004064 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004065 ctxt->instate = (xmlParserInputState) state;
4066 return(NULL);
4067 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004068 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004069 }
4070 count++;
4071 if (count > 50) {
4072 GROW;
4073 count = 0;
4074 }
4075 COPY_BUF(l,buf,len,cur);
4076 NEXTL(l);
4077 cur = CUR_CHAR(l);
4078 if (cur == 0) {
4079 GROW;
4080 SHRINK;
4081 cur = CUR_CHAR(l);
4082 }
4083 }
4084 buf[len] = 0;
4085 ctxt->instate = (xmlParserInputState) state;
William M. Brack871611b2003-10-18 04:53:14 +00004086 if (!IS_CHAR(cur)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004087 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004088 } else {
4089 NEXT;
4090 }
4091 return(buf);
4092}
4093
4094/**
4095 * xmlParsePubidLiteral:
4096 * @ctxt: an XML parser context
4097 *
4098 * parse an XML public literal
4099 *
4100 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4101 *
4102 * Returns the PubidLiteral parsed or NULL.
4103 */
4104
4105xmlChar *
4106xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4107 xmlChar *buf = NULL;
4108 int len = 0;
4109 int size = XML_PARSER_BUFFER_SIZE;
4110 xmlChar cur;
4111 xmlChar stop;
4112 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004113 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00004114
4115 SHRINK;
4116 if (RAW == '"') {
4117 NEXT;
4118 stop = '"';
4119 } else if (RAW == '\'') {
4120 NEXT;
4121 stop = '\'';
4122 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004123 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004124 return(NULL);
4125 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004126 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004127 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004128 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004129 return(NULL);
4130 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004131 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00004132 cur = CUR;
William M. Brack76e95df2003-10-18 16:20:14 +00004133 while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004134 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004135 xmlChar *tmp;
4136
Owen Taylor3473f882001-02-23 17:55:21 +00004137 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00004138 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4139 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004140 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00004141 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004142 return(NULL);
4143 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00004144 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00004145 }
4146 buf[len++] = cur;
4147 count++;
4148 if (count > 50) {
4149 GROW;
4150 count = 0;
4151 }
4152 NEXT;
4153 cur = CUR;
4154 if (cur == 0) {
4155 GROW;
4156 SHRINK;
4157 cur = CUR;
4158 }
4159 }
4160 buf[len] = 0;
4161 if (cur != stop) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004162 xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004163 } else {
4164 NEXT;
4165 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00004166 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00004167 return(buf);
4168}
4169
Daniel Veillard8ed10722009-08-20 19:17:36 +02004170static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Daniel Veillard0a119eb2005-07-20 13:46:00 +00004171
/*
 * used for the test in the inner loop of the char data testing
 *
 * Lookup table indexed by a byte value. xmlParseCharData() keeps
 * advancing while the entry for the current byte is non-zero.
 * Entries are zero (i.e. stop the fast scan) for:
 *   - the control bytes 0x00-0x08 and 0x0A-0x1F (so 0xA and 0xD stop it,
 *     while 0x9 does not),
 *   - '&' (0x26), '<' (0x3C) and ']' (0x5D),
 *   - every byte >= 0x80 (non-ASCII).
 * All other entries hold the byte value itself.
 */
static const unsigned char test_char_data[256] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x9, CR/LF separated */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27, /* & */
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F, /* < */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F, /* ] */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* non-ascii */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
4209
/**
 * xmlParseCharData:
 * @ctxt:  an XML parser context
 * @cdata:  int indicating whether we are within a CDATA section
 *
 * parse a CharData section.
 * if we are within a CDATA section ']]>' marks an end of section.
 *
 * The right angle bracket (>) may be represented using the string "&gt;",
 * and must, for compatibility, be escaped using "&gt;" or a character
 * reference when it appears in the string "]]>" in content, when that
 * string is not marking the end of a CDATA section.
 *
 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
 *
 * When @cdata is zero a fast path scans the input buffer in place and
 * hands segments directly to the SAX callbacks without copying them.
 * When @cdata is non-zero, or when the fast loop ends on a byte it
 * does not handle, the work is handed over to xmlParseCharDataComplex().
 */

void
xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
    const xmlChar *in;
    int nbchar = 0;
    /*
     * Position saved on entry and refreshed after each SAX delivery;
     * it is written back to the input just before the fallback call.
     */
    int line = ctxt->input->line;
    int col = ctxt->input->col;
    int ccol;

    SHRINK;
    GROW;
    /*
     * Accelerated common case where input don't need to be
     * modified before passing it to the handler.
     */
    if (!cdata) {
        in = ctxt->input->cur;
        do {
get_more_space:
            /* skip a run of spaces and line feeds, tracking line/col */
            while (*in == 0x20) { in++; ctxt->input->col++; }
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more_space;
            }
            if (*in == '<') {
                /*
                 * Only 0x20 and 0xA were seen up to the '<': deliver that
                 * segment, if not empty, and stop.
                 * NOTE(review): unlike xmlParseCharDataComplex(), the
                 * callbacks in this function are invoked without testing
                 * ctxt->disableSAX - confirm this is intended.
                 */
                nbchar = in - ctxt->input->cur;
                if (nbchar > 0) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if ((ctxt->sax != NULL) &&
                        (ctxt->sax->ignorableWhitespace !=
                         ctxt->sax->characters)) {
                        if (areBlanks(ctxt, tmp, nbchar, 1)) {
                            if (ctxt->sax->ignorableWhitespace != NULL)
                                ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                       tmp, nbchar);
                        } else {
                            if (ctxt->sax->characters != NULL)
                                ctxt->sax->characters(ctxt->userData,
                                                      tmp, nbchar);
                            if (*ctxt->space == -1)
                                *ctxt->space = -2;
                        }
                    } else if ((ctxt->sax != NULL) &&
                               (ctxt->sax->characters != NULL)) {
                        ctxt->sax->characters(ctxt->userData,
                                              tmp, nbchar);
                    }
                }
                return;
            }

get_more:
            /* advance over every byte the lookup table accepts */
            ccol = ctxt->input->col;
            while (test_char_data[*in]) {
                in++;
                ccol++;
            }
            ctxt->input->col = ccol;
            if (*in == 0xA) {
                do {
                    ctxt->input->line++; ctxt->input->col = 1;
                    in++;
                } while (*in == 0xA);
                goto get_more;
            }
            if (*in == ']') {
                /* "]]>" is an error here since we are not in CDATA */
                if ((in[1] == ']') && (in[2] == '>')) {
                    xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
                    ctxt->input->cur = in;
                    return;
                }
                in++;
                ctxt->input->col++;
                goto get_more;
            }
            /* deliver what was scanned since the current input position */
            nbchar = in - ctxt->input->cur;
            if (nbchar > 0) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->ignorableWhitespace !=
                     ctxt->sax->characters) &&
                    (IS_BLANK_CH(*ctxt->input->cur))) {
                    const xmlChar *tmp = ctxt->input->cur;
                    ctxt->input->cur = in;

                    if (areBlanks(ctxt, tmp, nbchar, 0)) {
                        if (ctxt->sax->ignorableWhitespace != NULL)
                            ctxt->sax->ignorableWhitespace(ctxt->userData,
                                                           tmp, nbchar);
                    } else {
                        if (ctxt->sax->characters != NULL)
                            ctxt->sax->characters(ctxt->userData,
                                                  tmp, nbchar);
                        if (*ctxt->space == -1)
                            *ctxt->space = -2;
                    }
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                } else if (ctxt->sax != NULL) {
                    if (ctxt->sax->characters != NULL)
                        ctxt->sax->characters(ctxt->userData,
                                              ctxt->input->cur, nbchar);
                    line = ctxt->input->line;
                    col = ctxt->input->col;
                }
                /* something really bad happened in the SAX callback */
                if (ctxt->instate != XML_PARSER_CONTENT)
                    return;
            }
            ctxt->input->cur = in;
            if (*in == 0xD) {
                /*
                 * For a CR LF pair the input position is left on the LF
                 * (the CR is dropped) and scanning resumes after it; a
                 * lone CR is left in place for the checks below.
                 */
                in++;
                if (*in == 0xA) {
                    ctxt->input->cur = in;
                    in++;
                    ctxt->input->line++; ctxt->input->col = 1;
                    continue; /* while */
                }
                in--;
            }
            if (*in == '<') {
                return;
            }
            if (*in == '&') {
                return;
            }
            /* refresh the input buffer, then reload the scan pointer */
            SHRINK;
            GROW;
            in = ctxt->input->cur;
        } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
        nbchar = 0;
    }
    /* restore the last saved position and fall back to the slow routine */
    ctxt->input->line = line;
    ctxt->input->col = col;
    xmlParseCharDataComplex(ctxt, cdata);
}
4365
Daniel Veillard01c13b52002-12-10 15:19:08 +00004366/**
4367 * xmlParseCharDataComplex:
4368 * @ctxt: an XML parser context
4369 * @cdata: int indicating whether we are within a CDATA section
4370 *
4371 * parse a CharData section.this is the fallback function
4372 * of xmlParseCharData() when the parsing requires handling
4373 * of non-ASCII characters.
4374 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02004375static void
Daniel Veillard48b2f892001-02-25 16:11:03 +00004376xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00004377 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4378 int nbchar = 0;
4379 int cur, l;
4380 int count = 0;
4381
4382 SHRINK;
4383 GROW;
4384 cur = CUR_CHAR(l);
Daniel Veillardfdc91562002-07-01 21:52:03 +00004385 while ((cur != '<') && /* checked */
4386 (cur != '&') &&
William M. Brack871611b2003-10-18 04:53:14 +00004387 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
Owen Taylor3473f882001-02-23 17:55:21 +00004388 if ((cur == ']') && (NXT(1) == ']') &&
4389 (NXT(2) == '>')) {
4390 if (cdata) break;
4391 else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004392 xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004393 }
4394 }
4395 COPY_BUF(l,buf,nbchar,cur);
4396 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004397 buf[nbchar] = 0;
4398
Owen Taylor3473f882001-02-23 17:55:21 +00004399 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004400 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004401 */
4402 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004403 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004404 if (ctxt->sax->ignorableWhitespace != NULL)
4405 ctxt->sax->ignorableWhitespace(ctxt->userData,
4406 buf, nbchar);
4407 } else {
4408 if (ctxt->sax->characters != NULL)
4409 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004410 if ((ctxt->sax->characters !=
4411 ctxt->sax->ignorableWhitespace) &&
4412 (*ctxt->space == -1))
4413 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004414 }
4415 }
4416 nbchar = 0;
Daniel Veillard1dc9feb2008-11-17 15:59:21 +00004417 /* something really bad happened in the SAX callback */
4418 if (ctxt->instate != XML_PARSER_CONTENT)
4419 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004420 }
4421 count++;
4422 if (count > 50) {
4423 GROW;
4424 count = 0;
4425 }
4426 NEXTL(l);
4427 cur = CUR_CHAR(l);
4428 }
4429 if (nbchar != 0) {
Daniel Veillard092643b2003-09-25 14:29:29 +00004430 buf[nbchar] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00004431 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004432 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00004433 */
4434 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00004435 if (areBlanks(ctxt, buf, nbchar, 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004436 if (ctxt->sax->ignorableWhitespace != NULL)
4437 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4438 } else {
4439 if (ctxt->sax->characters != NULL)
4440 ctxt->sax->characters(ctxt->userData, buf, nbchar);
Daniel Veillard1114d002006-10-12 16:24:35 +00004441 if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4442 (*ctxt->space == -1))
4443 *ctxt->space = -2;
Owen Taylor3473f882001-02-23 17:55:21 +00004444 }
4445 }
4446 }
Daniel Veillard3b1478b2006-04-24 08:50:10 +00004447 if ((cur != 0) && (!IS_CHAR(cur))) {
4448 /* Generate the error and skip the offending character */
4449 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4450 "PCDATA invalid Char value %d\n",
4451 cur);
4452 NEXTL(l);
4453 }
Owen Taylor3473f882001-02-23 17:55:21 +00004454}
4455
4456/**
4457 * xmlParseExternalID:
4458 * @ctxt: an XML parser context
4459 * @publicID: a xmlChar** receiving PubidLiteral
4460 * @strict: indicate whether we should restrict parsing to only
4461 * production [75], see NOTE below
4462 *
4463 * Parse an External ID or a Public ID
4464 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004465 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00004466 * 'PUBLIC' S PubidLiteral S SystemLiteral
4467 *
4468 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4469 * | 'PUBLIC' S PubidLiteral S SystemLiteral
4470 *
4471 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4472 *
4473 * Returns the function returns SystemLiteral and in the second
4474 * case publicID receives PubidLiteral, is strict is off
4475 * it is possible to return NULL and have publicID set.
4476 */
4477
4478xmlChar *
4479xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4480 xmlChar *URI = NULL;
4481
4482 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00004483
4484 *publicID = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00004485 if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004486 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004487 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004488 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4489 "Space required after 'SYSTEM'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004490 }
4491 SKIP_BLANKS;
4492 URI = xmlParseSystemLiteral(ctxt);
4493 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004494 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004495 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00004496 } else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
Owen Taylor3473f882001-02-23 17:55:21 +00004497 SKIP(6);
William M. Brack76e95df2003-10-18 16:20:14 +00004498 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004499 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004500 "Space required after 'PUBLIC'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004501 }
4502 SKIP_BLANKS;
4503 *publicID = xmlParsePubidLiteral(ctxt);
4504 if (*publicID == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004505 xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004506 }
4507 if (strict) {
4508 /*
4509 * We don't handle [83] so "S SystemLiteral" is required.
4510 */
William M. Brack76e95df2003-10-18 16:20:14 +00004511 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004512 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00004513 "Space required after the Public Identifier\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004514 }
4515 } else {
4516 /*
4517 * We handle [83] so we return immediately, if
4518 * "S SystemLiteral" is not detected. From a purely parsing
4519 * point of view that's a nice mess.
4520 */
4521 const xmlChar *ptr;
4522 GROW;
4523
4524 ptr = CUR_PTR;
William M. Brack76e95df2003-10-18 16:20:14 +00004525 if (!IS_BLANK_CH(*ptr)) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004526
William M. Brack76e95df2003-10-18 16:20:14 +00004527 while (IS_BLANK_CH(*ptr)) ptr++; /* TODO: dangerous, fix ! */
Owen Taylor3473f882001-02-23 17:55:21 +00004528 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
4529 }
4530 SKIP_BLANKS;
4531 URI = xmlParseSystemLiteral(ctxt);
4532 if (URI == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004533 xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004534 }
4535 }
4536 return(URI);
4537}
4538
4539/**
Daniel Veillard4c778d82005-01-23 17:37:44 +00004540 * xmlParseCommentComplex:
Owen Taylor3473f882001-02-23 17:55:21 +00004541 * @ctxt: an XML parser context
Daniel Veillard4c778d82005-01-23 17:37:44 +00004542 * @buf: the already parsed part of the buffer
4543 * @len: number of bytes filles in the buffer
4544 * @size: allocated size of the buffer
Owen Taylor3473f882001-02-23 17:55:21 +00004545 *
4546 * Skip an XML (SGML) comment <!-- .... -->
4547 * The spec says that "For compatibility, the string "--" (double-hyphen)
4548 * must not occur within comments. "
Daniel Veillard4c778d82005-01-23 17:37:44 +00004549 * This is the slow routine in case the accelerator for ascii didn't work
Owen Taylor3473f882001-02-23 17:55:21 +00004550 *
4551 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4552 */
Daniel Veillard4c778d82005-01-23 17:37:44 +00004553static void
4554xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf, int len, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00004555 int q, ql;
4556 int r, rl;
4557 int cur, l;
Owen Taylor3473f882001-02-23 17:55:21 +00004558 int count = 0;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004559 int inputid;
4560
4561 inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00004562
Owen Taylor3473f882001-02-23 17:55:21 +00004563 if (buf == NULL) {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004564 len = 0;
4565 size = XML_PARSER_BUFFER_SIZE;
4566 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4567 if (buf == NULL) {
4568 xmlErrMemory(ctxt, NULL);
4569 return;
4570 }
Owen Taylor3473f882001-02-23 17:55:21 +00004571 }
William M. Brackbf9a73d2007-02-09 00:07:07 +00004572 GROW; /* Assure there's enough input data */
Owen Taylor3473f882001-02-23 17:55:21 +00004573 q = CUR_CHAR(ql);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004574 if (q == 0)
4575 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004576 if (!IS_CHAR(q)) {
4577 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4578 "xmlParseComment: invalid xmlChar value %d\n",
4579 q);
4580 xmlFree (buf);
4581 return;
4582 }
Owen Taylor3473f882001-02-23 17:55:21 +00004583 NEXTL(ql);
4584 r = CUR_CHAR(rl);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004585 if (r == 0)
4586 goto not_terminated;
Daniel Veillardda629342007-08-01 07:49:06 +00004587 if (!IS_CHAR(r)) {
4588 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4589 "xmlParseComment: invalid xmlChar value %d\n",
4590 q);
4591 xmlFree (buf);
4592 return;
4593 }
Owen Taylor3473f882001-02-23 17:55:21 +00004594 NEXTL(rl);
4595 cur = CUR_CHAR(l);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004596 if (cur == 0)
4597 goto not_terminated;
William M. Brack871611b2003-10-18 04:53:14 +00004598 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004599 ((cur != '>') ||
4600 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004601 if ((r == '-') && (q == '-')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004602 xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004603 }
4604 if (len + 5 >= size) {
William M. Bracka3215c72004-07-31 16:24:01 +00004605 xmlChar *new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004606 size *= 2;
William M. Bracka3215c72004-07-31 16:24:01 +00004607 new_buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
4608 if (new_buf == NULL) {
4609 xmlFree (buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004610 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004611 return;
4612 }
William M. Bracka3215c72004-07-31 16:24:01 +00004613 buf = new_buf;
Owen Taylor3473f882001-02-23 17:55:21 +00004614 }
4615 COPY_BUF(ql,buf,len,q);
4616 q = r;
4617 ql = rl;
4618 r = cur;
4619 rl = l;
4620
4621 count++;
4622 if (count > 50) {
4623 GROW;
4624 count = 0;
4625 }
4626 NEXTL(l);
4627 cur = CUR_CHAR(l);
4628 if (cur == 0) {
4629 SHRINK;
4630 GROW;
4631 cur = CUR_CHAR(l);
4632 }
4633 }
4634 buf[len] = 0;
Daniel Veillardda629342007-08-01 07:49:06 +00004635 if (cur == 0) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004636 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00004637 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004638 } else if (!IS_CHAR(cur)) {
4639 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4640 "xmlParseComment: invalid xmlChar value %d\n",
4641 cur);
Owen Taylor3473f882001-02-23 17:55:21 +00004642 } else {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004643 if (inputid != ctxt->input->id) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004644 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4645 "Comment doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004646 }
4647 NEXT;
4648 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4649 (!ctxt->disableSAX))
4650 ctxt->sax->comment(ctxt->userData, buf);
Owen Taylor3473f882001-02-23 17:55:21 +00004651 }
Daniel Veillardda629342007-08-01 07:49:06 +00004652 xmlFree(buf);
Daniel Veillard4aede2e2003-10-17 12:43:59 +00004653 return;
4654not_terminated:
4655 xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4656 "Comment not terminated\n", NULL);
4657 xmlFree(buf);
Daniel Veillardda629342007-08-01 07:49:06 +00004658 return;
Owen Taylor3473f882001-02-23 17:55:21 +00004659}
Daniel Veillardda629342007-08-01 07:49:06 +00004660
Daniel Veillard4c778d82005-01-23 17:37:44 +00004661/**
4662 * xmlParseComment:
4663 * @ctxt: an XML parser context
4664 *
4665 * Skip an XML (SGML) comment <!-- .... -->
4666 * The spec says that "For compatibility, the string "--" (double-hyphen)
4667 * must not occur within comments. "
4668 *
4669 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4670 */
4671void
4672xmlParseComment(xmlParserCtxtPtr ctxt) {
4673 xmlChar *buf = NULL;
4674 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard316a5c32005-01-23 22:56:39 +00004675 int len = 0;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004676 xmlParserInputState state;
4677 const xmlChar *in;
4678 int nbchar = 0, ccol;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004679 int inputid;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004680
4681 /*
4682 * Check that there is a comment right here.
4683 */
4684 if ((RAW != '<') || (NXT(1) != '!') ||
4685 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004686 state = ctxt->instate;
4687 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillard051d52c2008-07-29 16:44:59 +00004688 inputid = ctxt->input->id;
Daniel Veillard4c778d82005-01-23 17:37:44 +00004689 SKIP(4);
4690 SHRINK;
4691 GROW;
4692
4693 /*
4694 * Accelerated common case where input don't need to be
4695 * modified before passing it to the handler.
4696 */
4697 in = ctxt->input->cur;
4698 do {
4699 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004700 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004701 ctxt->input->line++; ctxt->input->col = 1;
4702 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004703 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004704 }
4705get_more:
4706 ccol = ctxt->input->col;
4707 while (((*in > '-') && (*in <= 0x7F)) ||
4708 ((*in >= 0x20) && (*in < '-')) ||
4709 (*in == 0x09)) {
4710 in++;
4711 ccol++;
4712 }
4713 ctxt->input->col = ccol;
4714 if (*in == 0xA) {
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004715 do {
Daniel Veillard4c778d82005-01-23 17:37:44 +00004716 ctxt->input->line++; ctxt->input->col = 1;
4717 in++;
Daniel Veillardb20c63a2006-01-04 17:08:46 +00004718 } while (*in == 0xA);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004719 goto get_more;
4720 }
4721 nbchar = in - ctxt->input->cur;
4722 /*
4723 * save current set of data
4724 */
4725 if (nbchar > 0) {
4726 if ((ctxt->sax != NULL) &&
4727 (ctxt->sax->comment != NULL)) {
4728 if (buf == NULL) {
4729 if ((*in == '-') && (in[1] == '-'))
4730 size = nbchar + 1;
4731 else
4732 size = XML_PARSER_BUFFER_SIZE + nbchar;
4733 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
4734 if (buf == NULL) {
4735 xmlErrMemory(ctxt, NULL);
4736 ctxt->instate = state;
4737 return;
4738 }
4739 len = 0;
4740 } else if (len + nbchar + 1 >= size) {
4741 xmlChar *new_buf;
4742 size += len + nbchar + XML_PARSER_BUFFER_SIZE;
4743 new_buf = (xmlChar *) xmlRealloc(buf,
4744 size * sizeof(xmlChar));
4745 if (new_buf == NULL) {
4746 xmlFree (buf);
4747 xmlErrMemory(ctxt, NULL);
4748 ctxt->instate = state;
4749 return;
4750 }
4751 buf = new_buf;
4752 }
4753 memcpy(&buf[len], ctxt->input->cur, nbchar);
4754 len += nbchar;
4755 buf[len] = 0;
4756 }
4757 }
4758 ctxt->input->cur = in;
Daniel Veillard9b528c72006-02-05 03:06:15 +00004759 if (*in == 0xA) {
4760 in++;
4761 ctxt->input->line++; ctxt->input->col = 1;
4762 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004763 if (*in == 0xD) {
4764 in++;
4765 if (*in == 0xA) {
4766 ctxt->input->cur = in;
4767 in++;
4768 ctxt->input->line++; ctxt->input->col = 1;
4769 continue; /* while */
4770 }
4771 in--;
4772 }
4773 SHRINK;
4774 GROW;
4775 in = ctxt->input->cur;
4776 if (*in == '-') {
4777 if (in[1] == '-') {
4778 if (in[2] == '>') {
Daniel Veillard051d52c2008-07-29 16:44:59 +00004779 if (ctxt->input->id != inputid) {
4780 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4781 "comment doesn't start and stop in the same entity\n");
4782 }
Daniel Veillard4c778d82005-01-23 17:37:44 +00004783 SKIP(3);
4784 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4785 (!ctxt->disableSAX)) {
4786 if (buf != NULL)
4787 ctxt->sax->comment(ctxt->userData, buf);
4788 else
4789 ctxt->sax->comment(ctxt->userData, BAD_CAST "");
4790 }
4791 if (buf != NULL)
4792 xmlFree(buf);
4793 ctxt->instate = state;
4794 return;
4795 }
Bryan Henderson8658d272012-05-08 16:39:05 +08004796 if (buf != NULL) {
4797 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4798 "Double hyphen within comment: "
4799 "<!--%.50s\n",
Daniel Veillard4c778d82005-01-23 17:37:44 +00004800 buf);
Bryan Henderson8658d272012-05-08 16:39:05 +08004801 } else
4802 xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
4803 "Double hyphen within comment\n", NULL);
Daniel Veillard4c778d82005-01-23 17:37:44 +00004804 in++;
4805 ctxt->input->col++;
4806 }
4807 in++;
4808 ctxt->input->col++;
4809 goto get_more;
4810 }
4811 } while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09));
4812 xmlParseCommentComplex(ctxt, buf, len, size);
4813 ctxt->instate = state;
4814 return;
4815}
4816
Owen Taylor3473f882001-02-23 17:55:21 +00004817
4818/**
4819 * xmlParsePITarget:
4820 * @ctxt: an XML parser context
4821 *
4822 * parse the name of a PI
4823 *
4824 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
4825 *
4826 * Returns the PITarget name or NULL
4827 */
4828
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004829const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00004830xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004831 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00004832
4833 name = xmlParseName(ctxt);
4834 if ((name != NULL) &&
4835 ((name[0] == 'x') || (name[0] == 'X')) &&
4836 ((name[1] == 'm') || (name[1] == 'M')) &&
4837 ((name[2] == 'l') || (name[2] == 'L'))) {
4838 int i;
4839 if ((name[0] == 'x') && (name[1] == 'm') &&
4840 (name[2] == 'l') && (name[3] == 0)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004841 xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
Owen Taylor3473f882001-02-23 17:55:21 +00004842 "XML declaration allowed only at the start of the document\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004843 return(name);
4844 } else if (name[3] == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004845 xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004846 return(name);
4847 }
4848 for (i = 0;;i++) {
4849 if (xmlW3CPIs[i] == NULL) break;
4850 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
4851 return(name);
4852 }
Daniel Veillard24eb9782003-10-04 21:08:09 +00004853 xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
4854 "xmlParsePITarget: invalid name prefix 'xml'\n",
4855 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004856 }
Daniel Veillard37334572008-07-31 08:20:02 +00004857 if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
4858 xmlNsErr(ctxt, XML_NS_ERR_COLON,
4859 "colon are forbidden from PI names '%s'\n", name, NULL, NULL);
4860 }
Owen Taylor3473f882001-02-23 17:55:21 +00004861 return(name);
4862}
4863
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The value must have the form  S? 'catalog' S? '=' S? quoted-URL S?
 * Malformed values produce an XML_WAR_CATALOG_PI warning, except when
 * the '=' is missing, in which case the PI is ignored without a warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* the pseudo attribute name */
    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* the equal sign; its absence is not reported */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != '=') {
        return;
    }
    p++;

    /* opening quote, single or double */
    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* the URL runs up to the matching quote */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
4925
Owen Taylor3473f882001-02-23 17:55:21 +00004926/**
4927 * xmlParsePI:
4928 * @ctxt: an XML parser context
4929 *
4930 * parse an XML Processing Instruction.
4931 *
4932 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
4933 *
4934 * The processing is transfered to SAX once parsed.
4935 */
4936
4937void
4938xmlParsePI(xmlParserCtxtPtr ctxt) {
4939 xmlChar *buf = NULL;
4940 int len = 0;
4941 int size = XML_PARSER_BUFFER_SIZE;
4942 int cur, l;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00004943 const xmlChar *target;
Owen Taylor3473f882001-02-23 17:55:21 +00004944 xmlParserInputState state;
4945 int count = 0;
4946
4947 if ((RAW == '<') && (NXT(1) == '?')) {
4948 xmlParserInputPtr input = ctxt->input;
4949 state = ctxt->instate;
4950 ctxt->instate = XML_PARSER_PI;
4951 /*
4952 * this is a Processing Instruction.
4953 */
4954 SKIP(2);
4955 SHRINK;
4956
4957 /*
4958 * Parse the target name and check for special support like
4959 * namespace.
4960 */
4961 target = xmlParsePITarget(ctxt);
4962 if (target != NULL) {
4963 if ((RAW == '?') && (NXT(1) == '>')) {
4964 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00004965 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
4966 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00004967 }
4968 SKIP(2);
4969
4970 /*
4971 * SAX: PI detected.
4972 */
4973 if ((ctxt->sax) && (!ctxt->disableSAX) &&
4974 (ctxt->sax->processingInstruction != NULL))
4975 ctxt->sax->processingInstruction(ctxt->userData,
4976 target, NULL);
Chris Evans77404b82011-12-14 16:18:25 +08004977 if (ctxt->instate != XML_PARSER_EOF)
4978 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00004979 return;
4980 }
Daniel Veillard3c908dc2003-04-19 00:07:51 +00004981 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00004982 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00004983 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00004984 ctxt->instate = state;
4985 return;
4986 }
4987 cur = CUR;
4988 if (!IS_BLANK(cur)) {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00004989 xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
4990 "ParsePI: PI %s space expected\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00004991 }
4992 SKIP_BLANKS;
4993 cur = CUR_CHAR(l);
William M. Brack871611b2003-10-18 04:53:14 +00004994 while (IS_CHAR(cur) && /* checked */
Owen Taylor3473f882001-02-23 17:55:21 +00004995 ((cur != '?') || (NXT(1) != '>'))) {
4996 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00004997 xmlChar *tmp;
4998
Owen Taylor3473f882001-02-23 17:55:21 +00004999 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00005000 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
5001 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005002 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00005003 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005004 ctxt->instate = state;
5005 return;
5006 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00005007 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005008 }
5009 count++;
5010 if (count > 50) {
5011 GROW;
5012 count = 0;
5013 }
5014 COPY_BUF(l,buf,len,cur);
5015 NEXTL(l);
5016 cur = CUR_CHAR(l);
5017 if (cur == 0) {
5018 SHRINK;
5019 GROW;
5020 cur = CUR_CHAR(l);
5021 }
5022 }
5023 buf[len] = 0;
5024 if (cur != '?') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005025 xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5026 "ParsePI: PI %s never end ...\n", target);
Owen Taylor3473f882001-02-23 17:55:21 +00005027 } else {
5028 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005029 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5030 "PI declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005031 }
5032 SKIP(2);
5033
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005034#ifdef LIBXML_CATALOG_ENABLED
5035 if (((state == XML_PARSER_MISC) ||
5036 (state == XML_PARSER_START)) &&
5037 (xmlStrEqual(target, XML_CATALOG_PI))) {
5038 xmlCatalogAllow allow = xmlCatalogGetDefaults();
5039 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5040 (allow == XML_CATA_ALLOW_ALL))
5041 xmlParseCatalogPI(ctxt, buf);
5042 }
5043#endif
5044
5045
Owen Taylor3473f882001-02-23 17:55:21 +00005046 /*
5047 * SAX: PI detected.
5048 */
5049 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5050 (ctxt->sax->processingInstruction != NULL))
5051 ctxt->sax->processingInstruction(ctxt->userData,
5052 target, buf);
5053 }
5054 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00005055 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005056 xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005057 }
Chris Evans77404b82011-12-14 16:18:25 +08005058 if (ctxt->instate != XML_PARSER_EOF)
5059 ctxt->instate = state;
Owen Taylor3473f882001-02-23 17:55:21 +00005060 }
5061}
5062
5063/**
5064 * xmlParseNotationDecl:
5065 * @ctxt: an XML parser context
5066 *
5067 * parse a notation declaration
5068 *
5069 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5070 *
5071 * Hence there is actually 3 choices:
5072 * 'PUBLIC' S PubidLiteral
5073 * 'PUBLIC' S PubidLiteral S SystemLiteral
5074 * and 'SYSTEM' S SystemLiteral
5075 *
5076 * See the NOTE on xmlParseExternalID().
5077 */
5078
5079void
5080xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005081 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00005082 xmlChar *Pubid;
5083 xmlChar *Systemid;
5084
Daniel Veillarda07050d2003-10-19 14:46:32 +00005085 if (CMP10(CUR_PTR, '<', '!', 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005086 xmlParserInputPtr input = ctxt->input;
5087 SHRINK;
5088 SKIP(10);
William M. Brack76e95df2003-10-18 16:20:14 +00005089 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005090 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5091 "Space required after '<!NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005092 return;
5093 }
5094 SKIP_BLANKS;
5095
Daniel Veillard76d66f42001-05-16 21:05:17 +00005096 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005097 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005098 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005099 return;
5100 }
William M. Brack76e95df2003-10-18 16:20:14 +00005101 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005102 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005103 "Space required after the NOTATION name'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005104 return;
5105 }
Daniel Veillard37334572008-07-31 08:20:02 +00005106 if (xmlStrchr(name, ':') != NULL) {
5107 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5108 "colon are forbidden from notation names '%s'\n",
5109 name, NULL, NULL);
5110 }
Owen Taylor3473f882001-02-23 17:55:21 +00005111 SKIP_BLANKS;
5112
5113 /*
5114 * Parse the IDs.
5115 */
5116 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5117 SKIP_BLANKS;
5118
5119 if (RAW == '>') {
5120 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005121 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5122 "Notation declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005123 }
5124 NEXT;
5125 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5126 (ctxt->sax->notationDecl != NULL))
5127 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5128 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005129 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005130 }
Owen Taylor3473f882001-02-23 17:55:21 +00005131 if (Systemid != NULL) xmlFree(Systemid);
5132 if (Pubid != NULL) xmlFree(Pubid);
5133 }
5134}
5135
5136/**
5137 * xmlParseEntityDecl:
5138 * @ctxt: an XML parser context
5139 *
5140 * parse <!ENTITY declarations
5141 *
5142 * [70] EntityDecl ::= GEDecl | PEDecl
5143 *
5144 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5145 *
5146 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5147 *
5148 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5149 *
5150 * [74] PEDef ::= EntityValue | ExternalID
5151 *
5152 * [76] NDataDecl ::= S 'NDATA' S Name
5153 *
5154 * [ VC: Notation Declared ]
5155 * The Name must match the declared name of a notation.
5156 */
5157
5158void
5159xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005160 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005161 xmlChar *value = NULL;
5162 xmlChar *URI = NULL, *literal = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005163 const xmlChar *ndata = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005164 int isParameter = 0;
5165 xmlChar *orig = NULL;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005166 int skipped;
Owen Taylor3473f882001-02-23 17:55:21 +00005167
Daniel Veillard4c778d82005-01-23 17:37:44 +00005168 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00005169 if (CMP8(CUR_PTR, '<', '!', 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005170 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00005171 SHRINK;
5172 SKIP(8);
Daniel Veillardf5582f12002-06-11 10:08:16 +00005173 skipped = SKIP_BLANKS;
5174 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005175 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5176 "Space required after '<!ENTITY'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005177 }
Owen Taylor3473f882001-02-23 17:55:21 +00005178
5179 if (RAW == '%') {
5180 NEXT;
Daniel Veillardf5582f12002-06-11 10:08:16 +00005181 skipped = SKIP_BLANKS;
5182 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005183 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5184 "Space required after '%'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005185 }
Owen Taylor3473f882001-02-23 17:55:21 +00005186 isParameter = 1;
5187 }
5188
Daniel Veillard76d66f42001-05-16 21:05:17 +00005189 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005190 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005191 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5192 "xmlParseEntityDecl: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005193 return;
5194 }
Daniel Veillard37334572008-07-31 08:20:02 +00005195 if (xmlStrchr(name, ':') != NULL) {
5196 xmlNsErr(ctxt, XML_NS_ERR_COLON,
5197 "colon are forbidden from entities names '%s'\n",
5198 name, NULL, NULL);
5199 }
Daniel Veillardf5582f12002-06-11 10:08:16 +00005200 skipped = SKIP_BLANKS;
5201 if (skipped == 0) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005202 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5203 "Space required after the entity name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005204 }
Owen Taylor3473f882001-02-23 17:55:21 +00005205
Daniel Veillardf5582f12002-06-11 10:08:16 +00005206 ctxt->instate = XML_PARSER_ENTITY_DECL;
Owen Taylor3473f882001-02-23 17:55:21 +00005207 /*
5208 * handle the various case of definitions...
5209 */
5210 if (isParameter) {
5211 if ((RAW == '"') || (RAW == '\'')) {
5212 value = xmlParseEntityValue(ctxt, &orig);
5213 if (value) {
5214 if ((ctxt->sax != NULL) &&
5215 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5216 ctxt->sax->entityDecl(ctxt->userData, name,
5217 XML_INTERNAL_PARAMETER_ENTITY,
5218 NULL, NULL, value);
5219 }
5220 } else {
5221 URI = xmlParseExternalID(ctxt, &literal, 1);
5222 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005223 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005224 }
5225 if (URI) {
5226 xmlURIPtr uri;
5227
5228 uri = xmlParseURI((const char *) URI);
5229 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005230 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5231 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005232 /*
5233 * This really ought to be a well formedness error
5234 * but the XML Core WG decided otherwise c.f. issue
5235 * E26 of the XML erratas.
5236 */
Owen Taylor3473f882001-02-23 17:55:21 +00005237 } else {
5238 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005239 /*
5240 * Okay this is foolish to block those but not
5241 * invalid URIs.
5242 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005243 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005244 } else {
5245 if ((ctxt->sax != NULL) &&
5246 (!ctxt->disableSAX) &&
5247 (ctxt->sax->entityDecl != NULL))
5248 ctxt->sax->entityDecl(ctxt->userData, name,
5249 XML_EXTERNAL_PARAMETER_ENTITY,
5250 literal, URI, NULL);
5251 }
5252 xmlFreeURI(uri);
5253 }
5254 }
5255 }
5256 } else {
5257 if ((RAW == '"') || (RAW == '\'')) {
5258 value = xmlParseEntityValue(ctxt, &orig);
5259 if ((ctxt->sax != NULL) &&
5260 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5261 ctxt->sax->entityDecl(ctxt->userData, name,
5262 XML_INTERNAL_GENERAL_ENTITY,
5263 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005264 /*
5265 * For expat compatibility in SAX mode.
5266 */
5267 if ((ctxt->myDoc == NULL) ||
5268 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5269 if (ctxt->myDoc == NULL) {
5270 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005271 if (ctxt->myDoc == NULL) {
5272 xmlErrMemory(ctxt, "New Doc failed");
5273 return;
5274 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005275 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005276 }
5277 if (ctxt->myDoc->intSubset == NULL)
5278 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5279 BAD_CAST "fake", NULL, NULL);
5280
Daniel Veillard1af9a412003-08-20 22:54:39 +00005281 xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5282 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005283 }
Owen Taylor3473f882001-02-23 17:55:21 +00005284 } else {
5285 URI = xmlParseExternalID(ctxt, &literal, 1);
5286 if ((URI == NULL) && (literal == NULL)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005287 xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005288 }
5289 if (URI) {
5290 xmlURIPtr uri;
5291
5292 uri = xmlParseURI((const char *)URI);
5293 if (uri == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00005294 xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5295 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005296 /*
5297 * This really ought to be a well formedness error
5298 * but the XML Core WG decided otherwise c.f. issue
5299 * E26 of the XML erratas.
5300 */
Owen Taylor3473f882001-02-23 17:55:21 +00005301 } else {
5302 if (uri->fragment != NULL) {
Daniel Veillard4a7ae502002-02-18 19:18:17 +00005303 /*
5304 * Okay this is foolish to block those but not
5305 * invalid URIs.
5306 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005307 xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005308 }
5309 xmlFreeURI(uri);
5310 }
5311 }
William M. Brack76e95df2003-10-18 16:20:14 +00005312 if ((RAW != '>') && (!IS_BLANK_CH(CUR))) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005313 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5314 "Space required before 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005315 }
5316 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005317 if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005318 SKIP(5);
William M. Brack76e95df2003-10-18 16:20:14 +00005319 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005320 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5321 "Space required after 'NDATA'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005322 }
5323 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005324 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005325 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5326 (ctxt->sax->unparsedEntityDecl != NULL))
5327 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5328 literal, URI, ndata);
5329 } else {
5330 if ((ctxt->sax != NULL) &&
5331 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5332 ctxt->sax->entityDecl(ctxt->userData, name,
5333 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5334 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005335 /*
5336 * For expat compatibility in SAX mode.
5337 * assuming the entity repalcement was asked for
5338 */
5339 if ((ctxt->replaceEntities != 0) &&
5340 ((ctxt->myDoc == NULL) ||
5341 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5342 if (ctxt->myDoc == NULL) {
5343 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005344 if (ctxt->myDoc == NULL) {
5345 xmlErrMemory(ctxt, "New Doc failed");
5346 return;
5347 }
Daniel Veillardae0765b2008-07-31 19:54:59 +00005348 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard5997aca2002-03-18 18:36:20 +00005349 }
5350
5351 if (ctxt->myDoc->intSubset == NULL)
5352 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5353 BAD_CAST "fake", NULL, NULL);
Daniel Veillard1af9a412003-08-20 22:54:39 +00005354 xmlSAX2EntityDecl(ctxt, name,
5355 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5356 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005357 }
Owen Taylor3473f882001-02-23 17:55:21 +00005358 }
5359 }
5360 }
5361 SKIP_BLANKS;
5362 if (RAW != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00005363 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00005364 "xmlParseEntityDecl: entity %s not terminated\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00005365 } else {
5366 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005367 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5368 "Entity declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005369 }
5370 NEXT;
5371 }
5372 if (orig != NULL) {
5373 /*
5374 * Ugly mechanism to save the raw entity value.
5375 */
5376 xmlEntityPtr cur = NULL;
5377
5378 if (isParameter) {
5379 if ((ctxt->sax != NULL) &&
5380 (ctxt->sax->getParameterEntity != NULL))
5381 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5382 } else {
5383 if ((ctxt->sax != NULL) &&
5384 (ctxt->sax->getEntity != NULL))
5385 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005386 if ((cur == NULL) && (ctxt->userData==ctxt)) {
Daniel Veillard1af9a412003-08-20 22:54:39 +00005387 cur = xmlSAX2GetEntity(ctxt, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005388 }
Owen Taylor3473f882001-02-23 17:55:21 +00005389 }
5390 if (cur != NULL) {
5391 if (cur->orig != NULL)
5392 xmlFree(orig);
5393 else
5394 cur->orig = orig;
5395 } else
5396 xmlFree(orig);
5397 }
Owen Taylor3473f882001-02-23 17:55:21 +00005398 if (value != NULL) xmlFree(value);
5399 if (URI != NULL) xmlFree(URI);
5400 if (literal != NULL) xmlFree(literal);
Owen Taylor3473f882001-02-23 17:55:21 +00005401 }
5402}
5403
5404/**
5405 * xmlParseDefaultDecl:
5406 * @ctxt: an XML parser context
5407 * @value: Receive a possible fixed default value for the attribute
5408 *
5409 * Parse an attribute default declaration
5410 *
5411 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5412 *
5413 * [ VC: Required Attribute ]
5414 * if the default declaration is the keyword #REQUIRED, then the
5415 * attribute must be specified for all elements of the type in the
5416 * attribute-list declaration.
5417 *
5418 * [ VC: Attribute Default Legal ]
5419 * The declared default value must meet the lexical constraints of
5420 * the declared attribute type c.f. xmlValidateAttributeDecl()
5421 *
5422 * [ VC: Fixed Attribute Default ]
5423 * if an attribute has a default value declared with the #FIXED
5424 * keyword, instances of that attribute must match the default value.
5425 *
5426 * [ WFC: No < in Attribute Values ]
5427 * handled in xmlParseAttValue()
5428 *
5429 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5430 * or XML_ATTRIBUTE_FIXED.
5431 */
5432
5433int
5434xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5435 int val;
5436 xmlChar *ret;
5437
5438 *value = NULL;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005439 if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005440 SKIP(9);
5441 return(XML_ATTRIBUTE_REQUIRED);
5442 }
Daniel Veillarda07050d2003-10-19 14:46:32 +00005443 if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005444 SKIP(8);
5445 return(XML_ATTRIBUTE_IMPLIED);
5446 }
5447 val = XML_ATTRIBUTE_NONE;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005448 if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005449 SKIP(6);
5450 val = XML_ATTRIBUTE_FIXED;
William M. Brack76e95df2003-10-18 16:20:14 +00005451 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005452 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5453 "Space required after '#FIXED'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005454 }
5455 SKIP_BLANKS;
5456 }
5457 ret = xmlParseAttValue(ctxt);
5458 ctxt->instate = XML_PARSER_DTD;
5459 if (ret == NULL) {
William M. Brack7b9154b2003-09-27 19:23:50 +00005460 xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005461 "Attribute default value declaration error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005462 } else
5463 *value = ret;
5464 return(val);
5465}
5466
5467/**
5468 * xmlParseNotationType:
5469 * @ctxt: an XML parser context
5470 *
5471 * parse an Notation attribute type.
5472 *
5473 * Note: the leading 'NOTATION' S part has already being parsed...
5474 *
5475 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5476 *
5477 * [ VC: Notation Attributes ]
5478 * Values of this type must match one of the notation names included
5479 * in the declaration; all notation names in the declaration must be declared.
5480 *
5481 * Returns: the notation attribute tree built while parsing
5482 */
5483
5484xmlEnumerationPtr
5485xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005486 const xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005487 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005488
5489 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005490 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005491 return(NULL);
5492 }
5493 SHRINK;
5494 do {
5495 NEXT;
5496 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005497 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005498 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005499 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5500 "Name expected in NOTATION declaration\n");
Daniel Veillard489f9672009-08-10 16:49:30 +02005501 xmlFreeEnumeration(ret);
5502 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005503 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005504 tmp = ret;
5505 while (tmp != NULL) {
5506 if (xmlStrEqual(name, tmp->name)) {
5507 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5508 "standalone: attribute notation value token %s duplicated\n",
5509 name, NULL);
5510 if (!xmlDictOwns(ctxt->dict, name))
5511 xmlFree((xmlChar *) name);
5512 break;
5513 }
5514 tmp = tmp->next;
5515 }
5516 if (tmp == NULL) {
5517 cur = xmlCreateEnumeration(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005518 if (cur == NULL) {
5519 xmlFreeEnumeration(ret);
5520 return(NULL);
5521 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005522 if (last == NULL) ret = last = cur;
5523 else {
5524 last->next = cur;
5525 last = cur;
5526 }
Owen Taylor3473f882001-02-23 17:55:21 +00005527 }
5528 SKIP_BLANKS;
5529 } while (RAW == '|');
5530 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005531 xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
Daniel Veillard489f9672009-08-10 16:49:30 +02005532 xmlFreeEnumeration(ret);
5533 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005534 }
5535 NEXT;
5536 return(ret);
5537}
5538
5539/**
5540 * xmlParseEnumerationType:
5541 * @ctxt: an XML parser context
5542 *
5543 * parse an Enumeration attribute type.
5544 *
5545 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5546 *
5547 * [ VC: Enumeration ]
5548 * Values of this type must match one of the Nmtoken tokens in
5549 * the declaration
5550 *
5551 * Returns: the enumeration attribute tree built while parsing
5552 */
5553
5554xmlEnumerationPtr
5555xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5556 xmlChar *name;
Daniel Veillard49d44052008-08-27 19:57:06 +00005557 xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00005558
5559 if (RAW != '(') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005560 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005561 return(NULL);
5562 }
5563 SHRINK;
5564 do {
5565 NEXT;
5566 SKIP_BLANKS;
5567 name = xmlParseNmtoken(ctxt);
5568 if (name == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005569 xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005570 return(ret);
5571 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005572 tmp = ret;
5573 while (tmp != NULL) {
5574 if (xmlStrEqual(name, tmp->name)) {
5575 xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5576 "standalone: attribute enumeration value token %s duplicated\n",
5577 name, NULL);
5578 if (!xmlDictOwns(ctxt->dict, name))
5579 xmlFree(name);
5580 break;
5581 }
5582 tmp = tmp->next;
5583 }
5584 if (tmp == NULL) {
5585 cur = xmlCreateEnumeration(name);
5586 if (!xmlDictOwns(ctxt->dict, name))
5587 xmlFree(name);
Daniel Veillard489f9672009-08-10 16:49:30 +02005588 if (cur == NULL) {
5589 xmlFreeEnumeration(ret);
5590 return(NULL);
5591 }
Daniel Veillard49d44052008-08-27 19:57:06 +00005592 if (last == NULL) ret = last = cur;
5593 else {
5594 last->next = cur;
5595 last = cur;
5596 }
Owen Taylor3473f882001-02-23 17:55:21 +00005597 }
5598 SKIP_BLANKS;
5599 } while (RAW == '|');
5600 if (RAW != ')') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005601 xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005602 return(ret);
5603 }
5604 NEXT;
5605 return(ret);
5606}
5607
5608/**
5609 * xmlParseEnumeratedType:
5610 * @ctxt: an XML parser context
5611 * @tree: the enumeration tree built while parsing
5612 *
5613 * parse an Enumerated attribute type.
5614 *
5615 * [57] EnumeratedType ::= NotationType | Enumeration
5616 *
5617 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5618 *
5619 *
5620 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
5621 */
5622
5623int
5624xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarda07050d2003-10-19 14:46:32 +00005625 if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005626 SKIP(8);
William M. Brack76e95df2003-10-18 16:20:14 +00005627 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005628 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5629 "Space required after 'NOTATION'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005630 return(0);
5631 }
5632 SKIP_BLANKS;
5633 *tree = xmlParseNotationType(ctxt);
5634 if (*tree == NULL) return(0);
5635 return(XML_ATTRIBUTE_NOTATION);
5636 }
5637 *tree = xmlParseEnumerationType(ctxt);
5638 if (*tree == NULL) return(0);
5639 return(XML_ATTRIBUTE_ENUMERATION);
5640}
5641
5642/**
5643 * xmlParseAttributeType:
5644 * @ctxt: an XML parser context
5645 * @tree: the enumeration tree built while parsing
5646 *
5647 * parse the Attribute list def for an element
5648 *
5649 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5650 *
5651 * [55] StringType ::= 'CDATA'
5652 *
5653 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5654 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
5655 *
5656 * Validity constraints for attribute values syntax are checked in
5657 * xmlValidateAttributeValue()
5658 *
5659 * [ VC: ID ]
5660 * Values of type ID must match the Name production. A name must not
5661 * appear more than once in an XML document as a value of this type;
5662 * i.e., ID values must uniquely identify the elements which bear them.
5663 *
5664 * [ VC: One ID per Element Type ]
5665 * No element type may have more than one ID attribute specified.
5666 *
5667 * [ VC: ID Attribute Default ]
5668 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
5669 *
5670 * [ VC: IDREF ]
5671 * Values of type IDREF must match the Name production, and values
5672 * of type IDREFS must match Names; each IDREF Name must match the value
5673 * of an ID attribute on some element in the XML document; i.e. IDREF
5674 * values must match the value of some ID attribute.
5675 *
5676 * [ VC: Entity Name ]
5677 * Values of type ENTITY must match the Name production, values
5678 * of type ENTITIES must match Names; each Entity Name must match the
5679 * name of an unparsed entity declared in the DTD.
5680 *
5681 * [ VC: Name Token ]
5682 * Values of type NMTOKEN must match the Nmtoken production; values
5683 * of type NMTOKENS must match Nmtokens.
5684 *
5685 * Returns the attribute type
5686 */
5687int
5688xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
5689 SHRINK;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005690 if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005691 SKIP(5);
5692 return(XML_ATTRIBUTE_CDATA);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005693 } else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005694 SKIP(6);
5695 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005696 } else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005697 SKIP(5);
5698 return(XML_ATTRIBUTE_IDREF);
5699 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
5700 SKIP(2);
5701 return(XML_ATTRIBUTE_ID);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005702 } else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005703 SKIP(6);
5704 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005705 } else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005706 SKIP(8);
5707 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005708 } else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005709 SKIP(8);
5710 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillarda07050d2003-10-19 14:46:32 +00005711 } else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005712 SKIP(7);
5713 return(XML_ATTRIBUTE_NMTOKEN);
5714 }
5715 return(xmlParseEnumeratedType(ctxt, tree));
5716}
5717
5718/**
5719 * xmlParseAttributeListDecl:
5720 * @ctxt: an XML parser context
5721 *
5722 * : parse the Attribute list def for an element
5723 *
5724 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5725 *
5726 * [53] AttDef ::= S Name S AttType S DefaultDecl
5727 *
5728 */
5729void
5730xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005731 const xmlChar *elemName;
5732 const xmlChar *attrName;
Owen Taylor3473f882001-02-23 17:55:21 +00005733 xmlEnumerationPtr tree;
5734
Daniel Veillarda07050d2003-10-19 14:46:32 +00005735 if (CMP9(CUR_PTR, '<', '!', 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005736 xmlParserInputPtr input = ctxt->input;
5737
5738 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00005739 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005740 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005741 "Space required after '<!ATTLIST'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005742 }
5743 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005744 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005745 if (elemName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005746 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5747 "ATTLIST: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005748 return;
5749 }
5750 SKIP_BLANKS;
5751 GROW;
5752 while (RAW != '>') {
5753 const xmlChar *check = CUR_PTR;
5754 int type;
5755 int def;
5756 xmlChar *defaultValue = NULL;
5757
5758 GROW;
5759 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005760 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005761 if (attrName == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005762 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5763 "ATTLIST: no name for Attribute\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005764 break;
5765 }
5766 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005767 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005768 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005769 "Space required after the attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005770 break;
5771 }
5772 SKIP_BLANKS;
5773
5774 type = xmlParseAttributeType(ctxt, &tree);
5775 if (type <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005776 break;
5777 }
5778
5779 GROW;
William M. Brack76e95df2003-10-18 16:20:14 +00005780 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005781 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5782 "Space required after the attribute type\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005783 if (tree != NULL)
5784 xmlFreeEnumeration(tree);
5785 break;
5786 }
5787 SKIP_BLANKS;
5788
5789 def = xmlParseDefaultDecl(ctxt, &defaultValue);
5790 if (def <= 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00005791 if (defaultValue != NULL)
5792 xmlFree(defaultValue);
5793 if (tree != NULL)
5794 xmlFreeEnumeration(tree);
5795 break;
5796 }
Daniel Veillard97c9ce22008-03-25 16:52:41 +00005797 if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
5798 xmlAttrNormalizeSpace(defaultValue, defaultValue);
Owen Taylor3473f882001-02-23 17:55:21 +00005799
5800 GROW;
5801 if (RAW != '>') {
William M. Brack76e95df2003-10-18 16:20:14 +00005802 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005803 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005804 "Space required after the attribute default value\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005805 if (defaultValue != NULL)
5806 xmlFree(defaultValue);
5807 if (tree != NULL)
5808 xmlFreeEnumeration(tree);
5809 break;
5810 }
5811 SKIP_BLANKS;
5812 }
5813 if (check == CUR_PTR) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005814 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
5815 "in xmlParseAttributeListDecl\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005816 if (defaultValue != NULL)
5817 xmlFree(defaultValue);
5818 if (tree != NULL)
5819 xmlFreeEnumeration(tree);
5820 break;
5821 }
5822 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5823 (ctxt->sax->attributeDecl != NULL))
5824 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
5825 type, def, defaultValue, tree);
Daniel Veillarde57ec792003-09-10 10:50:59 +00005826 else if (tree != NULL)
5827 xmlFreeEnumeration(tree);
5828
5829 if ((ctxt->sax2) && (defaultValue != NULL) &&
5830 (def != XML_ATTRIBUTE_IMPLIED) &&
5831 (def != XML_ATTRIBUTE_REQUIRED)) {
5832 xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
5833 }
Daniel Veillardac4118d2008-01-11 05:27:32 +00005834 if (ctxt->sax2) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00005835 xmlAddSpecialAttr(ctxt, elemName, attrName, type);
5836 }
Owen Taylor3473f882001-02-23 17:55:21 +00005837 if (defaultValue != NULL)
5838 xmlFree(defaultValue);
5839 GROW;
5840 }
5841 if (RAW == '>') {
5842 if (input != ctxt->input) {
Daniel Veillard49d44052008-08-27 19:57:06 +00005843 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5844 "Attribute list declaration doesn't start and stop in the same entity\n",
5845 NULL, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005846 }
5847 NEXT;
5848 }
Owen Taylor3473f882001-02-23 17:55:21 +00005849 }
5850}
5851
5852/**
5853 * xmlParseElementMixedContentDecl:
5854 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005855 * @inputchk: the input used for the current entity, needed for boundary checks
Owen Taylor3473f882001-02-23 17:55:21 +00005856 *
5857 * parse the declaration for a Mixed Element content
5858 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5859 *
5860 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
5861 * '(' S? '#PCDATA' S? ')'
5862 *
5863 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
5864 *
5865 * [ VC: No Duplicate Types ]
5866 * The same name must not appear more than once in a single
5867 * mixed-content declaration.
5868 *
5869 * returns: the list of the xmlElementContentPtr describing the element choices
5870 */
5871xmlElementContentPtr
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005872xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00005873 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005874 const xmlChar *elem = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005875
5876 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +00005877 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Owen Taylor3473f882001-02-23 17:55:21 +00005878 SKIP(7);
5879 SKIP_BLANKS;
5880 SHRINK;
5881 if (RAW == ')') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005882 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005883 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5884"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005885 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005886 }
Owen Taylor3473f882001-02-23 17:55:21 +00005887 NEXT;
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005888 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillard68b6e022008-03-31 09:26:00 +00005889 if (ret == NULL)
5890 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005891 if (RAW == '*') {
5892 ret->ocur = XML_ELEMENT_CONTENT_MULT;
5893 NEXT;
5894 }
5895 return(ret);
5896 }
5897 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005898 ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
Owen Taylor3473f882001-02-23 17:55:21 +00005899 if (ret == NULL) return(NULL);
5900 }
5901 while (RAW == '|') {
5902 NEXT;
5903 if (elem == NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005904 ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005905 if (ret == NULL) return(NULL);
5906 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005907 if (cur != NULL)
5908 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00005909 cur = ret;
5910 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005911 n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00005912 if (n == NULL) return(NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005913 n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005914 if (n->c1 != NULL)
5915 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005916 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005917 if (n != NULL)
5918 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005919 cur = n;
Owen Taylor3473f882001-02-23 17:55:21 +00005920 }
5921 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005922 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005923 if (elem == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005924 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00005925 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005926 xmlFreeDocElementContent(ctxt->myDoc, cur);
Owen Taylor3473f882001-02-23 17:55:21 +00005927 return(NULL);
5928 }
5929 SKIP_BLANKS;
5930 GROW;
5931 }
5932 if ((RAW == ')') && (NXT(1) == '*')) {
5933 if (elem != NULL) {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005934 cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
Owen Taylor3473f882001-02-23 17:55:21 +00005935 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00005936 if (cur->c2 != NULL)
5937 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00005938 }
Daniel Veillardd44b9362009-09-07 12:15:08 +02005939 if (ret != NULL)
5940 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00005941 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00005942 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
5943"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00005944 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00005945 }
Owen Taylor3473f882001-02-23 17:55:21 +00005946 SKIP(2);
5947 } else {
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00005948 xmlFreeDocElementContent(ctxt->myDoc, ret);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005949 xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005950 return(NULL);
5951 }
5952
5953 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00005954 xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00005955 }
5956 return(ret);
5957}
5958
5959/**
Daniel Veillard489f9672009-08-10 16:49:30 +02005960 * xmlParseElementChildrenContentDeclPriv:
Owen Taylor3473f882001-02-23 17:55:21 +00005961 * @ctxt: an XML parser context
Daniel Veillarda9b66d02002-12-11 14:23:49 +00005962 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02005963 * @depth: the level of recursion
Owen Taylor3473f882001-02-23 17:55:21 +00005964 *
 5965 * parse the declaration for an Element children content model
5966 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
5967 *
5968 *
5969 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
5970 *
5971 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
5972 *
5973 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
5974 *
5975 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
5976 *
5977 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
5978 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005979 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00005980 * opening or closing parentheses in a choice, seq, or Mixed
5981 * construct is contained in the replacement text for a parameter
5982 * entity, both must be contained in the same replacement text. For
5983 * interoperability, if a parameter-entity reference appears in a
5984 * choice, seq, or Mixed construct, its replacement text should not
5985 * be empty, and neither the first nor last non-blank character of
5986 * the replacement text should be a connector (| or ,).
5987 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00005988 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00005989 * hierarchy.
5990 */
Daniel Veillard489f9672009-08-10 16:49:30 +02005991static xmlElementContentPtr
5992xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
5993 int depth) {
Owen Taylor3473f882001-02-23 17:55:21 +00005994 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillard2fdbd322003-08-18 12:15:38 +00005995 const xmlChar *elem;
Owen Taylor3473f882001-02-23 17:55:21 +00005996 xmlChar type = 0;
5997
Daniel Veillard489f9672009-08-10 16:49:30 +02005998 if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
5999 (depth > 2048)) {
6000 xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6001"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6002 depth);
6003 return(NULL);
6004 }
Owen Taylor3473f882001-02-23 17:55:21 +00006005 SKIP_BLANKS;
6006 GROW;
6007 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006008 int inputid = ctxt->input->id;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006009
Owen Taylor3473f882001-02-23 17:55:21 +00006010 /* Recurse on first child */
6011 NEXT;
6012 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006013 cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6014 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006015 SKIP_BLANKS;
6016 GROW;
6017 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006018 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006019 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006020 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006021 return(NULL);
6022 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006023 cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006024 if (cur == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006025 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00006026 return(NULL);
6027 }
Owen Taylor3473f882001-02-23 17:55:21 +00006028 GROW;
6029 if (RAW == '?') {
6030 cur->ocur = XML_ELEMENT_CONTENT_OPT;
6031 NEXT;
6032 } else if (RAW == '*') {
6033 cur->ocur = XML_ELEMENT_CONTENT_MULT;
6034 NEXT;
6035 } else if (RAW == '+') {
6036 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6037 NEXT;
6038 } else {
6039 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6040 }
Owen Taylor3473f882001-02-23 17:55:21 +00006041 GROW;
6042 }
6043 SKIP_BLANKS;
6044 SHRINK;
6045 while (RAW != ')') {
6046 /*
6047 * Each loop we parse one separator and one element.
6048 */
6049 if (RAW == ',') {
6050 if (type == 0) type = CUR;
6051
6052 /*
6053 * Detect "Name | Name , Name" error
6054 */
6055 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006056 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006057 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006058 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006059 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006060 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006061 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006062 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006063 return(NULL);
6064 }
6065 NEXT;
6066
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006067 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
Owen Taylor3473f882001-02-23 17:55:21 +00006068 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006069 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006070 xmlFreeDocElementContent(ctxt->myDoc, last);
6071 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006072 return(NULL);
6073 }
6074 if (last == NULL) {
6075 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006076 if (ret != NULL)
6077 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006078 ret = cur = op;
6079 } else {
6080 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006081 if (op != NULL)
6082 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006083 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006084 if (last != NULL)
6085 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006086 cur =op;
6087 last = NULL;
6088 }
6089 } else if (RAW == '|') {
6090 if (type == 0) type = CUR;
6091
6092 /*
6093 * Detect "Name , Name | Name" error
6094 */
6095 else if (type != CUR) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006096 xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00006097 "xmlParseElementChildrenContentDecl : '%c' expected\n",
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006098 type);
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006099 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006100 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006101 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006102 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006103 return(NULL);
6104 }
6105 NEXT;
6106
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006107 op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
Owen Taylor3473f882001-02-23 17:55:21 +00006108 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00006109 if ((last != NULL) && (last != ret))
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006110 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006111 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006112 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006113 return(NULL);
6114 }
6115 if (last == NULL) {
6116 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006117 if (ret != NULL)
6118 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006119 ret = cur = op;
6120 } else {
6121 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006122 if (op != NULL)
6123 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006124 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006125 if (last != NULL)
6126 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00006127 cur =op;
6128 last = NULL;
6129 }
6130 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006131 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
Daniel Veillardc707d0b2008-01-24 14:48:54 +00006132 if ((last != NULL) && (last != ret))
6133 xmlFreeDocElementContent(ctxt->myDoc, last);
Owen Taylor3473f882001-02-23 17:55:21 +00006134 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006135 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006136 return(NULL);
6137 }
6138 GROW;
6139 SKIP_BLANKS;
6140 GROW;
6141 if (RAW == '(') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006142 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006143 /* Recurse on second child */
6144 NEXT;
6145 SKIP_BLANKS;
Daniel Veillard489f9672009-08-10 16:49:30 +02006146 last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6147 depth + 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006148 SKIP_BLANKS;
6149 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00006150 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006151 if (elem == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006152 xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006153 if (ret != NULL)
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006154 xmlFreeDocElementContent(ctxt->myDoc, ret);
Owen Taylor3473f882001-02-23 17:55:21 +00006155 return(NULL);
6156 }
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006157 last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard68b6e022008-03-31 09:26:00 +00006158 if (last == NULL) {
6159 if (ret != NULL)
6160 xmlFreeDocElementContent(ctxt->myDoc, ret);
6161 return(NULL);
6162 }
Owen Taylor3473f882001-02-23 17:55:21 +00006163 if (RAW == '?') {
6164 last->ocur = XML_ELEMENT_CONTENT_OPT;
6165 NEXT;
6166 } else if (RAW == '*') {
6167 last->ocur = XML_ELEMENT_CONTENT_MULT;
6168 NEXT;
6169 } else if (RAW == '+') {
6170 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6171 NEXT;
6172 } else {
6173 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6174 }
6175 }
6176 SKIP_BLANKS;
6177 GROW;
6178 }
6179 if ((cur != NULL) && (last != NULL)) {
6180 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00006181 if (last != NULL)
6182 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006183 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006184 if ((ctxt->validate) && (ctxt->input->id != inputchk)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00006185 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6186"Element content declaration doesn't start and stop in the same entity\n",
Daniel Veillardae0765b2008-07-31 19:54:59 +00006187 NULL, NULL);
Daniel Veillard8dc16a62002-02-19 21:08:48 +00006188 }
Owen Taylor3473f882001-02-23 17:55:21 +00006189 NEXT;
6190 if (RAW == '?') {
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006191 if (ret != NULL) {
6192 if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6193 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6194 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6195 else
6196 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6197 }
Owen Taylor3473f882001-02-23 17:55:21 +00006198 NEXT;
6199 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006200 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00006201 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006202 cur = ret;
6203 /*
6204 * Some normalization:
6205 * (a | b* | c?)* == (a | b | c)*
6206 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006207 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006208 if ((cur->c1 != NULL) &&
6209 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6210 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6211 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6212 if ((cur->c2 != NULL) &&
6213 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6214 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6215 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6216 cur = cur->c2;
6217 }
6218 }
Owen Taylor3473f882001-02-23 17:55:21 +00006219 NEXT;
6220 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006221 if (ret != NULL) {
6222 int found = 0;
6223
William M. Brackf8f2e8f2004-05-14 04:37:41 +00006224 if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6225 (ret->ocur == XML_ELEMENT_CONTENT_MULT))
6226 ret->ocur = XML_ELEMENT_CONTENT_MULT;
William M. Brackeb8509c2004-05-14 03:48:02 +00006227 else
6228 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006229 /*
6230 * Some normalization:
6231 * (a | b*)+ == (a | b)*
6232 * (a | b?)+ == (a | b)*
6233 */
Daniel Veillard30e76072006-03-09 14:13:55 +00006234 while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00006235 if ((cur->c1 != NULL) &&
6236 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6237 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6238 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6239 found = 1;
6240 }
6241 if ((cur->c2 != NULL) &&
6242 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6243 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6244 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6245 found = 1;
6246 }
6247 cur = cur->c2;
6248 }
6249 if (found)
6250 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6251 }
Owen Taylor3473f882001-02-23 17:55:21 +00006252 NEXT;
6253 }
6254 return(ret);
6255}
6256
6257/**
Daniel Veillard489f9672009-08-10 16:49:30 +02006258 * xmlParseElementChildrenContentDecl:
6259 * @ctxt: an XML parser context
6260 * @inputchk: the input used for the current entity, needed for boundary checks
Daniel Veillard489f9672009-08-10 16:49:30 +02006261 *
6262 * parse the declaration for a Mixed Element content
6263 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6264 *
6265 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6266 *
6267 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6268 *
6269 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6270 *
6271 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6272 *
6273 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6274 * TODO Parameter-entity replacement text must be properly nested
6275 * with parenthesized groups. That is to say, if either of the
6276 * opening or closing parentheses in a choice, seq, or Mixed
6277 * construct is contained in the replacement text for a parameter
6278 * entity, both must be contained in the same replacement text. For
6279 * interoperability, if a parameter-entity reference appears in a
6280 * choice, seq, or Mixed construct, its replacement text should not
6281 * be empty, and neither the first nor last non-blank character of
6282 * the replacement text should be a connector (| or ,).
6283 *
6284 * Returns the tree of xmlElementContentPtr describing the element
6285 * hierarchy.
6286 */
6287xmlElementContentPtr
6288xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6289 /* stub left for API/ABI compat */
6290 return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6291}
6292
6293/**
Owen Taylor3473f882001-02-23 17:55:21 +00006294 * xmlParseElementContentDecl:
6295 * @ctxt: an XML parser context
6296 * @name: the name of the element being defined.
6297 * @result: the Element Content pointer will be stored here if any
6298 *
6299 * parse the declaration for an Element content either Mixed or Children,
6300 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6301 *
6302 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6303 *
6304 * returns: the type of element content XML_ELEMENT_TYPE_xxx
6305 */
6306
6307int
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006308xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
Owen Taylor3473f882001-02-23 17:55:21 +00006309 xmlElementContentPtr *result) {
6310
6311 xmlElementContentPtr tree = NULL;
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006312 int inputid = ctxt->input->id;
Owen Taylor3473f882001-02-23 17:55:21 +00006313 int res;
6314
6315 *result = NULL;
6316
6317 if (RAW != '(') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00006318 xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006319 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00006320 return(-1);
6321 }
6322 NEXT;
6323 GROW;
6324 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006325 if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006326 tree = xmlParseElementMixedContentDecl(ctxt, inputid);
Owen Taylor3473f882001-02-23 17:55:21 +00006327 res = XML_ELEMENT_TYPE_MIXED;
6328 } else {
Daniel Veillard489f9672009-08-10 16:49:30 +02006329 tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00006330 res = XML_ELEMENT_TYPE_ELEMENT;
6331 }
Owen Taylor3473f882001-02-23 17:55:21 +00006332 SKIP_BLANKS;
6333 *result = tree;
6334 return(res);
6335}
6336
6337/**
6338 * xmlParseElementDecl:
6339 * @ctxt: an XML parser context
6340 *
6341 * parse an Element declaration.
6342 *
6343 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6344 *
6345 * [ VC: Unique Element Type Declaration ]
6346 * No element type may be declared more than once
6347 *
6348 * Returns the type of the element, or -1 in case of error
6349 */
6350int
6351xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00006352 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00006353 int ret = -1;
6354 xmlElementContentPtr content = NULL;
6355
Daniel Veillard4c778d82005-01-23 17:37:44 +00006356 /* GROW; done in the caller */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006357 if (CMP9(CUR_PTR, '<', '!', 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006358 xmlParserInputPtr input = ctxt->input;
6359
6360 SKIP(9);
William M. Brack76e95df2003-10-18 16:20:14 +00006361 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006362 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6363 "Space required after 'ELEMENT'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006364 }
6365 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00006366 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006367 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006368 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6369 "xmlParseElementDecl: no name for Element\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006370 return(-1);
6371 }
6372 while ((RAW == 0) && (ctxt->inputNr > 1))
6373 xmlPopInput(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006374 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006375 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6376 "Space required after the element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006377 }
6378 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006379 if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006380 SKIP(5);
6381 /*
6382 * Element must always be empty.
6383 */
6384 ret = XML_ELEMENT_TYPE_EMPTY;
6385 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
6386 (NXT(2) == 'Y')) {
6387 SKIP(3);
6388 /*
6389 * Element is a generic container.
6390 */
6391 ret = XML_ELEMENT_TYPE_ANY;
6392 } else if (RAW == '(') {
6393 ret = xmlParseElementContentDecl(ctxt, name, &content);
6394 } else {
6395 /*
6396 * [ WFC: PEs in Internal Subset ] error handling.
6397 */
6398 if ((RAW == '%') && (ctxt->external == 0) &&
6399 (ctxt->inputNr == 1)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00006400 xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006401 "PEReference: forbidden within markup decl in internal subset\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006402 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00006403 xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
Owen Taylor3473f882001-02-23 17:55:21 +00006404 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6405 }
Owen Taylor3473f882001-02-23 17:55:21 +00006406 return(-1);
6407 }
6408
6409 SKIP_BLANKS;
6410 /*
6411 * Pop-up of finished entities.
6412 */
6413 while ((RAW == 0) && (ctxt->inputNr > 1))
6414 xmlPopInput(ctxt);
6415 SKIP_BLANKS;
6416
6417 if (RAW != '>') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006418 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006419 if (content != NULL) {
6420 xmlFreeDocElementContent(ctxt->myDoc, content);
6421 }
Owen Taylor3473f882001-02-23 17:55:21 +00006422 } else {
6423 if (input != ctxt->input) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006424 xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6425 "Element declaration doesn't start and stop in the same entity\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006426 }
6427
6428 NEXT;
6429 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006430 (ctxt->sax->elementDecl != NULL)) {
6431 if (content != NULL)
6432 content->parent = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00006433 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6434 content);
Daniel Veillardcee2b3a2005-01-25 00:22:52 +00006435 if ((content != NULL) && (content->parent == NULL)) {
6436 /*
6437 * this is a trick: if xmlAddElementDecl is called,
6438 * instead of copying the full tree it is plugged directly
6439 * if called from the parser. Avoid duplicating the
6440 * interfaces or change the API/ABI
6441 */
6442 xmlFreeDocElementContent(ctxt->myDoc, content);
6443 }
6444 } else if (content != NULL) {
6445 xmlFreeDocElementContent(ctxt->myDoc, content);
6446 }
Owen Taylor3473f882001-02-23 17:55:21 +00006447 }
Owen Taylor3473f882001-02-23 17:55:21 +00006448 }
6449 return(ret);
6450}
6451
6452/**
Owen Taylor3473f882001-02-23 17:55:21 +00006453 * xmlParseConditionalSections
6454 * @ctxt: an XML parser context
6455 *
6456 * [61] conditionalSect ::= includeSect | ignoreSect
6457 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6458 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6459 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6460 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6461 */
6462
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006463static void
Owen Taylor3473f882001-02-23 17:55:21 +00006464xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillard49d44052008-08-27 19:57:06 +00006465 int id = ctxt->input->id;
6466
Owen Taylor3473f882001-02-23 17:55:21 +00006467 SKIP(3);
6468 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00006469 if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006470 SKIP(7);
6471 SKIP_BLANKS;
6472 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006473 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006474 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006475 if (ctxt->input->id != id) {
6476 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6477 "All markup of the conditional section is not in the same entity\n",
6478 NULL, NULL);
6479 }
Owen Taylor3473f882001-02-23 17:55:21 +00006480 NEXT;
6481 }
6482 if (xmlParserDebugEntities) {
6483 if ((ctxt->input != NULL) && (ctxt->input->filename))
6484 xmlGenericError(xmlGenericErrorContext,
6485 "%s(%d): ", ctxt->input->filename,
6486 ctxt->input->line);
6487 xmlGenericError(xmlGenericErrorContext,
6488 "Entering INCLUDE Conditional Section\n");
6489 }
6490
6491 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6492 (NXT(2) != '>'))) {
6493 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006494 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006495
6496 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6497 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006498 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006499 NEXT;
6500 } else if (RAW == '%') {
6501 xmlParsePEReference(ctxt);
6502 } else
6503 xmlParseMarkupDecl(ctxt);
6504
6505 /*
6506 * Pop-up of finished entities.
6507 */
6508 while ((RAW == 0) && (ctxt->inputNr > 1))
6509 xmlPopInput(ctxt);
6510
Daniel Veillardfdc91562002-07-01 21:52:03 +00006511 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006512 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006513 break;
6514 }
6515 }
6516 if (xmlParserDebugEntities) {
6517 if ((ctxt->input != NULL) && (ctxt->input->filename))
6518 xmlGenericError(xmlGenericErrorContext,
6519 "%s(%d): ", ctxt->input->filename,
6520 ctxt->input->line);
6521 xmlGenericError(xmlGenericErrorContext,
6522 "Leaving INCLUDE Conditional Section\n");
6523 }
6524
Daniel Veillarda07050d2003-10-19 14:46:32 +00006525 } else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006526 int state;
William M. Brack78637da2003-07-31 14:47:38 +00006527 xmlParserInputState instate;
Owen Taylor3473f882001-02-23 17:55:21 +00006528 int depth = 0;
6529
6530 SKIP(6);
6531 SKIP_BLANKS;
6532 if (RAW != '[') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006533 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006534 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006535 if (ctxt->input->id != id) {
6536 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6537 "All markup of the conditional section is not in the same entity\n",
6538 NULL, NULL);
6539 }
Owen Taylor3473f882001-02-23 17:55:21 +00006540 NEXT;
6541 }
6542 if (xmlParserDebugEntities) {
6543 if ((ctxt->input != NULL) && (ctxt->input->filename))
6544 xmlGenericError(xmlGenericErrorContext,
6545 "%s(%d): ", ctxt->input->filename,
6546 ctxt->input->line);
6547 xmlGenericError(xmlGenericErrorContext,
6548 "Entering IGNORE Conditional Section\n");
6549 }
6550
6551 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006552 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00006553 * But disable SAX event generating DTD building in the meantime
6554 */
6555 state = ctxt->disableSAX;
6556 instate = ctxt->instate;
Daniel Veillarddad3f682002-11-17 16:47:27 +00006557 if (ctxt->recovery == 0) ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00006558 ctxt->instate = XML_PARSER_IGNORE;
6559
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00006560 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006561 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6562 depth++;
6563 SKIP(3);
6564 continue;
6565 }
6566 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6567 if (--depth >= 0) SKIP(3);
6568 continue;
6569 }
6570 NEXT;
6571 continue;
6572 }
6573
6574 ctxt->disableSAX = state;
6575 ctxt->instate = instate;
6576
6577 if (xmlParserDebugEntities) {
6578 if ((ctxt->input != NULL) && (ctxt->input->filename))
6579 xmlGenericError(xmlGenericErrorContext,
6580 "%s(%d): ", ctxt->input->filename,
6581 ctxt->input->line);
6582 xmlGenericError(xmlGenericErrorContext,
6583 "Leaving IGNORE Conditional Section\n");
6584 }
6585
6586 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006587 xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006588 }
6589
6590 if (RAW == 0)
6591 SHRINK;
6592
6593 if (RAW == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006594 xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006595 } else {
Daniel Veillard49d44052008-08-27 19:57:06 +00006596 if (ctxt->input->id != id) {
6597 xmlValidityError(ctxt, XML_ERR_ENTITY_BOUNDARY,
6598 "All markup of the conditional section is not in the same entity\n",
6599 NULL, NULL);
6600 }
Owen Taylor3473f882001-02-23 17:55:21 +00006601 SKIP(3);
6602 }
6603}
6604
6605/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006606 * xmlParseMarkupDecl:
6607 * @ctxt: an XML parser context
6608 *
6609 * parse Markup declarations
6610 *
6611 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6612 * NotationDecl | PI | Comment
6613 *
6614 * [ VC: Proper Declaration/PE Nesting ]
6615 * Parameter-entity replacement text must be properly nested with
6616 * markup declarations. That is to say, if either the first character
6617 * or the last character of a markup declaration (markupdecl above) is
6618 * contained in the replacement text for a parameter-entity reference,
6619 * both must be contained in the same replacement text.
6620 *
6621 * [ WFC: PEs in Internal Subset ]
6622 * In the internal DTD subset, parameter-entity references can occur
6623 * only where markup declarations can occur, not within markup declarations.
6624 * (This does not apply to references that occur in external parameter
6625 * entities or to the external subset.)
6626 */
6627void
6628xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
6629 GROW;
Daniel Veillard4c778d82005-01-23 17:37:44 +00006630 if (CUR == '<') {
6631 if (NXT(1) == '!') {
6632 switch (NXT(2)) {
6633 case 'E':
6634 if (NXT(3) == 'L')
6635 xmlParseElementDecl(ctxt);
6636 else if (NXT(3) == 'N')
6637 xmlParseEntityDecl(ctxt);
6638 break;
6639 case 'A':
6640 xmlParseAttributeListDecl(ctxt);
6641 break;
6642 case 'N':
6643 xmlParseNotationDecl(ctxt);
6644 break;
6645 case '-':
6646 xmlParseComment(ctxt);
6647 break;
6648 default:
6649 /* there is an error but it will be detected later */
6650 break;
6651 }
6652 } else if (NXT(1) == '?') {
6653 xmlParsePI(ctxt);
6654 }
6655 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006656 /*
6657 * This is only for internal subset. On external entities,
6658 * the replacement is done before parsing stage
6659 */
6660 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6661 xmlParsePEReference(ctxt);
6662
6663 /*
6664 * Conditional sections are allowed from entities included
6665 * by PE References in the internal subset.
6666 */
6667 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
6668 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6669 xmlParseConditionalSections(ctxt);
6670 }
6671 }
6672
6673 ctxt->instate = XML_PARSER_DTD;
6674}
6675
6676/**
6677 * xmlParseTextDecl:
6678 * @ctxt: an XML parser context
Daniel Veillard40ec29a2008-07-30 12:35:40 +00006679 *
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006680 * parse an XML declaration header for external entities
6681 *
6682 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006683 */
6684
6685void
6686xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
6687 xmlChar *version;
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006688 const xmlChar *encoding;
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006689
6690 /*
6691 * We know that '<?xml' is here.
6692 */
Daniel Veillarda07050d2003-10-19 14:46:32 +00006693 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006694 SKIP(5);
6695 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006696 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006697 return;
6698 }
6699
William M. Brack76e95df2003-10-18 16:20:14 +00006700 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006701 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6702 "Space needed after '<?xml'\n");
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006703 }
6704 SKIP_BLANKS;
6705
6706 /*
6707 * We may have the VersionInfo here.
6708 */
6709 version = xmlParseVersionInfo(ctxt);
6710 if (version == NULL)
6711 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00006712 else {
William M. Brack76e95df2003-10-18 16:20:14 +00006713 if (!IS_BLANK_CH(CUR)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00006714 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6715 "Space needed here\n");
Daniel Veillard401c2112002-01-07 16:54:10 +00006716 }
6717 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006718 ctxt->input->version = version;
6719
6720 /*
6721 * We must have the encoding declaration
6722 */
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006723 encoding = xmlParseEncodingDecl(ctxt);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006724 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6725 /*
6726 * The XML REC instructs us to stop parsing right here
6727 */
6728 return;
6729 }
Daniel Veillardf5cb3cd2003-10-28 13:58:13 +00006730 if ((encoding == NULL) && (ctxt->errNo == XML_ERR_OK)) {
6731 xmlFatalErrMsg(ctxt, XML_ERR_MISSING_ENCODING,
6732 "Missing encoding in text declaration\n");
6733 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006734
6735 SKIP_BLANKS;
6736 if ((RAW == '?') && (NXT(1) == '>')) {
6737 SKIP(2);
6738 } else if (RAW == '>') {
6739 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006740 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006741 NEXT;
6742 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006743 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00006744 MOVETO_ENDTAG(CUR_PTR);
6745 NEXT;
6746 }
6747}
6748
6749/**
Owen Taylor3473f882001-02-23 17:55:21 +00006750 * xmlParseExternalSubset:
6751 * @ctxt: an XML parser context
6752 * @ExternalID: the external identifier
6753 * @SystemID: the system identifier (or URL)
6754 *
6755 * parse Markup declarations from an external subset
6756 *
6757 * [30] extSubset ::= textDecl? extSubsetDecl
6758 *
6759 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
6760 */
6761void
6762xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6763 const xmlChar *SystemID) {
Daniel Veillard309f81d2003-09-23 09:02:53 +00006764 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00006765 GROW;
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006766
Nikolay Sivove6ad10a2010-11-01 11:35:14 +01006767 if ((ctxt->encoding == NULL) &&
Daniel Veillard6ccc56d2008-04-03 12:59:06 +00006768 (ctxt->input->end - ctxt->input->cur >= 4)) {
6769 xmlChar start[4];
6770 xmlCharEncoding enc;
6771
6772 start[0] = RAW;
6773 start[1] = NXT(1);
6774 start[2] = NXT(2);
6775 start[3] = NXT(3);
6776 enc = xmlDetectCharEncoding(start, 4);
6777 if (enc != XML_CHAR_ENCODING_NONE)
6778 xmlSwitchEncoding(ctxt, enc);
6779 }
6780
Daniel Veillarda07050d2003-10-19 14:46:32 +00006781 if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006782 xmlParseTextDecl(ctxt);
6783 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6784 /*
6785 * The XML REC instructs us to stop parsing right here
6786 */
6787 ctxt->instate = XML_PARSER_EOF;
6788 return;
6789 }
6790 }
6791 if (ctxt->myDoc == NULL) {
6792 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +00006793 if (ctxt->myDoc == NULL) {
6794 xmlErrMemory(ctxt, "New Doc failed");
6795 return;
6796 }
6797 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +00006798 }
6799 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6800 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6801
6802 ctxt->instate = XML_PARSER_DTD;
6803 ctxt->external = 1;
6804 while (((RAW == '<') && (NXT(1) == '?')) ||
6805 ((RAW == '<') && (NXT(1) == '!')) ||
William M. Brack76e95df2003-10-18 16:20:14 +00006806 (RAW == '%') || IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006807 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00006808 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00006809
6810 GROW;
6811 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6812 xmlParseConditionalSections(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +00006813 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00006814 NEXT;
6815 } else if (RAW == '%') {
6816 xmlParsePEReference(ctxt);
6817 } else
6818 xmlParseMarkupDecl(ctxt);
6819
6820 /*
6821 * Pop-up of finished entities.
6822 */
6823 while ((RAW == 0) && (ctxt->inputNr > 1))
6824 xmlPopInput(ctxt);
6825
Daniel Veillardfdc91562002-07-01 21:52:03 +00006826 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006827 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006828 break;
6829 }
6830 }
6831
6832 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00006833 xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00006834 }
6835
6836}
6837
6838/**
6839 * xmlParseReference:
6840 * @ctxt: an XML parser context
Daniel Veillard0161e632008-08-28 15:36:32 +00006841 *
Owen Taylor3473f882001-02-23 17:55:21 +00006842 * parse and handle entity references in content, depending on the SAX
6843 * interface, this may end-up in a call to character() if this is a
6844 * CharRef, a predefined entity, if there is no reference() callback.
6845 * or if the parser was asked to switch to that mode.
6846 *
6847 * [67] Reference ::= EntityRef | CharRef
6848 */
6849void
6850xmlParseReference(xmlParserCtxtPtr ctxt) {
6851 xmlEntityPtr ent;
6852 xmlChar *val;
Daniel Veillard0161e632008-08-28 15:36:32 +00006853 int was_checked;
6854 xmlNodePtr list = NULL;
6855 xmlParserErrors ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +00006856
Daniel Veillard0161e632008-08-28 15:36:32 +00006857
6858 if (RAW != '&')
6859 return;
6860
6861 /*
6862 * Simple case of a CharRef
6863 */
Owen Taylor3473f882001-02-23 17:55:21 +00006864 if (NXT(1) == '#') {
6865 int i = 0;
6866 xmlChar out[10];
6867 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006868 int value = xmlParseCharRef(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +00006869
Daniel Veillarddc171602008-03-26 17:41:38 +00006870 if (value == 0)
6871 return;
Owen Taylor3473f882001-02-23 17:55:21 +00006872 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
6873 /*
6874 * So we are using non-UTF-8 buffers
6875 * Check that the char fit on 8bits, if not
6876 * generate a CharRef.
6877 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006878 if (value <= 0xFF) {
6879 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00006880 out[1] = 0;
6881 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6882 (!ctxt->disableSAX))
6883 ctxt->sax->characters(ctxt->userData, out, 1);
6884 } else {
6885 if ((hex == 'x') || (hex == 'X'))
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006886 snprintf((char *)out, sizeof(out), "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006887 else
Aleksey Sanin49cc9752002-06-14 17:07:10 +00006888 snprintf((char *)out, sizeof(out), "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00006889 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
6890 (!ctxt->disableSAX))
6891 ctxt->sax->reference(ctxt->userData, out);
6892 }
6893 } else {
6894 /*
6895 * Just encode the value in UTF-8
6896 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006897 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00006898 out[i] = 0;
6899 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6900 (!ctxt->disableSAX))
6901 ctxt->sax->characters(ctxt->userData, out, i);
6902 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006903 return;
6904 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00006905
Daniel Veillard0161e632008-08-28 15:36:32 +00006906 /*
6907 * We are seeing an entity reference
6908 */
6909 ent = xmlParseEntityRef(ctxt);
6910 if (ent == NULL) return;
6911 if (!ctxt->wellFormed)
6912 return;
6913 was_checked = ent->checked;
6914
6915 /* special case of predefined entities */
6916 if ((ent->name == NULL) ||
6917 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
6918 val = ent->content;
6919 if (val == NULL) return;
6920 /*
6921 * inline the entity.
6922 */
6923 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
6924 (!ctxt->disableSAX))
6925 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
6926 return;
6927 }
6928
6929 /*
6930 * The first reference to the entity trigger a parsing phase
6931 * where the ent->children is filled with the result from
6932 * the parsing.
Daniel Veillard4629ee02012-07-23 14:15:40 +08006933 * Note: external parsed entities will not be loaded, it is not
6934 * required for a non-validating parser, unless the parsing option
6935 * of validating, or substituting entities were given. Doing so is
6936 * far more secure as the parser will only process data coming from
6937 * the document entity by default.
Daniel Veillard0161e632008-08-28 15:36:32 +00006938 */
Daniel Veillard4629ee02012-07-23 14:15:40 +08006939 if ((ent->checked == 0) &&
6940 ((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
6941 (ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
Daniel Veillard0161e632008-08-28 15:36:32 +00006942 unsigned long oldnbent = ctxt->nbentities;
6943
6944 /*
6945 * This is a bit hackish but this seems the best
6946 * way to make sure both SAX and DOM entity support
6947 * behaves okay.
6948 */
6949 void *user_data;
6950 if (ctxt->userData == ctxt)
6951 user_data = NULL;
6952 else
6953 user_data = ctxt->userData;
6954
6955 /*
6956 * Check that this entity is well formed
6957 * 4.3.2: An internal general parsed entity is well-formed
6958 * if its replacement text matches the production labeled
6959 * content.
6960 */
6961 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
6962 ctxt->depth++;
6963 ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
6964 user_data, &list);
6965 ctxt->depth--;
6966
6967 } else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
6968 ctxt->depth++;
6969 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
6970 user_data, ctxt->depth, ent->URI,
6971 ent->ExternalID, &list);
6972 ctxt->depth--;
6973 } else {
6974 ret = XML_ERR_ENTITY_PE_INTERNAL;
6975 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
6976 "invalid entity type found\n", NULL);
6977 }
6978
6979 /*
6980 * Store the number of entities needing parsing for this entity
6981 * content and do checkings
6982 */
6983 ent->checked = ctxt->nbentities - oldnbent;
6984 if (ret == XML_ERR_ENTITY_LOOP) {
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006985 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
Daniel Veillard0161e632008-08-28 15:36:32 +00006986 xmlFreeNodeList(list);
Daniel Veillard4bf899b2008-08-20 17:04:30 +00006987 return;
6988 }
Daniel Veillard0161e632008-08-28 15:36:32 +00006989 if (xmlParserEntityCheck(ctxt, 0, ent)) {
6990 xmlFreeNodeList(list);
6991 return;
6992 }
Owen Taylor3473f882001-02-23 17:55:21 +00006993
Daniel Veillard0161e632008-08-28 15:36:32 +00006994 if ((ret == XML_ERR_OK) && (list != NULL)) {
6995 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
6996 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
6997 (ent->children == NULL)) {
6998 ent->children = list;
6999 if (ctxt->replaceEntities) {
Owen Taylor3473f882001-02-23 17:55:21 +00007000 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007001 * Prune it directly in the generated document
7002 * except for single text nodes.
Owen Taylor3473f882001-02-23 17:55:21 +00007003 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007004 if (((list->type == XML_TEXT_NODE) &&
7005 (list->next == NULL)) ||
7006 (ctxt->parseMode == XML_PARSE_READER)) {
7007 list->parent = (xmlNodePtr) ent;
7008 list = NULL;
7009 ent->owner = 1;
7010 } else {
7011 ent->owner = 0;
7012 while (list != NULL) {
7013 list->parent = (xmlNodePtr) ctxt->node;
7014 list->doc = ctxt->myDoc;
7015 if (list->next == NULL)
7016 ent->last = list;
7017 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007018 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007019 list = ent->children;
7020#ifdef LIBXML_LEGACY_ENABLED
7021 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7022 xmlAddEntityReference(ent, list, NULL);
7023#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00007024 }
7025 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007026 ent->owner = 1;
7027 while (list != NULL) {
7028 list->parent = (xmlNodePtr) ent;
Rob Richardsc794eb52011-02-18 12:17:17 -05007029 xmlSetTreeDoc(list, ent->doc);
Daniel Veillard0161e632008-08-28 15:36:32 +00007030 if (list->next == NULL)
7031 ent->last = list;
7032 list = list->next;
Owen Taylor3473f882001-02-23 17:55:21 +00007033 }
7034 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007035 } else {
7036 xmlFreeNodeList(list);
7037 list = NULL;
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007038 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007039 } else if ((ret != XML_ERR_OK) &&
7040 (ret != XML_WAR_UNDECLARED_ENTITY)) {
7041 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7042 "Entity '%s' failed to parse\n", ent->name);
7043 } else if (list != NULL) {
7044 xmlFreeNodeList(list);
7045 list = NULL;
7046 }
7047 if (ent->checked == 0)
7048 ent->checked = 1;
7049 } else if (ent->checked != 1) {
7050 ctxt->nbentities += ent->checked;
7051 }
Daniel Veillarda37a6ad2006-10-10 20:05:45 +00007052
Daniel Veillard0161e632008-08-28 15:36:32 +00007053 /*
7054 * Now that the entity content has been gathered
7055 * provide it to the application, this can take different forms based
7056 * on the parsing modes.
7057 */
7058 if (ent->children == NULL) {
7059 /*
7060 * Probably running in SAX mode and the callbacks don't
7061 * build the entity content. So unless we already went
7062 * though parsing for first checking go though the entity
7063 * content to generate callbacks associated to the entity
7064 */
7065 if (was_checked != 0) {
7066 void *user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00007067 /*
Daniel Veillard0161e632008-08-28 15:36:32 +00007068 * This is a bit hackish but this seems the best
7069 * way to make sure both SAX and DOM entity support
7070 * behaves okay.
Owen Taylor3473f882001-02-23 17:55:21 +00007071 */
Daniel Veillard0161e632008-08-28 15:36:32 +00007072 if (ctxt->userData == ctxt)
7073 user_data = NULL;
7074 else
7075 user_data = ctxt->userData;
7076
7077 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7078 ctxt->depth++;
7079 ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7080 ent->content, user_data, NULL);
7081 ctxt->depth--;
7082 } else if (ent->etype ==
7083 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7084 ctxt->depth++;
7085 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7086 ctxt->sax, user_data, ctxt->depth,
7087 ent->URI, ent->ExternalID, NULL);
7088 ctxt->depth--;
7089 } else {
7090 ret = XML_ERR_ENTITY_PE_INTERNAL;
7091 xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7092 "invalid entity type found\n", NULL);
7093 }
7094 if (ret == XML_ERR_ENTITY_LOOP) {
7095 xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7096 return;
7097 }
7098 }
7099 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7100 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7101 /*
7102 * Entity reference callback comes second, it's somewhat
7103 * superfluous but a compatibility to historical behaviour
7104 */
7105 ctxt->sax->reference(ctxt->userData, ent->name);
7106 }
7107 return;
7108 }
7109
7110 /*
7111 * If we didn't get any children for the entity being built
7112 */
7113 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7114 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7115 /*
7116 * Create a node.
7117 */
7118 ctxt->sax->reference(ctxt->userData, ent->name);
7119 return;
7120 }
7121
7122 if ((ctxt->replaceEntities) || (ent->children == NULL)) {
7123 /*
7124 * There is a problem on the handling of _private for entities
7125 * (bug 155816): Should we copy the content of the field from
7126 * the entity (possibly overwriting some value set by the user
7127 * when a copy is created), should we leave it alone, or should
7128 * we try to take care of different situations? The problem
7129 * is exacerbated by the usage of this field by the xmlReader.
7130 * To fix this bug, we look at _private on the created node
7131 * and, if it's NULL, we copy in whatever was in the entity.
7132 * If it's not NULL we leave it alone. This is somewhat of a
7133 * hack - maybe we should have further tests to determine
7134 * what to do.
7135 */
7136 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7137 /*
7138 * Seems we are generating the DOM content, do
7139 * a simple tree copy for all references except the first
7140 * In the first occurrence list contains the replacement.
7141 * progressive == 2 means we are operating on the Reader
7142 * and since nodes are discarded we must copy all the time.
7143 */
7144 if (((list == NULL) && (ent->owner == 0)) ||
7145 (ctxt->parseMode == XML_PARSE_READER)) {
7146 xmlNodePtr nw = NULL, cur, firstChild = NULL;
7147
7148 /*
7149 * when operating on a reader, the entities definitions
7150 * are always owning the entities subtree.
7151 if (ctxt->parseMode == XML_PARSE_READER)
7152 ent->owner = 1;
7153 */
7154
7155 cur = ent->children;
7156 while (cur != NULL) {
7157 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7158 if (nw != NULL) {
7159 if (nw->_private == NULL)
7160 nw->_private = cur->_private;
7161 if (firstChild == NULL){
7162 firstChild = nw;
7163 }
7164 nw = xmlAddChild(ctxt->node, nw);
7165 }
7166 if (cur == ent->last) {
7167 /*
7168 * needed to detect some strange empty
7169 * node cases in the reader tests
7170 */
7171 if ((ctxt->parseMode == XML_PARSE_READER) &&
7172 (nw != NULL) &&
7173 (nw->type == XML_ELEMENT_NODE) &&
7174 (nw->children == NULL))
7175 nw->extra = 1;
7176
7177 break;
7178 }
7179 cur = cur->next;
7180 }
7181#ifdef LIBXML_LEGACY_ENABLED
7182 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7183 xmlAddEntityReference(ent, firstChild, nw);
7184#endif /* LIBXML_LEGACY_ENABLED */
7185 } else if (list == NULL) {
7186 xmlNodePtr nw = NULL, cur, next, last,
7187 firstChild = NULL;
7188 /*
7189 * Copy the entity child list and make it the new
7190 * entity child list. The goal is to make sure any
7191 * ID or REF referenced will be the one from the
7192 * document content and not the entity copy.
7193 */
7194 cur = ent->children;
7195 ent->children = NULL;
7196 last = ent->last;
7197 ent->last = NULL;
7198 while (cur != NULL) {
7199 next = cur->next;
7200 cur->next = NULL;
7201 cur->parent = NULL;
7202 nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7203 if (nw != NULL) {
7204 if (nw->_private == NULL)
7205 nw->_private = cur->_private;
7206 if (firstChild == NULL){
7207 firstChild = cur;
7208 }
7209 xmlAddChild((xmlNodePtr) ent, nw);
7210 xmlAddChild(ctxt->node, cur);
7211 }
7212 if (cur == last)
7213 break;
7214 cur = next;
7215 }
Daniel Veillardcba68392008-08-29 12:43:40 +00007216 if (ent->owner == 0)
7217 ent->owner = 1;
Daniel Veillard0161e632008-08-28 15:36:32 +00007218#ifdef LIBXML_LEGACY_ENABLED
7219 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7220 xmlAddEntityReference(ent, firstChild, nw);
7221#endif /* LIBXML_LEGACY_ENABLED */
7222 } else {
7223 const xmlChar *nbktext;
7224
7225 /*
7226 * the name change is to avoid coalescing of the
7227 * node with a possible previous text one which
7228 * would make ent->children a dangling pointer
7229 */
7230 nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7231 -1);
7232 if (ent->children->type == XML_TEXT_NODE)
7233 ent->children->name = nbktext;
7234 if ((ent->last != ent->children) &&
7235 (ent->last->type == XML_TEXT_NODE))
7236 ent->last->name = nbktext;
7237 xmlAddChildList(ctxt->node, ent->children);
7238 }
7239
7240 /*
7241 * This is to avoid a nasty side effect, see
7242 * characters() in SAX.c
7243 */
7244 ctxt->nodemem = 0;
7245 ctxt->nodelen = 0;
7246 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007247 }
7248 }
7249}
7250
7251/**
7252 * xmlParseEntityRef:
7253 * @ctxt: an XML parser context
7254 *
7255 * parse ENTITY references declarations
7256 *
7257 * [68] EntityRef ::= '&' Name ';'
7258 *
7259 * [ WFC: Entity Declared ]
7260 * In a document without any DTD, a document with only an internal DTD
7261 * subset which contains no parameter entity references, or a document
7262 * with "standalone='yes'", the Name given in the entity reference
7263 * must match that in an entity declaration, except that well-formed
7264 * documents need not declare any of the following entities: amp, lt,
7265 * gt, apos, quot. The declaration of a parameter entity must precede
7266 * any reference to it. Similarly, the declaration of a general entity
7267 * must precede any reference to it which appears in a default value in an
7268 * attribute-list declaration. Note that if entities are declared in the
7269 * external subset or in external parameter entities, a non-validating
7270 * processor is not obligated to read and process their declarations;
7271 * for such documents, the rule that an entity must be declared is a
7272 * well-formedness constraint only if standalone='yes'.
7273 *
7274 * [ WFC: Parsed Entity ]
7275 * An entity reference must not contain the name of an unparsed entity
7276 *
7277 * Returns the xmlEntityPtr if found, or NULL otherwise.
7278 */
7279xmlEntityPtr
7280xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007281 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007282 xmlEntityPtr ent = NULL;
7283
7284 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00007285
Daniel Veillard0161e632008-08-28 15:36:32 +00007286 if (RAW != '&')
7287 return(NULL);
7288 NEXT;
7289 name = xmlParseName(ctxt);
7290 if (name == NULL) {
7291 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7292 "xmlParseEntityRef: no name\n");
7293 return(NULL);
7294 }
7295 if (RAW != ';') {
7296 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7297 return(NULL);
7298 }
7299 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00007300
Daniel Veillard0161e632008-08-28 15:36:32 +00007301 /*
7302 * Predefined entites override any extra definition
7303 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007304 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7305 ent = xmlGetPredefinedEntity(name);
7306 if (ent != NULL)
7307 return(ent);
7308 }
Owen Taylor3473f882001-02-23 17:55:21 +00007309
Daniel Veillard0161e632008-08-28 15:36:32 +00007310 /*
7311 * Increate the number of entity references parsed
7312 */
7313 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007314
Daniel Veillard0161e632008-08-28 15:36:32 +00007315 /*
7316 * Ask first SAX for entity resolution, otherwise try the
7317 * entities which may have stored in the parser context.
7318 */
7319 if (ctxt->sax != NULL) {
7320 if (ctxt->sax->getEntity != NULL)
7321 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007322 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7323 (ctxt->options & XML_PARSE_OLDSAX))
7324 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007325 if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7326 (ctxt->userData==ctxt)) {
7327 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007328 }
7329 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007330 /*
7331 * [ WFC: Entity Declared ]
7332 * In a document without any DTD, a document with only an
7333 * internal DTD subset which contains no parameter entity
7334 * references, or a document with "standalone='yes'", the
7335 * Name given in the entity reference must match that in an
7336 * entity declaration, except that well-formed documents
7337 * need not declare any of the following entities: amp, lt,
7338 * gt, apos, quot.
7339 * The declaration of a parameter entity must precede any
7340 * reference to it.
7341 * Similarly, the declaration of a general entity must
7342 * precede any reference to it which appears in a default
7343 * value in an attribute-list declaration. Note that if
7344 * entities are declared in the external subset or in
7345 * external parameter entities, a non-validating processor
7346 * is not obligated to read and process their declarations;
7347 * for such documents, the rule that an entity must be
7348 * declared is a well-formedness constraint only if
7349 * standalone='yes'.
7350 */
7351 if (ent == NULL) {
7352 if ((ctxt->standalone == 1) ||
7353 ((ctxt->hasExternalSubset == 0) &&
7354 (ctxt->hasPErefs == 0))) {
7355 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7356 "Entity '%s' not defined\n", name);
7357 } else {
7358 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7359 "Entity '%s' not defined\n", name);
7360 if ((ctxt->inSubset == 0) &&
7361 (ctxt->sax != NULL) &&
7362 (ctxt->sax->reference != NULL)) {
7363 ctxt->sax->reference(ctxt->userData, name);
7364 }
7365 }
7366 ctxt->valid = 0;
7367 }
7368
7369 /*
7370 * [ WFC: Parsed Entity ]
7371 * An entity reference must not contain the name of an
7372 * unparsed entity
7373 */
7374 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7375 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7376 "Entity reference to unparsed entity %s\n", name);
7377 }
7378
7379 /*
7380 * [ WFC: No External Entity References ]
7381 * Attribute values cannot contain direct or indirect
7382 * entity references to external entities.
7383 */
7384 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7385 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7386 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7387 "Attribute references external entity '%s'\n", name);
7388 }
7389 /*
7390 * [ WFC: No < in Attribute Values ]
7391 * The replacement text of any entity referred to directly or
7392 * indirectly in an attribute value (other than "&lt;") must
7393 * not contain a <.
7394 */
7395 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7396 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007397 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007398 (xmlStrchr(ent->content, '<'))) {
7399 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7400 "'<' in entity '%s' is not allowed in attributes values\n", name);
7401 }
7402
7403 /*
7404 * Internal check, no parameter entities here ...
7405 */
7406 else {
7407 switch (ent->etype) {
7408 case XML_INTERNAL_PARAMETER_ENTITY:
7409 case XML_EXTERNAL_PARAMETER_ENTITY:
7410 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7411 "Attempt to reference the parameter entity '%s'\n",
7412 name);
7413 break;
7414 default:
7415 break;
7416 }
7417 }
7418
7419 /*
7420 * [ WFC: No Recursion ]
7421 * A parsed entity must not contain a recursive reference
7422 * to itself, either directly or indirectly.
7423 * Done somewhere else
7424 */
Owen Taylor3473f882001-02-23 17:55:21 +00007425 return(ent);
7426}
7427
7428/**
7429 * xmlParseStringEntityRef:
7430 * @ctxt: an XML parser context
7431 * @str: a pointer to an index in the string
7432 *
7433 * parse ENTITY references declarations, but this version parses it from
7434 * a string value.
7435 *
7436 * [68] EntityRef ::= '&' Name ';'
7437 *
7438 * [ WFC: Entity Declared ]
7439 * In a document without any DTD, a document with only an internal DTD
7440 * subset which contains no parameter entity references, or a document
7441 * with "standalone='yes'", the Name given in the entity reference
7442 * must match that in an entity declaration, except that well-formed
7443 * documents need not declare any of the following entities: amp, lt,
7444 * gt, apos, quot. The declaration of a parameter entity must precede
7445 * any reference to it. Similarly, the declaration of a general entity
7446 * must precede any reference to it which appears in a default value in an
7447 * attribute-list declaration. Note that if entities are declared in the
7448 * external subset or in external parameter entities, a non-validating
7449 * processor is not obligated to read and process their declarations;
7450 * for such documents, the rule that an entity must be declared is a
7451 * well-formedness constraint only if standalone='yes'.
7452 *
7453 * [ WFC: Parsed Entity ]
7454 * An entity reference must not contain the name of an unparsed entity
7455 *
7456 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7457 * is updated to the current location in the string.
7458 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007459static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007460xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7461 xmlChar *name;
7462 const xmlChar *ptr;
7463 xmlChar cur;
7464 xmlEntityPtr ent = NULL;
7465
7466 if ((str == NULL) || (*str == NULL))
7467 return(NULL);
7468 ptr = *str;
7469 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007470 if (cur != '&')
7471 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007472
Daniel Veillard0161e632008-08-28 15:36:32 +00007473 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007474 name = xmlParseStringName(ctxt, &ptr);
7475 if (name == NULL) {
7476 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7477 "xmlParseStringEntityRef: no name\n");
7478 *str = ptr;
7479 return(NULL);
7480 }
7481 if (*ptr != ';') {
7482 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
Daniel Veillard7f4547c2008-10-03 07:58:23 +00007483 xmlFree(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007484 *str = ptr;
7485 return(NULL);
7486 }
7487 ptr++;
Owen Taylor3473f882001-02-23 17:55:21 +00007488
Owen Taylor3473f882001-02-23 17:55:21 +00007489
Daniel Veillard0161e632008-08-28 15:36:32 +00007490 /*
7491 * Predefined entites override any extra definition
7492 */
Rob Richardsb9ed0172009-01-05 17:28:50 +00007493 if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7494 ent = xmlGetPredefinedEntity(name);
7495 if (ent != NULL) {
7496 xmlFree(name);
7497 *str = ptr;
7498 return(ent);
7499 }
Daniel Veillard34a7fc32008-10-02 20:55:10 +00007500 }
Owen Taylor3473f882001-02-23 17:55:21 +00007501
Daniel Veillard0161e632008-08-28 15:36:32 +00007502 /*
7503 * Increate the number of entity references parsed
7504 */
7505 ctxt->nbentities++;
Owen Taylor3473f882001-02-23 17:55:21 +00007506
Daniel Veillard0161e632008-08-28 15:36:32 +00007507 /*
7508 * Ask first SAX for entity resolution, otherwise try the
7509 * entities which may have stored in the parser context.
7510 */
7511 if (ctxt->sax != NULL) {
7512 if (ctxt->sax->getEntity != NULL)
7513 ent = ctxt->sax->getEntity(ctxt->userData, name);
Rob Richardsb9ed0172009-01-05 17:28:50 +00007514 if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7515 ent = xmlGetPredefinedEntity(name);
Daniel Veillard0161e632008-08-28 15:36:32 +00007516 if ((ent == NULL) && (ctxt->userData==ctxt)) {
7517 ent = xmlSAX2GetEntity(ctxt, name);
Owen Taylor3473f882001-02-23 17:55:21 +00007518 }
7519 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007520
7521 /*
7522 * [ WFC: Entity Declared ]
7523 * In a document without any DTD, a document with only an
7524 * internal DTD subset which contains no parameter entity
7525 * references, or a document with "standalone='yes'", the
7526 * Name given in the entity reference must match that in an
7527 * entity declaration, except that well-formed documents
7528 * need not declare any of the following entities: amp, lt,
7529 * gt, apos, quot.
7530 * The declaration of a parameter entity must precede any
7531 * reference to it.
7532 * Similarly, the declaration of a general entity must
7533 * precede any reference to it which appears in a default
7534 * value in an attribute-list declaration. Note that if
7535 * entities are declared in the external subset or in
7536 * external parameter entities, a non-validating processor
7537 * is not obligated to read and process their declarations;
7538 * for such documents, the rule that an entity must be
7539 * declared is a well-formedness constraint only if
7540 * standalone='yes'.
7541 */
7542 if (ent == NULL) {
7543 if ((ctxt->standalone == 1) ||
7544 ((ctxt->hasExternalSubset == 0) &&
7545 (ctxt->hasPErefs == 0))) {
7546 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7547 "Entity '%s' not defined\n", name);
7548 } else {
7549 xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7550 "Entity '%s' not defined\n",
7551 name);
7552 }
7553 /* TODO ? check regressions ctxt->valid = 0; */
7554 }
7555
7556 /*
7557 * [ WFC: Parsed Entity ]
7558 * An entity reference must not contain the name of an
7559 * unparsed entity
7560 */
7561 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7562 xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7563 "Entity reference to unparsed entity %s\n", name);
7564 }
7565
7566 /*
7567 * [ WFC: No External Entity References ]
7568 * Attribute values cannot contain direct or indirect
7569 * entity references to external entities.
7570 */
7571 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7572 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7573 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7574 "Attribute references external entity '%s'\n", name);
7575 }
7576 /*
7577 * [ WFC: No < in Attribute Values ]
7578 * The replacement text of any entity referred to directly or
7579 * indirectly in an attribute value (other than "&lt;") must
7580 * not contain a <.
7581 */
7582 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7583 (ent != NULL) && (ent->content != NULL) &&
Rob Richardsb9ed0172009-01-05 17:28:50 +00007584 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
Daniel Veillard0161e632008-08-28 15:36:32 +00007585 (xmlStrchr(ent->content, '<'))) {
7586 xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7587 "'<' in entity '%s' is not allowed in attributes values\n",
7588 name);
7589 }
7590
7591 /*
7592 * Internal check, no parameter entities here ...
7593 */
7594 else {
7595 switch (ent->etype) {
7596 case XML_INTERNAL_PARAMETER_ENTITY:
7597 case XML_EXTERNAL_PARAMETER_ENTITY:
7598 xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7599 "Attempt to reference the parameter entity '%s'\n",
7600 name);
7601 break;
7602 default:
7603 break;
7604 }
7605 }
7606
7607 /*
7608 * [ WFC: No Recursion ]
7609 * A parsed entity must not contain a recursive reference
7610 * to itself, either directly or indirectly.
7611 * Done somewhere else
7612 */
7613
7614 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007615 *str = ptr;
7616 return(ent);
7617}
7618
7619/**
7620 * xmlParsePEReference:
7621 * @ctxt: an XML parser context
7622 *
7623 * parse PEReference declarations
7624 * The entity content is handled directly by pushing it's content as
7625 * a new input stream.
7626 *
7627 * [69] PEReference ::= '%' Name ';'
7628 *
7629 * [ WFC: No Recursion ]
7630 * A parsed entity must not contain a recursive
7631 * reference to itself, either directly or indirectly.
7632 *
7633 * [ WFC: Entity Declared ]
7634 * In a document without any DTD, a document with only an internal DTD
7635 * subset which contains no parameter entity references, or a document
7636 * with "standalone='yes'", ... ... The declaration of a parameter
7637 * entity must precede any reference to it...
7638 *
7639 * [ VC: Entity Declared ]
7640 * In a document with an external subset or external parameter entities
7641 * with "standalone='no'", ... ... The declaration of a parameter entity
7642 * must precede any reference to it...
7643 *
7644 * [ WFC: In DTD ]
7645 * Parameter-entity references may only appear in the DTD.
7646 * NOTE: misleading but this is handled.
7647 */
7648void
Daniel Veillard8f597c32003-10-06 08:19:27 +00007649xmlParsePEReference(xmlParserCtxtPtr ctxt)
7650{
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007651 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00007652 xmlEntityPtr entity = NULL;
7653 xmlParserInputPtr input;
7654
Daniel Veillard0161e632008-08-28 15:36:32 +00007655 if (RAW != '%')
7656 return;
7657 NEXT;
7658 name = xmlParseName(ctxt);
7659 if (name == NULL) {
7660 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7661 "xmlParsePEReference: no name\n");
7662 return;
Owen Taylor3473f882001-02-23 17:55:21 +00007663 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007664 if (RAW != ';') {
7665 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7666 return;
7667 }
7668
7669 NEXT;
7670
7671 /*
7672 * Increate the number of entity references parsed
7673 */
7674 ctxt->nbentities++;
7675
7676 /*
7677 * Request the entity from SAX
7678 */
7679 if ((ctxt->sax != NULL) &&
7680 (ctxt->sax->getParameterEntity != NULL))
7681 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7682 name);
7683 if (entity == NULL) {
7684 /*
7685 * [ WFC: Entity Declared ]
7686 * In a document without any DTD, a document with only an
7687 * internal DTD subset which contains no parameter entity
7688 * references, or a document with "standalone='yes'", ...
7689 * ... The declaration of a parameter entity must precede
7690 * any reference to it...
7691 */
7692 if ((ctxt->standalone == 1) ||
7693 ((ctxt->hasExternalSubset == 0) &&
7694 (ctxt->hasPErefs == 0))) {
7695 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7696 "PEReference: %%%s; not found\n",
7697 name);
7698 } else {
7699 /*
7700 * [ VC: Entity Declared ]
7701 * In a document with an external subset or external
7702 * parameter entities with "standalone='no'", ...
7703 * ... The declaration of a parameter entity must
7704 * precede any reference to it...
7705 */
7706 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7707 "PEReference: %%%s; not found\n",
7708 name, NULL);
7709 ctxt->valid = 0;
7710 }
7711 } else {
7712 /*
7713 * Internal checking in case the entity quest barfed
7714 */
7715 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7716 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7717 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7718 "Internal: %%%s; is not a parameter entity\n",
7719 name, NULL);
7720 } else if (ctxt->input->free != deallocblankswrapper) {
7721 input = xmlNewBlanksWrapperInputStream(ctxt, entity);
7722 if (xmlPushInput(ctxt, input) < 0)
7723 return;
7724 } else {
7725 /*
7726 * TODO !!!
7727 * handle the extra spaces added before and after
7728 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7729 */
7730 input = xmlNewEntityInputStream(ctxt, entity);
7731 if (xmlPushInput(ctxt, input) < 0)
7732 return;
7733 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7734 (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
7735 (IS_BLANK_CH(NXT(5)))) {
7736 xmlParseTextDecl(ctxt);
7737 if (ctxt->errNo ==
7738 XML_ERR_UNSUPPORTED_ENCODING) {
7739 /*
7740 * The XML REC instructs us to stop parsing
7741 * right here
7742 */
7743 ctxt->instate = XML_PARSER_EOF;
7744 return;
7745 }
7746 }
7747 }
7748 }
7749 ctxt->hasPErefs = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00007750}
7751
7752/**
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007753 * xmlLoadEntityContent:
7754 * @ctxt: an XML parser context
7755 * @entity: an unloaded system entity
7756 *
7757 * Load the original content of the given system entity from the
7758 * ExternalID/SystemID given. This is to be used for Included in Literal
7759 * http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
7760 *
7761 * Returns 0 in case of success and -1 in case of failure
7762 */
7763static int
7764xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
7765 xmlParserInputPtr input;
7766 xmlBufferPtr buf;
7767 int l, c;
7768 int count = 0;
7769
7770 if ((ctxt == NULL) || (entity == NULL) ||
7771 ((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
7772 (entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
7773 (entity->content != NULL)) {
7774 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7775 "xmlLoadEntityContent parameter error");
7776 return(-1);
7777 }
7778
7779 if (xmlParserDebugEntities)
7780 xmlGenericError(xmlGenericErrorContext,
7781 "Reading %s entity content input\n", entity->name);
7782
7783 buf = xmlBufferCreate();
7784 if (buf == NULL) {
7785 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7786 "xmlLoadEntityContent parameter error");
7787 return(-1);
7788 }
7789
7790 input = xmlNewEntityInputStream(ctxt, entity);
7791 if (input == NULL) {
7792 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
7793 "xmlLoadEntityContent input error");
7794 xmlBufferFree(buf);
7795 return(-1);
7796 }
7797
7798 /*
7799 * Push the entity as the current input, read char by char
7800 * saving to the buffer until the end of the entity or an error
7801 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +00007802 if (xmlPushInput(ctxt, input) < 0) {
7803 xmlBufferFree(buf);
7804 return(-1);
7805 }
7806
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007807 GROW;
7808 c = CUR_CHAR(l);
7809 while ((ctxt->input == input) && (ctxt->input->cur < ctxt->input->end) &&
7810 (IS_CHAR(c))) {
7811 xmlBufferAdd(buf, ctxt->input->cur, l);
7812 if (count++ > 100) {
7813 count = 0;
7814 GROW;
7815 }
7816 NEXTL(l);
7817 c = CUR_CHAR(l);
7818 }
7819
7820 if ((ctxt->input == input) && (ctxt->input->cur >= ctxt->input->end)) {
7821 xmlPopInput(ctxt);
7822 } else if (!IS_CHAR(c)) {
7823 xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
7824 "xmlLoadEntityContent: invalid char value %d\n",
7825 c);
7826 xmlBufferFree(buf);
7827 return(-1);
7828 }
7829 entity->content = buf->content;
7830 buf->content = NULL;
7831 xmlBufferFree(buf);
7832
7833 return(0);
7834}
7835
7836/**
Owen Taylor3473f882001-02-23 17:55:21 +00007837 * xmlParseStringPEReference:
7838 * @ctxt: an XML parser context
7839 * @str: a pointer to an index in the string
7840 *
7841 * parse PEReference declarations
7842 *
7843 * [69] PEReference ::= '%' Name ';'
7844 *
7845 * [ WFC: No Recursion ]
7846 * A parsed entity must not contain a recursive
Daniel Veillard8bf64ae2008-03-24 20:45:21 +00007847 * reference to itself, either directly or indirectly.
Owen Taylor3473f882001-02-23 17:55:21 +00007848 *
7849 * [ WFC: Entity Declared ]
7850 * In a document without any DTD, a document with only an internal DTD
7851 * subset which contains no parameter entity references, or a document
7852 * with "standalone='yes'", ... ... The declaration of a parameter
7853 * entity must precede any reference to it...
7854 *
7855 * [ VC: Entity Declared ]
7856 * In a document with an external subset or external parameter entities
7857 * with "standalone='no'", ... ... The declaration of a parameter entity
7858 * must precede any reference to it...
7859 *
7860 * [ WFC: In DTD ]
7861 * Parameter-entity references may only appear in the DTD.
7862 * NOTE: misleading but this is handled.
7863 *
7864 * Returns the string of the entity content.
7865 * str is updated to the current value of the index
7866 */
Daniel Veillard8ed10722009-08-20 19:17:36 +02007867static xmlEntityPtr
Owen Taylor3473f882001-02-23 17:55:21 +00007868xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7869 const xmlChar *ptr;
7870 xmlChar cur;
7871 xmlChar *name;
7872 xmlEntityPtr entity = NULL;
7873
7874 if ((str == NULL) || (*str == NULL)) return(NULL);
7875 ptr = *str;
7876 cur = *ptr;
Daniel Veillard0161e632008-08-28 15:36:32 +00007877 if (cur != '%')
7878 return(NULL);
7879 ptr++;
Daniel Veillard0161e632008-08-28 15:36:32 +00007880 name = xmlParseStringName(ctxt, &ptr);
7881 if (name == NULL) {
7882 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7883 "xmlParseStringPEReference: no name\n");
7884 *str = ptr;
7885 return(NULL);
7886 }
7887 cur = *ptr;
7888 if (cur != ';') {
7889 xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7890 xmlFree(name);
7891 *str = ptr;
7892 return(NULL);
7893 }
7894 ptr++;
7895
7896 /*
7897 * Increate the number of entity references parsed
7898 */
7899 ctxt->nbentities++;
7900
7901 /*
7902 * Request the entity from SAX
7903 */
7904 if ((ctxt->sax != NULL) &&
7905 (ctxt->sax->getParameterEntity != NULL))
7906 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7907 name);
7908 if (entity == NULL) {
7909 /*
7910 * [ WFC: Entity Declared ]
7911 * In a document without any DTD, a document with only an
7912 * internal DTD subset which contains no parameter entity
7913 * references, or a document with "standalone='yes'", ...
7914 * ... The declaration of a parameter entity must precede
7915 * any reference to it...
7916 */
7917 if ((ctxt->standalone == 1) ||
7918 ((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
7919 xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7920 "PEReference: %%%s; not found\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00007921 } else {
Daniel Veillard0161e632008-08-28 15:36:32 +00007922 /*
7923 * [ VC: Entity Declared ]
7924 * In a document with an external subset or external
7925 * parameter entities with "standalone='no'", ...
7926 * ... The declaration of a parameter entity must
7927 * precede any reference to it...
7928 */
7929 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7930 "PEReference: %%%s; not found\n",
7931 name, NULL);
7932 ctxt->valid = 0;
7933 }
7934 } else {
7935 /*
7936 * Internal checking in case the entity quest barfed
7937 */
7938 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7939 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
7940 xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
7941 "%%%s; is not a parameter entity\n",
7942 name, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00007943 }
7944 }
Daniel Veillard0161e632008-08-28 15:36:32 +00007945 ctxt->hasPErefs = 1;
7946 xmlFree(name);
Owen Taylor3473f882001-02-23 17:55:21 +00007947 *str = ptr;
7948 return(entity);
7949}
7950
7951/**
7952 * xmlParseDocTypeDecl:
7953 * @ctxt: an XML parser context
7954 *
7955 * parse a DOCTYPE declaration
7956 *
7957 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7958 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7959 *
7960 * [ VC: Root Element Type ]
7961 * The Name in the document type declaration must match the element
7962 * type of the root element.
7963 */
7964
7965void
7966xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00007967 const xmlChar *name = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00007968 xmlChar *ExternalID = NULL;
7969 xmlChar *URI = NULL;
7970
7971 /*
7972 * We know that '<!DOCTYPE' has been detected.
7973 */
7974 SKIP(9);
7975
7976 SKIP_BLANKS;
7977
7978 /*
7979 * Parse the DOCTYPE name.
7980 */
7981 name = xmlParseName(ctxt);
7982 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00007983 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7984 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Owen Taylor3473f882001-02-23 17:55:21 +00007985 }
7986 ctxt->intSubName = name;
7987
7988 SKIP_BLANKS;
7989
7990 /*
7991 * Check for SystemID and ExternalID
7992 */
7993 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
7994
7995 if ((URI != NULL) || (ExternalID != NULL)) {
7996 ctxt->hasExternalSubset = 1;
7997 }
7998 ctxt->extSubURI = URI;
7999 ctxt->extSubSystem = ExternalID;
8000
8001 SKIP_BLANKS;
8002
8003 /*
8004 * Create and update the internal subset.
8005 */
8006 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8007 (!ctxt->disableSAX))
8008 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8009
8010 /*
8011 * Is there any internal subset declarations ?
8012 * they are handled separately in xmlParseInternalSubset()
8013 */
8014 if (RAW == '[')
8015 return;
8016
8017 /*
8018 * We should be at the end of the DOCTYPE declaration.
8019 */
8020 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008021 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008022 }
8023 NEXT;
8024}
8025
8026/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008027 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00008028 * @ctxt: an XML parser context
8029 *
8030 * parse the internal subset declaration
8031 *
8032 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8033 */
8034
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008035static void
Owen Taylor3473f882001-02-23 17:55:21 +00008036xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8037 /*
8038 * Is there any DTD definition ?
8039 */
8040 if (RAW == '[') {
8041 ctxt->instate = XML_PARSER_DTD;
8042 NEXT;
8043 /*
8044 * Parse the succession of Markup declarations and
8045 * PEReferences.
8046 * Subsequence (markupdecl | PEReference | S)*
8047 */
8048 while (RAW != ']') {
8049 const xmlChar *check = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008050 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008051
8052 SKIP_BLANKS;
8053 xmlParseMarkupDecl(ctxt);
8054 xmlParsePEReference(ctxt);
8055
8056 /*
8057 * Pop-up of finished entities.
8058 */
8059 while ((RAW == 0) && (ctxt->inputNr > 1))
8060 xmlPopInput(ctxt);
8061
8062 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008063 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Owen Taylor3473f882001-02-23 17:55:21 +00008064 "xmlParseInternalSubset: error detected in Markup declaration\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008065 break;
8066 }
8067 }
8068 if (RAW == ']') {
8069 NEXT;
8070 SKIP_BLANKS;
8071 }
8072 }
8073
8074 /*
8075 * We should be at the end of the DOCTYPE declaration.
8076 */
8077 if (RAW != '>') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008078 xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008079 }
8080 NEXT;
8081}
8082
Daniel Veillard81273902003-09-30 00:43:48 +00008083#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00008084/**
8085 * xmlParseAttribute:
8086 * @ctxt: an XML parser context
8087 * @value: a xmlChar ** used to store the value of the attribute
8088 *
8089 * parse an attribute
8090 *
8091 * [41] Attribute ::= Name Eq AttValue
8092 *
8093 * [ WFC: No External Entity References ]
8094 * Attribute values cannot contain direct or indirect entity references
8095 * to external entities.
8096 *
8097 * [ WFC: No < in Attribute Values ]
8098 * The replacement text of any entity referred to directly or indirectly in
8099 * an attribute value (other than "&lt;") must not contain a <.
8100 *
8101 * [ VC: Attribute Value Type ]
8102 * The attribute must have been declared; the value must be of the type
8103 * declared for it.
8104 *
8105 * [25] Eq ::= S? '=' S?
8106 *
8107 * With namespace:
8108 *
8109 * [NS 11] Attribute ::= QName Eq AttValue
8110 *
8111 * Also the case QName == xmlns:??? is handled independently as a namespace
8112 * definition.
8113 *
8114 * Returns the attribute name, and the value in *value.
8115 */
8116
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008117const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008118xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008119 const xmlChar *name;
8120 xmlChar *val;
Owen Taylor3473f882001-02-23 17:55:21 +00008121
8122 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00008123 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00008124 name = xmlParseName(ctxt);
8125 if (name == NULL) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008126 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008127 "error parsing attribute name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008128 return(NULL);
8129 }
8130
8131 /*
8132 * read the value
8133 */
8134 SKIP_BLANKS;
8135 if (RAW == '=') {
8136 NEXT;
8137 SKIP_BLANKS;
8138 val = xmlParseAttValue(ctxt);
8139 ctxt->instate = XML_PARSER_CONTENT;
8140 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00008141 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
Owen Taylor3473f882001-02-23 17:55:21 +00008142 "Specification mandate value for attribute %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00008143 return(NULL);
8144 }
8145
8146 /*
8147 * Check that xml:lang conforms to the specification
8148 * No more registered as an error, just generate a warning now
8149 * since this was deprecated in XML second edition
8150 */
8151 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8152 if (!xmlCheckLanguageID(val)) {
Daniel Veillard24eb9782003-10-04 21:08:09 +00008153 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8154 "Malformed value for xml:lang : %s\n",
8155 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008156 }
8157 }
8158
8159 /*
8160 * Check that xml:space conforms to the specification
8161 */
8162 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8163 if (xmlStrEqual(val, BAD_CAST "default"))
8164 *(ctxt->space) = 0;
8165 else if (xmlStrEqual(val, BAD_CAST "preserve"))
8166 *(ctxt->space) = 1;
8167 else {
Daniel Veillardd8925572005-06-08 22:34:55 +00008168 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
Daniel Veillard642104e2003-03-26 16:32:05 +00008169"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
Daniel Veillardd8925572005-06-08 22:34:55 +00008170 val, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008171 }
8172 }
8173
8174 *value = val;
8175 return(name);
8176}
8177
8178/**
8179 * xmlParseStartTag:
8180 * @ctxt: an XML parser context
8181 *
8182 * parse a start of tag either for rule element or
8183 * EmptyElement. In both case we don't parse the tag closing chars.
8184 *
8185 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8186 *
8187 * [ WFC: Unique Att Spec ]
8188 * No attribute name may appear more than once in the same start-tag or
8189 * empty-element tag.
8190 *
8191 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8192 *
8193 * [ WFC: Unique Att Spec ]
8194 * No attribute name may appear more than once in the same start-tag or
8195 * empty-element tag.
8196 *
8197 * With namespace:
8198 *
8199 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8200 *
8201 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8202 *
8203 * Returns the element name parsed
8204 */
8205
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008206const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00008207xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008208 const xmlChar *name;
8209 const xmlChar *attname;
Owen Taylor3473f882001-02-23 17:55:21 +00008210 xmlChar *attvalue;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008211 const xmlChar **atts = ctxt->atts;
Owen Taylor3473f882001-02-23 17:55:21 +00008212 int nbatts = 0;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008213 int maxatts = ctxt->maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008214 int i;
8215
8216 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00008217 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008218
8219 name = xmlParseName(ctxt);
8220 if (name == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008221 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
Owen Taylor3473f882001-02-23 17:55:21 +00008222 "xmlParseStartTag: invalid element name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008223 return(NULL);
8224 }
8225
8226 /*
8227 * Now parse the attributes, it ends up with the ending
8228 *
8229 * (S Attribute)* S?
8230 */
8231 SKIP_BLANKS;
8232 GROW;
8233
Daniel Veillard21a0f912001-02-25 19:54:14 +00008234 while ((RAW != '>') &&
8235 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008236 (IS_BYTE_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00008237 const xmlChar *q = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00008238 unsigned int cons = ctxt->input->consumed;
Owen Taylor3473f882001-02-23 17:55:21 +00008239
8240 attname = xmlParseAttribute(ctxt, &attvalue);
8241 if ((attname != NULL) && (attvalue != NULL)) {
8242 /*
8243 * [ WFC: Unique Att Spec ]
8244 * No attribute name may appear more than once in the same
8245 * start-tag or empty-element tag.
8246 */
8247 for (i = 0; i < nbatts;i += 2) {
8248 if (xmlStrEqual(atts[i], attname)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008249 xmlErrAttributeDup(ctxt, NULL, attname);
Owen Taylor3473f882001-02-23 17:55:21 +00008250 xmlFree(attvalue);
8251 goto failed;
8252 }
8253 }
Owen Taylor3473f882001-02-23 17:55:21 +00008254 /*
8255 * Add the pair to atts
8256 */
8257 if (atts == NULL) {
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008258 maxatts = 22; /* allow for 10 attrs by default */
8259 atts = (const xmlChar **)
8260 xmlMalloc(maxatts * sizeof(xmlChar *));
Owen Taylor3473f882001-02-23 17:55:21 +00008261 if (atts == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008262 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008263 if (attvalue != NULL)
8264 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008265 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008266 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008267 ctxt->atts = atts;
8268 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008269 } else if (nbatts + 4 > maxatts) {
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008270 const xmlChar **n;
8271
Owen Taylor3473f882001-02-23 17:55:21 +00008272 maxatts *= 2;
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008273 n = (const xmlChar **) xmlRealloc((void *) atts,
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008274 maxatts * sizeof(const xmlChar *));
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008275 if (n == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008276 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008277 if (attvalue != NULL)
8278 xmlFree(attvalue);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008279 goto failed;
Owen Taylor3473f882001-02-23 17:55:21 +00008280 }
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008281 atts = n;
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008282 ctxt->atts = atts;
8283 ctxt->maxatts = maxatts;
Owen Taylor3473f882001-02-23 17:55:21 +00008284 }
8285 atts[nbatts++] = attname;
8286 atts[nbatts++] = attvalue;
8287 atts[nbatts] = NULL;
8288 atts[nbatts + 1] = NULL;
8289 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00008290 if (attvalue != NULL)
8291 xmlFree(attvalue);
8292 }
8293
8294failed:
8295
Daniel Veillard3772de32002-12-17 10:31:45 +00008296 GROW
Owen Taylor3473f882001-02-23 17:55:21 +00008297 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8298 break;
William M. Brack76e95df2003-10-18 16:20:14 +00008299 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008300 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8301 "attributes construct error\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008302 }
8303 SKIP_BLANKS;
Daniel Veillard02111c12003-02-24 19:14:52 +00008304 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
8305 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008306 xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8307 "xmlParseStartTag: problem parsing attributes\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008308 break;
8309 }
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008310 SHRINK;
Owen Taylor3473f882001-02-23 17:55:21 +00008311 GROW;
8312 }
8313
8314 /*
8315 * SAX: Start of Element !
8316 */
8317 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
Daniel Veillard6155d8a2003-08-19 15:01:28 +00008318 (!ctxt->disableSAX)) {
8319 if (nbatts > 0)
8320 ctxt->sax->startElement(ctxt->userData, name, atts);
8321 else
8322 ctxt->sax->startElement(ctxt->userData, name, NULL);
8323 }
Owen Taylor3473f882001-02-23 17:55:21 +00008324
8325 if (atts != NULL) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008326 /* Free only the content strings */
8327 for (i = 1;i < nbatts;i+=2)
Daniel Veillarda76fe5c2003-04-24 16:06:47 +00008328 if (atts[i] != NULL)
8329 xmlFree((xmlChar *) atts[i]);
Owen Taylor3473f882001-02-23 17:55:21 +00008330 }
8331 return(name);
8332}
8333
8334/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008335 * xmlParseEndTag1:
Owen Taylor3473f882001-02-23 17:55:21 +00008336 * @ctxt: an XML parser context
Daniel Veillard0fb18932003-09-07 09:14:37 +00008337 * @line: line of the start tag
8338 * @nsNr: number of namespaces on the start tag
Owen Taylor3473f882001-02-23 17:55:21 +00008339 *
8340 * parse an end of tag
8341 *
8342 * [42] ETag ::= '</' Name S? '>'
8343 *
8344 * With namespace
8345 *
8346 * [NS 9] ETag ::= '</' QName S? '>'
8347 */
8348
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008349static void
Daniel Veillard0fb18932003-09-07 09:14:37 +00008350xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00008351 const xmlChar *name;
Owen Taylor3473f882001-02-23 17:55:21 +00008352
8353 GROW;
8354 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008355 xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008356 "xmlParseEndTag: '</' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +00008357 return;
8358 }
8359 SKIP(2);
8360
Daniel Veillard46de64e2002-05-29 08:21:33 +00008361 name = xmlParseNameAndCompare(ctxt,ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008362
8363 /*
8364 * We should definitely be at the ending "S? '>'" part
8365 */
8366 GROW;
8367 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00008368 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008369 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00008370 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00008371 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00008372
8373 /*
8374 * [ WFC: Element Type Match ]
8375 * The Name in an element's end-tag must match the element type in the
8376 * start-tag.
8377 *
8378 */
Daniel Veillard46de64e2002-05-29 08:21:33 +00008379 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00008380 if (name == NULL) name = BAD_CAST "unparseable";
8381 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008382 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00008383 ctxt->name, line, name);
Owen Taylor3473f882001-02-23 17:55:21 +00008384 }
8385
8386 /*
8387 * SAX: End of Tag
8388 */
8389 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8390 (!ctxt->disableSAX))
Daniel Veillard46de64e2002-05-29 08:21:33 +00008391 ctxt->sax->endElement(ctxt->userData, ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00008392
Daniel Veillarde57ec792003-09-10 10:50:59 +00008393 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008394 spacePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00008395 return;
8396}
8397
8398/**
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00008399 * xmlParseEndTag:
8400 * @ctxt: an XML parser context
8401 *
8402 * parse an end of tag
8403 *
8404 * [42] ETag ::= '</' Name S? '>'
8405 *
8406 * With namespace
8407 *
8408 * [NS 9] ETag ::= '</' QName S? '>'
8409 */
8410
8411void
8412xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008413 xmlParseEndTag1(ctxt, 0);
8414}
Daniel Veillard81273902003-09-30 00:43:48 +00008415#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard0fb18932003-09-07 09:14:37 +00008416
8417/************************************************************************
8418 * *
8419 * SAX 2 specific operations *
8420 * *
8421 ************************************************************************/
8422
Daniel Veillard0fb18932003-09-07 09:14:37 +00008423/*
8424 * xmlGetNamespace:
8425 * @ctxt: an XML parser context
8426 * @prefix: the prefix to lookup
8427 *
8428 * Lookup the namespace name for the @prefix (which ca be NULL)
8429 * The prefix must come from the @ctxt->dict dictionnary
8430 *
8431 * Returns the namespace name or NULL if not bound
8432 */
8433static const xmlChar *
8434xmlGetNamespace(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
8435 int i;
8436
Daniel Veillarde57ec792003-09-10 10:50:59 +00008437 if (prefix == ctxt->str_xml) return(ctxt->str_xml_ns);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008438 for (i = ctxt->nsNr - 2;i >= 0;i-=2)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008439 if (ctxt->nsTab[i] == prefix) {
8440 if ((prefix == NULL) && (*ctxt->nsTab[i + 1] == 0))
8441 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008442 return(ctxt->nsTab[i + 1]);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008443 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008444 return(NULL);
8445}
8446
8447/**
Daniel Veillard0fb18932003-09-07 09:14:37 +00008448 * xmlParseQName:
8449 * @ctxt: an XML parser context
8450 * @prefix: pointer to store the prefix part
8451 *
8452 * parse an XML Namespace QName
8453 *
8454 * [6] QName ::= (Prefix ':')? LocalPart
8455 * [7] Prefix ::= NCName
8456 * [8] LocalPart ::= NCName
8457 *
8458 * Returns the Name parsed or NULL
8459 */
8460
8461static const xmlChar *
8462xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
8463 const xmlChar *l, *p;
8464
8465 GROW;
8466
8467 l = xmlParseNCName(ctxt);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008468 if (l == NULL) {
8469 if (CUR == ':') {
8470 l = xmlParseName(ctxt);
8471 if (l != NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008472 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8473 "Failed to parse QName '%s'\n", l, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008474 *prefix = NULL;
8475 return(l);
8476 }
8477 }
8478 return(NULL);
8479 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008480 if (CUR == ':') {
8481 NEXT;
8482 p = l;
8483 l = xmlParseNCName(ctxt);
8484 if (l == NULL) {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008485 xmlChar *tmp;
8486
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008487 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8488 "Failed to parse QName '%s:'\n", p, NULL, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008489 l = xmlParseNmtoken(ctxt);
8490 if (l == NULL)
8491 tmp = xmlBuildQName(BAD_CAST "", p, NULL, 0);
8492 else {
8493 tmp = xmlBuildQName(l, p, NULL, 0);
8494 xmlFree((char *)l);
8495 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008496 p = xmlDictLookup(ctxt->dict, tmp, -1);
8497 if (tmp != NULL) xmlFree(tmp);
8498 *prefix = NULL;
8499 return(p);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008500 }
8501 if (CUR == ':') {
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008502 xmlChar *tmp;
8503
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008504 xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8505 "Failed to parse QName '%s:%s:'\n", p, l, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00008506 NEXT;
8507 tmp = (xmlChar *) xmlParseName(ctxt);
8508 if (tmp != NULL) {
8509 tmp = xmlBuildQName(tmp, l, NULL, 0);
8510 l = xmlDictLookup(ctxt->dict, tmp, -1);
8511 if (tmp != NULL) xmlFree(tmp);
8512 *prefix = p;
8513 return(l);
8514 }
8515 tmp = xmlBuildQName(BAD_CAST "", l, NULL, 0);
8516 l = xmlDictLookup(ctxt->dict, tmp, -1);
8517 if (tmp != NULL) xmlFree(tmp);
8518 *prefix = p;
8519 return(l);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008520 }
8521 *prefix = p;
8522 } else
8523 *prefix = NULL;
8524 return(l);
8525}
8526
8527/**
8528 * xmlParseQNameAndCompare:
8529 * @ctxt: an XML parser context
8530 * @name: the localname
8531 * @prefix: the prefix, if any.
8532 *
8533 * parse an XML name and compares for match
8534 * (specialized for endtag parsing)
8535 *
8536 * Returns NULL for an illegal name, (xmlChar*) 1 for success
8537 * and the name for mismatch
8538 */
8539
8540static const xmlChar *
8541xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
8542 xmlChar const *prefix) {
Daniel Veillardd44b9362009-09-07 12:15:08 +02008543 const xmlChar *cmp;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008544 const xmlChar *in;
8545 const xmlChar *ret;
8546 const xmlChar *prefix2;
8547
8548 if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
8549
8550 GROW;
8551 in = ctxt->input->cur;
Daniel Veillardd44b9362009-09-07 12:15:08 +02008552
Daniel Veillard0fb18932003-09-07 09:14:37 +00008553 cmp = prefix;
8554 while (*in != 0 && *in == *cmp) {
8555 ++in;
8556 ++cmp;
8557 }
8558 if ((*cmp == 0) && (*in == ':')) {
8559 in++;
8560 cmp = name;
8561 while (*in != 0 && *in == *cmp) {
8562 ++in;
8563 ++cmp;
8564 }
William M. Brack76e95df2003-10-18 16:20:14 +00008565 if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008566 /* success */
8567 ctxt->input->cur = in;
8568 return((const xmlChar*) 1);
8569 }
8570 }
8571 /*
8572 * all strings coms from the dictionary, equality can be done directly
8573 */
8574 ret = xmlParseQName (ctxt, &prefix2);
8575 if ((ret == name) && (prefix == prefix2))
8576 return((const xmlChar*) 1);
8577 return ret;
8578}
8579
8580/**
8581 * xmlParseAttValueInternal:
8582 * @ctxt: an XML parser context
8583 * @len: attribute len result
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008584 * @alloc: whether the attribute was reallocated as a new string
8585 * @normalize: if 1 then further non-CDATA normalization must be done
Daniel Veillard0fb18932003-09-07 09:14:37 +00008586 *
8587 * parse a value for an attribute.
8588 * NOTE: if no normalization is needed, the routine will return pointers
8589 * directly from the data buffer.
8590 *
8591 * 3.3.3 Attribute-Value Normalization:
8592 * Before the value of an attribute is passed to the application or
8593 * checked for validity, the XML processor must normalize it as follows:
8594 * - a character reference is processed by appending the referenced
8595 * character to the attribute value
8596 * - an entity reference is processed by recursively processing the
8597 * replacement text of the entity
8598 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
8599 * appending #x20 to the normalized value, except that only a single
8600 * #x20 is appended for a "#xD#xA" sequence that is part of an external
8601 * parsed entity or the literal entity value of an internal parsed entity
8602 * - other characters are processed by appending them to the normalized value
8603 * If the declared value is not CDATA, then the XML processor must further
8604 * process the normalized attribute value by discarding any leading and
8605 * trailing space (#x20) characters, and by replacing sequences of space
8606 * (#x20) characters by a single space (#x20) character.
8607 * All attributes for which no declaration has been read should be treated
8608 * by a non-validating parser as if declared CDATA.
8609 *
8610 * Returns the AttValue parsed or NULL. The value has to be freed by the
8611 * caller if it was copied, this can be detected by val[*len] == 0.
8612 */
8613
8614static xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008615xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
8616 int normalize)
Daniel Veillarde57ec792003-09-10 10:50:59 +00008617{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008618 xmlChar limit = 0;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008619 const xmlChar *in = NULL, *start, *end, *last;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008620 xmlChar *ret = NULL;
8621
8622 GROW;
8623 in = (xmlChar *) CUR_PTR;
8624 if (*in != '"' && *in != '\'') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008625 xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008626 return (NULL);
8627 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008628 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008629
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008630 /*
8631 * try to handle in this routine the most common case where no
8632 * allocation of a new string is required and where content is
8633 * pure ASCII.
8634 */
8635 limit = *in++;
8636 end = ctxt->input->end;
8637 start = in;
8638 if (in >= end) {
8639 const xmlChar *oldbase = ctxt->input->base;
8640 GROW;
8641 if (oldbase != ctxt->input->base) {
8642 long delta = ctxt->input->base - oldbase;
8643 start = start + delta;
8644 in = in + delta;
8645 }
8646 end = ctxt->input->end;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008647 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008648 if (normalize) {
8649 /*
8650 * Skip any leading spaces
8651 */
8652 while ((in < end) && (*in != limit) &&
8653 ((*in == 0x20) || (*in == 0x9) ||
8654 (*in == 0xA) || (*in == 0xD))) {
8655 in++;
8656 start = in;
8657 if (in >= end) {
8658 const xmlChar *oldbase = ctxt->input->base;
8659 GROW;
8660 if (oldbase != ctxt->input->base) {
8661 long delta = ctxt->input->base - oldbase;
8662 start = start + delta;
8663 in = in + delta;
8664 }
8665 end = ctxt->input->end;
8666 }
8667 }
8668 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8669 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8670 if ((*in++ == 0x20) && (*in == 0x20)) break;
8671 if (in >= end) {
8672 const xmlChar *oldbase = ctxt->input->base;
8673 GROW;
8674 if (oldbase != ctxt->input->base) {
8675 long delta = ctxt->input->base - oldbase;
8676 start = start + delta;
8677 in = in + delta;
8678 }
8679 end = ctxt->input->end;
8680 }
8681 }
8682 last = in;
8683 /*
8684 * skip the trailing blanks
8685 */
Daniel Veillardc6e20e42003-09-11 16:30:26 +00008686 while ((last[-1] == 0x20) && (last > start)) last--;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008687 while ((in < end) && (*in != limit) &&
8688 ((*in == 0x20) || (*in == 0x9) ||
8689 (*in == 0xA) || (*in == 0xD))) {
8690 in++;
8691 if (in >= end) {
8692 const xmlChar *oldbase = ctxt->input->base;
8693 GROW;
8694 if (oldbase != ctxt->input->base) {
8695 long delta = ctxt->input->base - oldbase;
8696 start = start + delta;
8697 in = in + delta;
8698 last = last + delta;
8699 }
8700 end = ctxt->input->end;
8701 }
8702 }
8703 if (*in != limit) goto need_complex;
8704 } else {
8705 while ((in < end) && (*in != limit) && (*in >= 0x20) &&
8706 (*in <= 0x7f) && (*in != '&') && (*in != '<')) {
8707 in++;
8708 if (in >= end) {
8709 const xmlChar *oldbase = ctxt->input->base;
8710 GROW;
8711 if (oldbase != ctxt->input->base) {
8712 long delta = ctxt->input->base - oldbase;
8713 start = start + delta;
8714 in = in + delta;
8715 }
8716 end = ctxt->input->end;
8717 }
8718 }
8719 last = in;
8720 if (*in != limit) goto need_complex;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008721 }
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008722 in++;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008723 if (len != NULL) {
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008724 *len = last - start;
8725 ret = (xmlChar *) start;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008726 } else {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008727 if (alloc) *alloc = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008728 ret = xmlStrndup(start, last - start);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008729 }
8730 CUR_PTR = in;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008731 if (alloc) *alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008732 return ret;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008733need_complex:
8734 if (alloc) *alloc = 1;
8735 return xmlParseAttValueComplex(ctxt, len, normalize);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008736}
8737
8738/**
8739 * xmlParseAttribute2:
8740 * @ctxt: an XML parser context
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008741 * @pref: the element prefix
8742 * @elem: the element name
8743 * @prefix: a xmlChar ** used to store the value of the attribute prefix
Daniel Veillard0fb18932003-09-07 09:14:37 +00008744 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008745 * @len: an int * to save the length of the attribute
8746 * @alloc: an int * to indicate if the attribute was allocated
Daniel Veillard0fb18932003-09-07 09:14:37 +00008747 *
8748 * parse an attribute in the new SAX2 framework.
8749 *
8750 * Returns the attribute name, and the value in *value, .
8751 */
8752
8753static const xmlChar *
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008754xmlParseAttribute2(xmlParserCtxtPtr ctxt,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008755 const xmlChar * pref, const xmlChar * elem,
8756 const xmlChar ** prefix, xmlChar ** value,
8757 int *len, int *alloc)
8758{
Daniel Veillard0fb18932003-09-07 09:14:37 +00008759 const xmlChar *name;
Daniel Veillardd8925572005-06-08 22:34:55 +00008760 xmlChar *val, *internal_val = NULL;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008761 int normalize = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008762
8763 *value = NULL;
8764 GROW;
8765 name = xmlParseQName(ctxt, prefix);
8766 if (name == NULL) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008767 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8768 "error parsing attribute name\n");
8769 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008770 }
8771
8772 /*
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008773 * get the type if needed
8774 */
8775 if (ctxt->attsSpecial != NULL) {
8776 int type;
8777
8778 type = (int) (long) xmlHashQLookup2(ctxt->attsSpecial,
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008779 pref, elem, *prefix, name);
8780 if (type != 0)
8781 normalize = 1;
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008782 }
8783
8784 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +00008785 * read the value
8786 */
8787 SKIP_BLANKS;
8788 if (RAW == '=') {
8789 NEXT;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008790 SKIP_BLANKS;
8791 val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
8792 if (normalize) {
8793 /*
8794 * Sometimes a second normalisation pass for spaces is needed
8795 * but that only happens if charrefs or entities refernces
8796 * have been used in the attribute value, i.e. the attribute
8797 * value have been extracted in an allocated string already.
8798 */
8799 if (*alloc) {
8800 const xmlChar *val2;
8801
8802 val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
Daniel Veillardae0765b2008-07-31 19:54:59 +00008803 if ((val2 != NULL) && (val2 != val)) {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008804 xmlFree(val);
Daniel Veillard6a31b832008-03-26 14:06:44 +00008805 val = (xmlChar *) val2;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008806 }
8807 }
8808 }
8809 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008810 } else {
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008811 xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8812 "Specification mandate value for attribute %s\n",
8813 name);
8814 return (NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008815 }
8816
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008817 if (*prefix == ctxt->str_xml) {
8818 /*
8819 * Check that xml:lang conforms to the specification
8820 * No more registered as an error, just generate a warning now
8821 * since this was deprecated in XML second edition
8822 */
8823 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
8824 internal_val = xmlStrndup(val, *len);
8825 if (!xmlCheckLanguageID(internal_val)) {
8826 xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8827 "Malformed value for xml:lang : %s\n",
8828 internal_val, NULL);
8829 }
8830 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008831
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008832 /*
8833 * Check that xml:space conforms to the specification
8834 */
8835 if (xmlStrEqual(name, BAD_CAST "space")) {
8836 internal_val = xmlStrndup(val, *len);
8837 if (xmlStrEqual(internal_val, BAD_CAST "default"))
8838 *(ctxt->space) = 0;
8839 else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
8840 *(ctxt->space) = 1;
8841 else {
8842 xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8843 "Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8844 internal_val, NULL);
8845 }
8846 }
8847 if (internal_val) {
8848 xmlFree(internal_val);
8849 }
8850 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008851
8852 *value = val;
Daniel Veillard97c9ce22008-03-25 16:52:41 +00008853 return (name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00008854}
Daniel Veillard0fb18932003-09-07 09:14:37 +00008855/**
8856 * xmlParseStartTag2:
8857 * @ctxt: an XML parser context
8858 *
8859 * parse a start of tag either for rule element or
8860 * EmptyElement. In both case we don't parse the tag closing chars.
8861 * This routine is called when running SAX2 parsing
8862 *
8863 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8864 *
8865 * [ WFC: Unique Att Spec ]
8866 * No attribute name may appear more than once in the same start-tag or
8867 * empty-element tag.
8868 *
8869 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8870 *
8871 * [ WFC: Unique Att Spec ]
8872 * No attribute name may appear more than once in the same start-tag or
8873 * empty-element tag.
8874 *
8875 * With namespace:
8876 *
8877 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8878 *
8879 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8880 *
8881 * Returns the element name parsed
8882 */
8883
8884static const xmlChar *
8885xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008886 const xmlChar **URI, int *tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008887 const xmlChar *localname;
8888 const xmlChar *prefix;
8889 const xmlChar *attname;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008890 const xmlChar *aprefix;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008891 const xmlChar *nsname;
8892 xmlChar *attvalue;
8893 const xmlChar **atts = ctxt->atts;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008894 int maxatts = ctxt->maxatts;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008895 int nratts, nbatts, nbdef;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008896 int i, j, nbNs, attval, oldline, oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008897 const xmlChar *base;
8898 unsigned long cur;
Daniel Veillard365cf672005-06-09 08:18:24 +00008899 int nsNr = ctxt->nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008900
8901 if (RAW != '<') return(NULL);
8902 NEXT1;
8903
8904 /*
8905 * NOTE: it is crucial with the SAX2 API to never call SHRINK beyond that
8906 * point since the attribute values may be stored as pointers to
8907 * the buffer and calling SHRINK would destroy them !
8908 * The Shrinking is only possible once the full set of attribute
8909 * callbacks have been done.
8910 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00008911reparse:
Daniel Veillard0fb18932003-09-07 09:14:37 +00008912 SHRINK;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008913 base = ctxt->input->base;
8914 cur = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddcec6722006-10-15 20:32:53 +00008915 oldline = ctxt->input->line;
8916 oldcol = ctxt->input->col;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008917 nbatts = 0;
8918 nratts = 0;
8919 nbdef = 0;
8920 nbNs = 0;
8921 attval = 0;
Daniel Veillard365cf672005-06-09 08:18:24 +00008922 /* Forget any namespaces added during an earlier parse of this element. */
8923 ctxt->nsNr = nsNr;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008924
8925 localname = xmlParseQName(ctxt, &prefix);
8926 if (localname == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00008927 xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8928 "StartTag: invalid element name\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00008929 return(NULL);
8930 }
Daniel Veillardc82c57e2004-01-12 16:24:34 +00008931 *tlen = ctxt->input->cur - ctxt->input->base - cur;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008932
8933 /*
8934 * Now parse the attributes, it ends up with the ending
8935 *
8936 * (S Attribute)* S?
8937 */
8938 SKIP_BLANKS;
8939 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008940 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008941
8942 while ((RAW != '>') &&
8943 ((RAW != '/') || (NXT(1) != '>')) &&
Daniel Veillard73b013f2003-09-30 12:36:01 +00008944 (IS_BYTE_CHAR(RAW))) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00008945 const xmlChar *q = CUR_PTR;
8946 unsigned int cons = ctxt->input->consumed;
Daniel Veillarde57ec792003-09-10 10:50:59 +00008947 int len = -1, alloc = 0;
Daniel Veillard0fb18932003-09-07 09:14:37 +00008948
Daniel Veillard8e36e6a2003-09-10 10:50:59 +00008949 attname = xmlParseAttribute2(ctxt, prefix, localname,
8950 &aprefix, &attvalue, &len, &alloc);
Daniel Veillarddcec6722006-10-15 20:32:53 +00008951 if (ctxt->input->base != base) {
8952 if ((attvalue != NULL) && (alloc != 0))
8953 xmlFree(attvalue);
8954 attvalue = NULL;
8955 goto base_changed;
8956 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008957 if ((attname != NULL) && (attvalue != NULL)) {
8958 if (len < 0) len = xmlStrlen(attvalue);
8959 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00008960 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
8961 xmlURIPtr uri;
8962
8963 if (*URL != 0) {
8964 uri = xmlParseURI((const char *) URL);
8965 if (uri == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008966 xmlNsErr(ctxt, XML_WAR_NS_URI,
8967 "xmlns: '%s' is not a valid URI\n",
8968 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008969 } else {
Daniel Veillardda3fee42008-09-01 13:08:57 +00008970 if (uri->scheme == NULL) {
Daniel Veillard37334572008-07-31 08:20:02 +00008971 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
8972 "xmlns: URI %s is not absolute\n",
8973 URL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00008974 }
8975 xmlFreeURI(uri);
8976 }
Daniel Veillard37334572008-07-31 08:20:02 +00008977 if (URL == ctxt->str_xml_ns) {
8978 if (attname != ctxt->str_xml) {
8979 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8980 "xml namespace URI cannot be the default namespace\n",
8981 NULL, NULL, NULL);
8982 }
8983 goto skip_default_ns;
8984 }
8985 if ((len == 29) &&
8986 (xmlStrEqual(URL,
8987 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
8988 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
8989 "reuse of the xmlns namespace name is forbidden\n",
8990 NULL, NULL, NULL);
8991 goto skip_default_ns;
8992 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00008993 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00008994 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008995 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00008996 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00008997 for (j = 1;j <= nbNs;j++)
8998 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
8999 break;
9000 if (j <= nbNs)
9001 xmlErrAttributeDup(ctxt, NULL, attname);
9002 else
9003 if (nsPush(ctxt, NULL, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009004skip_default_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009005 if (alloc != 0) xmlFree(attvalue);
9006 SKIP_BLANKS;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009007 continue;
9008 }
9009 if (aprefix == ctxt->str_xmlns) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009010 const xmlChar *URL = xmlDictLookup(ctxt->dict, attvalue, len);
9011 xmlURIPtr uri;
9012
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009013 if (attname == ctxt->str_xml) {
9014 if (URL != ctxt->str_xml_ns) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009015 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9016 "xml namespace prefix mapped to wrong URI\n",
9017 NULL, NULL, NULL);
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009018 }
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009019 /*
9020 * Do not keep a namespace definition node
9021 */
Daniel Veillard37334572008-07-31 08:20:02 +00009022 goto skip_ns;
Daniel Veillard3b7840c2003-09-11 23:42:01 +00009023 }
Daniel Veillard37334572008-07-31 08:20:02 +00009024 if (URL == ctxt->str_xml_ns) {
9025 if (attname != ctxt->str_xml) {
9026 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9027 "xml namespace URI mapped to wrong prefix\n",
9028 NULL, NULL, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009029 }
Daniel Veillard37334572008-07-31 08:20:02 +00009030 goto skip_ns;
9031 }
9032 if (attname == ctxt->str_xmlns) {
9033 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9034 "redefinition of the xmlns prefix is forbidden\n",
9035 NULL, NULL, NULL);
9036 goto skip_ns;
9037 }
9038 if ((len == 29) &&
9039 (xmlStrEqual(URL,
9040 BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9041 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9042 "reuse of the xmlns namespace name is forbidden\n",
9043 NULL, NULL, NULL);
9044 goto skip_ns;
9045 }
9046 if ((URL == NULL) || (URL[0] == 0)) {
9047 xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9048 "xmlns:%s: Empty XML namespace is not allowed\n",
9049 attname, NULL, NULL);
9050 goto skip_ns;
9051 } else {
9052 uri = xmlParseURI((const char *) URL);
9053 if (uri == NULL) {
9054 xmlNsErr(ctxt, XML_WAR_NS_URI,
9055 "xmlns:%s: '%s' is not a valid URI\n",
9056 attname, URL, NULL);
9057 } else {
9058 if ((ctxt->pedantic) && (uri->scheme == NULL)) {
9059 xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9060 "xmlns:%s: URI %s is not absolute\n",
9061 attname, URL, NULL);
9062 }
9063 xmlFreeURI(uri);
9064 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009065 }
9066
Daniel Veillard0fb18932003-09-07 09:14:37 +00009067 /*
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009068 * check that it's not a defined namespace
Daniel Veillard0fb18932003-09-07 09:14:37 +00009069 */
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009070 for (j = 1;j <= nbNs;j++)
9071 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9072 break;
9073 if (j <= nbNs)
9074 xmlErrAttributeDup(ctxt, aprefix, attname);
9075 else
9076 if (nsPush(ctxt, attname, URL) > 0) nbNs++;
Daniel Veillard37334572008-07-31 08:20:02 +00009077skip_ns:
Daniel Veillarde57ec792003-09-10 10:50:59 +00009078 if (alloc != 0) xmlFree(attvalue);
9079 SKIP_BLANKS;
Daniel Veillardd3999c72004-03-10 16:27:03 +00009080 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009081 continue;
9082 }
9083
9084 /*
9085 * Add the pair to atts
9086 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009087 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9088 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009089 if (attvalue[len] == 0)
9090 xmlFree(attvalue);
9091 goto failed;
9092 }
9093 maxatts = ctxt->maxatts;
9094 atts = ctxt->atts;
9095 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009096 ctxt->attallocs[nratts++] = alloc;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009097 atts[nbatts++] = attname;
9098 atts[nbatts++] = aprefix;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009099 atts[nbatts++] = NULL; /* the URI will be fetched later */
Daniel Veillard0fb18932003-09-07 09:14:37 +00009100 atts[nbatts++] = attvalue;
9101 attvalue += len;
9102 atts[nbatts++] = attvalue;
9103 /*
9104 * tag if some deallocation is needed
9105 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009106 if (alloc != 0) attval = 1;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009107 } else {
9108 if ((attvalue != NULL) && (attvalue[len] == 0))
9109 xmlFree(attvalue);
9110 }
9111
Daniel Veillard37334572008-07-31 08:20:02 +00009112failed:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009113
9114 GROW
Daniel Veillarde57ec792003-09-10 10:50:59 +00009115 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009116 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9117 break;
William M. Brack76e95df2003-10-18 16:20:14 +00009118 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009119 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9120 "attributes construct error\n");
William M. Brack13dfa872004-09-18 04:52:08 +00009121 break;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009122 }
9123 SKIP_BLANKS;
9124 if ((cons == ctxt->input->consumed) && (q == CUR_PTR) &&
9125 (attname == NULL) && (attvalue == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009126 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009127 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillard0fb18932003-09-07 09:14:37 +00009128 break;
9129 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009130 GROW;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009131 if (ctxt->input->base != base) goto base_changed;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009132 }
9133
Daniel Veillard0fb18932003-09-07 09:14:37 +00009134 /*
Daniel Veillarde57ec792003-09-10 10:50:59 +00009135 * The attributes defaulting
9136 */
9137 if (ctxt->attsDefault != NULL) {
9138 xmlDefAttrsPtr defaults;
9139
9140 defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9141 if (defaults != NULL) {
9142 for (i = 0;i < defaults->nbAttrs;i++) {
Daniel Veillardae0765b2008-07-31 19:54:59 +00009143 attname = defaults->values[5 * i];
9144 aprefix = defaults->values[5 * i + 1];
Daniel Veillarde57ec792003-09-10 10:50:59 +00009145
9146 /*
9147 * special work for namespaces defaulted defs
9148 */
9149 if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9150 /*
9151 * check that it's not a defined namespace
9152 */
9153 for (j = 1;j <= nbNs;j++)
9154 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == NULL)
9155 break;
9156 if (j <= nbNs) continue;
9157
9158 nsname = xmlGetNamespace(ctxt, NULL);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009159 if (nsname != defaults->values[5 * i + 2]) {
Daniel Veillarde57ec792003-09-10 10:50:59 +00009160 if (nsPush(ctxt, NULL,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009161 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009162 nbNs++;
9163 }
9164 } else if (aprefix == ctxt->str_xmlns) {
9165 /*
9166 * check that it's not a defined namespace
9167 */
9168 for (j = 1;j <= nbNs;j++)
9169 if (ctxt->nsTab[ctxt->nsNr - 2 * j] == attname)
9170 break;
9171 if (j <= nbNs) continue;
9172
9173 nsname = xmlGetNamespace(ctxt, attname);
9174 if (nsname != defaults->values[2]) {
9175 if (nsPush(ctxt, attname,
Daniel Veillardae0765b2008-07-31 19:54:59 +00009176 defaults->values[5 * i + 2]) > 0)
Daniel Veillarde57ec792003-09-10 10:50:59 +00009177 nbNs++;
9178 }
9179 } else {
9180 /*
9181 * check that it's not a defined attribute
9182 */
9183 for (j = 0;j < nbatts;j+=5) {
9184 if ((attname == atts[j]) && (aprefix == atts[j+1]))
9185 break;
9186 }
9187 if (j < nbatts) continue;
9188
9189 if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9190 if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
Daniel Veillard9ee35f32003-09-28 00:19:54 +00009191 return(NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009192 }
9193 maxatts = ctxt->maxatts;
9194 atts = ctxt->atts;
9195 }
9196 atts[nbatts++] = attname;
9197 atts[nbatts++] = aprefix;
9198 if (aprefix == NULL)
9199 atts[nbatts++] = NULL;
9200 else
9201 atts[nbatts++] = xmlGetNamespace(ctxt, aprefix);
Daniel Veillardae0765b2008-07-31 19:54:59 +00009202 atts[nbatts++] = defaults->values[5 * i + 2];
9203 atts[nbatts++] = defaults->values[5 * i + 3];
9204 if ((ctxt->standalone == 1) &&
9205 (defaults->values[5 * i + 4] != NULL)) {
9206 xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
9207 "standalone: attribute %s on %s defaulted from external subset\n",
9208 attname, localname);
9209 }
Daniel Veillarde57ec792003-09-10 10:50:59 +00009210 nbdef++;
9211 }
9212 }
9213 }
9214 }
9215
Daniel Veillarde70c8772003-11-25 07:21:18 +00009216 /*
9217 * The attributes checkings
9218 */
9219 for (i = 0; i < nbatts;i += 5) {
Kasimier T. Buchcik455472f2005-04-29 10:04:43 +00009220 /*
9221 * The default namespace does not apply to attribute names.
9222 */
9223 if (atts[i + 1] != NULL) {
9224 nsname = xmlGetNamespace(ctxt, atts[i + 1]);
9225 if (nsname == NULL) {
9226 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9227 "Namespace prefix %s for %s on %s is not defined\n",
9228 atts[i + 1], atts[i], localname);
9229 }
9230 atts[i + 2] = nsname;
9231 } else
9232 nsname = NULL;
Daniel Veillarde70c8772003-11-25 07:21:18 +00009233 /*
9234 * [ WFC: Unique Att Spec ]
9235 * No attribute name may appear more than once in the same
9236 * start-tag or empty-element tag.
9237 * As extended by the Namespace in XML REC.
9238 */
9239 for (j = 0; j < i;j += 5) {
9240 if (atts[i] == atts[j]) {
9241 if (atts[i+1] == atts[j+1]) {
9242 xmlErrAttributeDup(ctxt, atts[i+1], atts[i]);
9243 break;
9244 }
9245 if ((nsname != NULL) && (atts[j + 2] == nsname)) {
9246 xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9247 "Namespaced Attribute %s in '%s' redefined\n",
9248 atts[i], nsname, NULL);
9249 break;
9250 }
9251 }
9252 }
9253 }
9254
Daniel Veillarde57ec792003-09-10 10:50:59 +00009255 nsname = xmlGetNamespace(ctxt, prefix);
9256 if ((prefix != NULL) && (nsname == NULL)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009257 xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9258 "Namespace prefix %s on %s is not defined\n",
9259 prefix, localname, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009260 }
9261 *pref = prefix;
9262 *URI = nsname;
9263
9264 /*
9265 * SAX: Start of Element !
9266 */
9267 if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
9268 (!ctxt->disableSAX)) {
9269 if (nbNs > 0)
9270 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9271 nsname, nbNs, &ctxt->nsTab[ctxt->nsNr - 2 * nbNs],
9272 nbatts / 5, nbdef, atts);
9273 else
9274 ctxt->sax->startElementNs(ctxt->userData, localname, prefix,
9275 nsname, 0, NULL, nbatts / 5, nbdef, atts);
9276 }
9277
9278 /*
9279 * Free up attribute allocated strings if needed
9280 */
9281 if (attval != 0) {
9282 for (i = 3,j = 0; j < nratts;i += 5,j++)
9283 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9284 xmlFree((xmlChar *) atts[i]);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009285 }
9286
9287 return(localname);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009288
9289base_changed:
9290 /*
9291 * the attribute strings are valid iif the base didn't changed
9292 */
9293 if (attval != 0) {
9294 for (i = 3,j = 0; j < nratts;i += 5,j++)
9295 if ((ctxt->attallocs[j] != 0) && (atts[i] != NULL))
9296 xmlFree((xmlChar *) atts[i]);
9297 }
9298 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillarddcec6722006-10-15 20:32:53 +00009299 ctxt->input->line = oldline;
9300 ctxt->input->col = oldcol;
Daniel Veillarde57ec792003-09-10 10:50:59 +00009301 if (ctxt->wellFormed == 1) {
9302 goto reparse;
9303 }
9304 return(NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009305}
9306
9307/**
9308 * xmlParseEndTag2:
9309 * @ctxt: an XML parser context
9310 * @line: line of the start tag
9311 * @nsNr: number of namespaces on the start tag
9312 *
9313 * parse an end of tag
9314 *
9315 * [42] ETag ::= '</' Name S? '>'
9316 *
9317 * With namespace
9318 *
9319 * [NS 9] ETag ::= '</' QName S? '>'
9320 */
9321
9322static void
9323xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009324 const xmlChar *URI, int line, int nsNr, int tlen) {
Daniel Veillard0fb18932003-09-07 09:14:37 +00009325 const xmlChar *name;
9326
9327 GROW;
9328 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +00009329 xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009330 return;
9331 }
9332 SKIP(2);
9333
William M. Brack13dfa872004-09-18 04:52:08 +00009334 if ((tlen > 0) && (xmlStrncmp(ctxt->input->cur, ctxt->name, tlen) == 0)) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009335 if (ctxt->input->cur[tlen] == '>') {
9336 ctxt->input->cur += tlen + 1;
9337 goto done;
9338 }
9339 ctxt->input->cur += tlen;
9340 name = (xmlChar*)1;
9341 } else {
9342 if (prefix == NULL)
9343 name = xmlParseNameAndCompare(ctxt, ctxt->name);
9344 else
9345 name = xmlParseQNameAndCompare(ctxt, ctxt->name, prefix);
9346 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009347
9348 /*
9349 * We should definitely be at the ending "S? '>'" part
9350 */
9351 GROW;
9352 SKIP_BLANKS;
Daniel Veillard73b013f2003-09-30 12:36:01 +00009353 if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009354 xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009355 } else
9356 NEXT1;
9357
9358 /*
9359 * [ WFC: Element Type Match ]
9360 * The Name in an element's end-tag must match the element type in the
9361 * start-tag.
9362 *
9363 */
9364 if (name != (xmlChar*)1) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009365 if (name == NULL) name = BAD_CAST "unparseable";
Daniel Veillard852505b2009-08-23 15:44:48 +02009366 if ((line == 0) && (ctxt->node != NULL))
9367 line = ctxt->node->line;
Daniel Veillardf403d292003-10-05 13:51:35 +00009368 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
Daniel Veillard0fb18932003-09-07 09:14:37 +00009369 "Opening and ending tag mismatch: %s line %d and %s\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009370 ctxt->name, line, name);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009371 }
9372
9373 /*
9374 * SAX: End of Tag
9375 */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009376done:
Daniel Veillard0fb18932003-09-07 09:14:37 +00009377 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9378 (!ctxt->disableSAX))
9379 ctxt->sax->endElementNs(ctxt->userData, ctxt->name, prefix, URI);
9380
Daniel Veillard0fb18932003-09-07 09:14:37 +00009381 spacePop(ctxt);
9382 if (nsNr != 0)
9383 nsPop(ctxt, nsNr);
9384 return;
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009385}
9386
9387/**
Owen Taylor3473f882001-02-23 17:55:21 +00009388 * xmlParseCDSect:
9389 * @ctxt: an XML parser context
9390 *
9391 * Parse escaped pure raw content.
9392 *
9393 * [18] CDSect ::= CDStart CData CDEnd
9394 *
9395 * [19] CDStart ::= '<![CDATA['
9396 *
9397 * [20] Data ::= (Char* - (Char* ']]>' Char*))
9398 *
9399 * [21] CDEnd ::= ']]>'
9400 */
9401void
9402xmlParseCDSect(xmlParserCtxtPtr ctxt) {
9403 xmlChar *buf = NULL;
9404 int len = 0;
9405 int size = XML_PARSER_BUFFER_SIZE;
9406 int r, rl;
9407 int s, sl;
9408 int cur, l;
9409 int count = 0;
9410
Daniel Veillard8f597c32003-10-06 08:19:27 +00009411 /* Check 2.6.0 was NXT(0) not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009412 if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009413 SKIP(9);
9414 } else
9415 return;
9416
9417 ctxt->instate = XML_PARSER_CDATA_SECTION;
9418 r = CUR_CHAR(rl);
William M. Brack871611b2003-10-18 04:53:14 +00009419 if (!IS_CHAR(r)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009420 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009421 ctxt->instate = XML_PARSER_CONTENT;
9422 return;
9423 }
9424 NEXTL(rl);
9425 s = CUR_CHAR(sl);
William M. Brack871611b2003-10-18 04:53:14 +00009426 if (!IS_CHAR(s)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009427 xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009428 ctxt->instate = XML_PARSER_CONTENT;
9429 return;
9430 }
9431 NEXTL(sl);
9432 cur = CUR_CHAR(l);
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009433 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009434 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009435 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009436 return;
9437 }
William M. Brack871611b2003-10-18 04:53:14 +00009438 while (IS_CHAR(cur) &&
Owen Taylor3473f882001-02-23 17:55:21 +00009439 ((r != ']') || (s != ']') || (cur != '>'))) {
9440 if (len + 5 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009441 xmlChar *tmp;
9442
Owen Taylor3473f882001-02-23 17:55:21 +00009443 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009444 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9445 if (tmp == NULL) {
9446 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009447 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009448 return;
9449 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009450 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009451 }
9452 COPY_BUF(rl,buf,len,r);
9453 r = s;
9454 rl = sl;
9455 s = cur;
9456 sl = l;
9457 count++;
9458 if (count > 50) {
9459 GROW;
9460 count = 0;
9461 }
9462 NEXTL(l);
9463 cur = CUR_CHAR(l);
9464 }
9465 buf[len] = 0;
9466 ctxt->instate = XML_PARSER_CONTENT;
9467 if (cur != '>') {
Daniel Veillardbc92eca2003-09-15 09:48:06 +00009468 xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
Owen Taylor3473f882001-02-23 17:55:21 +00009469 "CData section not finished\n%.50s\n", buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009470 xmlFree(buf);
9471 return;
9472 }
9473 NEXTL(l);
9474
9475 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009476 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00009477 */
9478 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
9479 if (ctxt->sax->cdataBlock != NULL)
9480 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00009481 else if (ctxt->sax->characters != NULL)
9482 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00009483 }
9484 xmlFree(buf);
9485}
9486
9487/**
9488 * xmlParseContent:
9489 * @ctxt: an XML parser context
9490 *
9491 * Parse a content:
9492 *
9493 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9494 */
9495
9496void
9497xmlParseContent(xmlParserCtxtPtr ctxt) {
9498 GROW;
Daniel Veillardfdc91562002-07-01 21:52:03 +00009499 while ((RAW != 0) &&
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009500 ((RAW != '<') || (NXT(1) != '/')) &&
9501 (ctxt->instate != XML_PARSER_EOF)) {
Owen Taylor3473f882001-02-23 17:55:21 +00009502 const xmlChar *test = CUR_PTR;
Daniel Veillard3e59fc52003-04-18 12:34:58 +00009503 unsigned int cons = ctxt->input->consumed;
Daniel Veillard21a0f912001-02-25 19:54:14 +00009504 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009505
9506 /*
Owen Taylor3473f882001-02-23 17:55:21 +00009507 * First case : a Processing Instruction.
9508 */
Daniel Veillardfdc91562002-07-01 21:52:03 +00009509 if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009510 xmlParsePI(ctxt);
9511 }
9512
9513 /*
9514 * Second case : a CDSection
9515 */
Daniel Veillard8f597c32003-10-06 08:19:27 +00009516 /* 2.6.0 test was *cur not RAW */
Daniel Veillarda07050d2003-10-19 14:46:32 +00009517 else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009518 xmlParseCDSect(ctxt);
9519 }
9520
9521 /*
9522 * Third case : a comment
9523 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009524 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00009525 (NXT(2) == '-') && (NXT(3) == '-')) {
9526 xmlParseComment(ctxt);
9527 ctxt->instate = XML_PARSER_CONTENT;
9528 }
9529
9530 /*
9531 * Fourth case : a sub-element.
9532 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00009533 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00009534 xmlParseElement(ctxt);
9535 }
9536
9537 /*
9538 * Fifth case : a reference. If if has not been resolved,
9539 * parsing returns it's Name, create the node
9540 */
9541
Daniel Veillard21a0f912001-02-25 19:54:14 +00009542 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00009543 xmlParseReference(ctxt);
9544 }
9545
9546 /*
9547 * Last case, text. Note that References are handled directly.
9548 */
9549 else {
9550 xmlParseCharData(ctxt, 0);
9551 }
9552
9553 GROW;
9554 /*
9555 * Pop-up of finished entities.
9556 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00009557 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00009558 xmlPopInput(ctxt);
9559 SHRINK;
9560
Daniel Veillardfdc91562002-07-01 21:52:03 +00009561 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009562 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9563 "detected an error in element content\n");
Owen Taylor3473f882001-02-23 17:55:21 +00009564 ctxt->instate = XML_PARSER_EOF;
9565 break;
9566 }
9567 }
9568}
9569
9570/**
9571 * xmlParseElement:
9572 * @ctxt: an XML parser context
9573 *
9574 * parse an XML element, this is highly recursive
9575 *
9576 * [39] element ::= EmptyElemTag | STag content ETag
9577 *
9578 * [ WFC: Element Type Match ]
9579 * The Name in an element's end-tag must match the element type in the
9580 * start-tag.
9581 *
Owen Taylor3473f882001-02-23 17:55:21 +00009582 */
9583
9584void
9585xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillard2fdbd322003-08-18 12:15:38 +00009586 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +02009587 const xmlChar *prefix = NULL;
9588 const xmlChar *URI = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009589 xmlParserNodeInfo node_info;
Daniel Veillarded35d3d2012-05-11 10:52:27 +08009590 int line, tlen = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00009591 xmlNodePtr ret;
Daniel Veillard0fb18932003-09-07 09:14:37 +00009592 int nsNr = ctxt->nsNr;
Owen Taylor3473f882001-02-23 17:55:21 +00009593
Daniel Veillard8915c152008-08-26 13:05:34 +00009594 if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
9595 ((ctxt->options & XML_PARSE_HUGE) == 0)) {
9596 xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
9597 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
9598 xmlParserMaxDepth);
Daniel Veillard4a9fe382006-09-19 12:44:35 +00009599 ctxt->instate = XML_PARSER_EOF;
9600 return;
9601 }
9602
Owen Taylor3473f882001-02-23 17:55:21 +00009603 /* Capture start position */
9604 if (ctxt->record_info) {
9605 node_info.begin_pos = ctxt->input->consumed +
9606 (CUR_PTR - ctxt->input->base);
9607 node_info.begin_line = ctxt->input->line;
9608 }
9609
9610 if (ctxt->spaceNr == 0)
9611 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +00009612 else if (*ctxt->space == -2)
9613 spacePush(ctxt, -1);
Owen Taylor3473f882001-02-23 17:55:21 +00009614 else
9615 spacePush(ctxt, *ctxt->space);
9616
Daniel Veillard6c5b2d32003-03-27 14:55:52 +00009617 line = ctxt->input->line;
Daniel Veillard81273902003-09-30 00:43:48 +00009618#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009619 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +00009620#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009621 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +00009622#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009623 else
9624 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009625#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +08009626 if (ctxt->instate == XML_PARSER_EOF)
9627 return;
Owen Taylor3473f882001-02-23 17:55:21 +00009628 if (name == NULL) {
9629 spacePop(ctxt);
9630 return;
9631 }
9632 namePush(ctxt, name);
9633 ret = ctxt->node;
9634
Daniel Veillard4432df22003-09-28 18:58:27 +00009635#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +00009636 /*
9637 * [ VC: Root Element Type ]
9638 * The Name in the document type declaration must match the element
9639 * type of the root element.
9640 */
9641 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
9642 ctxt->node && (ctxt->node == ctxt->myDoc->children))
9643 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +00009644#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009645
9646 /*
9647 * Check for an Empty Element.
9648 */
9649 if ((RAW == '/') && (NXT(1) == '>')) {
9650 SKIP(2);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009651 if (ctxt->sax2) {
9652 if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
9653 (!ctxt->disableSAX))
9654 ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
Daniel Veillard81273902003-09-30 00:43:48 +00009655#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard0fb18932003-09-07 09:14:37 +00009656 } else {
9657 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
9658 (!ctxt->disableSAX))
9659 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +00009660#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009661 }
Daniel Veillard0fb18932003-09-07 09:14:37 +00009662 namePop(ctxt);
9663 spacePop(ctxt);
9664 if (nsNr != ctxt->nsNr)
9665 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009666 if ( ret != NULL && ctxt->record_info ) {
9667 node_info.end_pos = ctxt->input->consumed +
9668 (CUR_PTR - ctxt->input->base);
9669 node_info.end_line = ctxt->input->line;
9670 node_info.node = ret;
9671 xmlParserAddNodeInfo(ctxt, &node_info);
9672 }
9673 return;
9674 }
9675 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00009676 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00009677 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +00009678 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
9679 "Couldn't find end of Start Tag %s line %d\n",
9680 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009681
9682 /*
9683 * end of parsing of this node.
9684 */
9685 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009686 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009687 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009688 if (nsNr != ctxt->nsNr)
9689 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009690
9691 /*
9692 * Capture end position and add node
9693 */
9694 if ( ret != NULL && ctxt->record_info ) {
9695 node_info.end_pos = ctxt->input->consumed +
9696 (CUR_PTR - ctxt->input->base);
9697 node_info.end_line = ctxt->input->line;
9698 node_info.node = ret;
9699 xmlParserAddNodeInfo(ctxt, &node_info);
9700 }
9701 return;
9702 }
9703
9704 /*
9705 * Parse the content of the element:
9706 */
9707 xmlParseContent(ctxt);
Daniel Veillard73b013f2003-09-30 12:36:01 +00009708 if (!IS_BYTE_CHAR(RAW)) {
Daniel Veillardf403d292003-10-05 13:51:35 +00009709 xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
Daniel Veillard2b0f8792003-10-10 19:36:36 +00009710 "Premature end of data in tag %s line %d\n",
Daniel Veillardf403d292003-10-05 13:51:35 +00009711 name, line, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009712
9713 /*
9714 * end of parsing of this node.
9715 */
9716 nodePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009717 namePop(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009718 spacePop(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +00009719 if (nsNr != ctxt->nsNr)
9720 nsPop(ctxt, ctxt->nsNr - nsNr);
Owen Taylor3473f882001-02-23 17:55:21 +00009721 return;
9722 }
9723
9724 /*
9725 * parse the end of tag: '</' should be here.
9726 */
Daniel Veillarde57ec792003-09-10 10:50:59 +00009727 if (ctxt->sax2) {
Daniel Veillardc82c57e2004-01-12 16:24:34 +00009728 xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
Daniel Veillarde57ec792003-09-10 10:50:59 +00009729 namePop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +00009730 }
9731#ifdef LIBXML_SAX1_ENABLED
9732 else
Daniel Veillard0fb18932003-09-07 09:14:37 +00009733 xmlParseEndTag1(ctxt, line);
Daniel Veillard81273902003-09-30 00:43:48 +00009734#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +00009735
9736 /*
9737 * Capture end position and add node
9738 */
9739 if ( ret != NULL && ctxt->record_info ) {
9740 node_info.end_pos = ctxt->input->consumed +
9741 (CUR_PTR - ctxt->input->base);
9742 node_info.end_line = ctxt->input->line;
9743 node_info.node = ret;
9744 xmlParserAddNodeInfo(ctxt, &node_info);
9745 }
9746}
9747
9748/**
9749 * xmlParseVersionNum:
9750 * @ctxt: an XML parser context
9751 *
9752 * parse the XML version value.
9753 *
Daniel Veillard34e3f642008-07-29 09:02:27 +00009754 * [26] VersionNum ::= '1.' [0-9]+
9755 *
9756 * In practice allow [0-9].[0-9]+ at that level
Owen Taylor3473f882001-02-23 17:55:21 +00009757 *
9758 * Returns the string giving the XML version number, or NULL
9759 */
9760xmlChar *
9761xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
9762 xmlChar *buf = NULL;
9763 int len = 0;
9764 int size = 10;
9765 xmlChar cur;
9766
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009767 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009768 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009769 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009770 return(NULL);
9771 }
9772 cur = CUR;
Daniel Veillard34e3f642008-07-29 09:02:27 +00009773 if (!((cur >= '0') && (cur <= '9'))) {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009774 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009775 return(NULL);
9776 }
9777 buf[len++] = cur;
9778 NEXT;
9779 cur=CUR;
9780 if (cur != '.') {
Daniel Veillard7e5c3f42008-07-29 16:12:31 +00009781 xmlFree(buf);
Daniel Veillard34e3f642008-07-29 09:02:27 +00009782 return(NULL);
9783 }
9784 buf[len++] = cur;
9785 NEXT;
9786 cur=CUR;
9787 while ((cur >= '0') && (cur <= '9')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009788 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009789 xmlChar *tmp;
9790
Owen Taylor3473f882001-02-23 17:55:21 +00009791 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009792 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9793 if (tmp == NULL) {
Daniel Veillard68b6e022008-03-31 09:26:00 +00009794 xmlFree(buf);
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009795 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009796 return(NULL);
9797 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009798 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009799 }
9800 buf[len++] = cur;
9801 NEXT;
9802 cur=CUR;
9803 }
9804 buf[len] = 0;
9805 return(buf);
9806}
9807
9808/**
9809 * xmlParseVersionInfo:
9810 * @ctxt: an XML parser context
Daniel Veillard68b6e022008-03-31 09:26:00 +00009811 *
Owen Taylor3473f882001-02-23 17:55:21 +00009812 * parse the XML version.
9813 *
9814 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
Daniel Veillard68b6e022008-03-31 09:26:00 +00009815 *
Owen Taylor3473f882001-02-23 17:55:21 +00009816 * [25] Eq ::= S? '=' S?
9817 *
9818 * Returns the version string, e.g. "1.0"
9819 */
9820
9821xmlChar *
9822xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
9823 xmlChar *version = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009824
Daniel Veillarda07050d2003-10-19 14:46:32 +00009825 if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009826 SKIP(7);
9827 SKIP_BLANKS;
9828 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009829 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009830 return(NULL);
9831 }
9832 NEXT;
9833 SKIP_BLANKS;
9834 if (RAW == '"') {
9835 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009836 version = xmlParseVersionNum(ctxt);
9837 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009838 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009839 } else
9840 NEXT;
9841 } else if (RAW == '\''){
9842 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009843 version = xmlParseVersionNum(ctxt);
9844 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009845 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009846 } else
9847 NEXT;
9848 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009849 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009850 }
9851 }
9852 return(version);
9853}
9854
9855/**
9856 * xmlParseEncName:
9857 * @ctxt: an XML parser context
9858 *
9859 * parse the XML encoding name
9860 *
9861 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
9862 *
9863 * Returns the encoding name value or NULL
9864 */
9865xmlChar *
9866xmlParseEncName(xmlParserCtxtPtr ctxt) {
9867 xmlChar *buf = NULL;
9868 int len = 0;
9869 int size = 10;
9870 xmlChar cur;
9871
9872 cur = CUR;
9873 if (((cur >= 'a') && (cur <= 'z')) ||
9874 ((cur >= 'A') && (cur <= 'Z'))) {
Daniel Veillard3c908dc2003-04-19 00:07:51 +00009875 buf = (xmlChar *) xmlMallocAtomic(size * sizeof(xmlChar));
Owen Taylor3473f882001-02-23 17:55:21 +00009876 if (buf == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009877 xmlErrMemory(ctxt, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009878 return(NULL);
9879 }
Daniel Veillard34e3f642008-07-29 09:02:27 +00009880
Owen Taylor3473f882001-02-23 17:55:21 +00009881 buf[len++] = cur;
9882 NEXT;
9883 cur = CUR;
9884 while (((cur >= 'a') && (cur <= 'z')) ||
9885 ((cur >= 'A') && (cur <= 'Z')) ||
9886 ((cur >= '0') && (cur <= '9')) ||
9887 (cur == '.') || (cur == '_') ||
9888 (cur == '-')) {
9889 if (len + 1 >= size) {
Daniel Veillard2248ff12004-09-22 23:05:14 +00009890 xmlChar *tmp;
9891
Owen Taylor3473f882001-02-23 17:55:21 +00009892 size *= 2;
Daniel Veillard2248ff12004-09-22 23:05:14 +00009893 tmp = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
9894 if (tmp == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009895 xmlErrMemory(ctxt, NULL);
Daniel Veillard2248ff12004-09-22 23:05:14 +00009896 xmlFree(buf);
Owen Taylor3473f882001-02-23 17:55:21 +00009897 return(NULL);
9898 }
Daniel Veillard2248ff12004-09-22 23:05:14 +00009899 buf = tmp;
Owen Taylor3473f882001-02-23 17:55:21 +00009900 }
9901 buf[len++] = cur;
9902 NEXT;
9903 cur = CUR;
9904 if (cur == 0) {
9905 SHRINK;
9906 GROW;
9907 cur = CUR;
9908 }
9909 }
9910 buf[len] = 0;
9911 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009912 xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009913 }
9914 return(buf);
9915}
9916
9917/**
9918 * xmlParseEncodingDecl:
9919 * @ctxt: an XML parser context
9920 *
9921 * parse the XML encoding declaration
9922 *
9923 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
9924 *
9925 * this setups the conversion filters.
9926 *
9927 * Returns the encoding value or NULL
9928 */
9929
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009930const xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00009931xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
9932 xmlChar *encoding = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009933
9934 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +00009935 if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009936 SKIP(8);
9937 SKIP_BLANKS;
9938 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009939 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009940 return(NULL);
9941 }
9942 NEXT;
9943 SKIP_BLANKS;
9944 if (RAW == '"') {
9945 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009946 encoding = xmlParseEncName(ctxt);
9947 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009948 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009949 } else
9950 NEXT;
9951 } else if (RAW == '\''){
9952 NEXT;
Owen Taylor3473f882001-02-23 17:55:21 +00009953 encoding = xmlParseEncName(ctxt);
9954 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009955 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009956 } else
9957 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00009958 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +00009959 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +00009960 }
Daniel Veillardc62efc82011-05-16 16:03:50 +08009961
9962 /*
9963 * Non standard parsing, allowing the user to ignore encoding
9964 */
9965 if (ctxt->options & XML_PARSE_IGNORE_ENC)
9966 return(encoding);
9967
Daniel Veillard6b621b82003-08-11 15:03:34 +00009968 /*
9969 * UTF-16 encoding stwich has already taken place at this stage,
9970 * more over the little-endian/big-endian selection is already done
9971 */
9972 if ((encoding != NULL) &&
9973 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-16")) ||
9974 (!xmlStrcasecmp(encoding, BAD_CAST "UTF16")))) {
Daniel Veillard37334572008-07-31 08:20:02 +00009975 /*
9976 * If no encoding was passed to the parser, that we are
9977 * using UTF-16 and no decoder is present i.e. the
9978 * document is apparently UTF-8 compatible, then raise an
9979 * encoding mismatch fatal error
9980 */
9981 if ((ctxt->encoding == NULL) &&
9982 (ctxt->input->buf != NULL) &&
9983 (ctxt->input->buf->encoder == NULL)) {
9984 xmlFatalErrMsg(ctxt, XML_ERR_INVALID_ENCODING,
9985 "Document labelled UTF-16 but has UTF-8 content\n");
9986 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009987 if (ctxt->encoding != NULL)
9988 xmlFree((xmlChar *) ctxt->encoding);
9989 ctxt->encoding = encoding;
Daniel Veillardb19ba832003-08-14 00:33:46 +00009990 }
9991 /*
9992 * UTF-8 encoding is handled natively
9993 */
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009994 else if ((encoding != NULL) &&
Daniel Veillardb19ba832003-08-14 00:33:46 +00009995 ((!xmlStrcasecmp(encoding, BAD_CAST "UTF-8")) ||
9996 (!xmlStrcasecmp(encoding, BAD_CAST "UTF8")))) {
Daniel Veillardab1ae3a2003-08-14 12:19:54 +00009997 if (ctxt->encoding != NULL)
9998 xmlFree((xmlChar *) ctxt->encoding);
9999 ctxt->encoding = encoding;
Daniel Veillard6b621b82003-08-11 15:03:34 +000010000 }
Daniel Veillardab1ae3a2003-08-14 12:19:54 +000010001 else if (encoding != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000010002 xmlCharEncodingHandlerPtr handler;
10003
10004 if (ctxt->input->encoding != NULL)
10005 xmlFree((xmlChar *) ctxt->input->encoding);
10006 ctxt->input->encoding = encoding;
10007
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010008 handler = xmlFindCharEncodingHandler((const char *) encoding);
10009 if (handler != NULL) {
10010 xmlSwitchToEncoding(ctxt, handler);
Owen Taylor3473f882001-02-23 17:55:21 +000010011 } else {
Daniel Veillardf403d292003-10-05 13:51:35 +000010012 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
Daniel Veillarda6874ca2003-07-29 16:47:24 +000010013 "Unsupported encoding %s\n", encoding);
10014 return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010015 }
10016 }
10017 }
10018 return(encoding);
10019}
10020
10021/**
10022 * xmlParseSDDecl:
10023 * @ctxt: an XML parser context
10024 *
10025 * parse the XML standalone declaration
10026 *
10027 * [32] SDDecl ::= S 'standalone' Eq
10028 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10029 *
10030 * [ VC: Standalone Document Declaration ]
10031 * TODO The standalone document declaration must have the value "no"
10032 * if any external markup declarations contain declarations of:
10033 * - attributes with default values, if elements to which these
10034 * attributes apply appear in the document without specifications
10035 * of values for these attributes, or
10036 * - entities (other than amp, lt, gt, apos, quot), if references
10037 * to those entities appear in the document, or
10038 * - attributes with values subject to normalization, where the
10039 * attribute appears in the document with a value which will change
10040 * as a result of normalization, or
10041 * - element types with element content, if white space occurs directly
10042 * within any instance of those types.
10043 *
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010044 * Returns:
10045 * 1 if standalone="yes"
10046 * 0 if standalone="no"
10047 * -2 if standalone attribute is missing or invalid
10048 * (A standalone value of -2 means that the XML declaration was found,
10049 * but no value was specified for the standalone attribute).
Owen Taylor3473f882001-02-23 17:55:21 +000010050 */
10051
10052int
10053xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard602f2bd2006-12-04 09:26:04 +000010054 int standalone = -2;
Owen Taylor3473f882001-02-23 17:55:21 +000010055
10056 SKIP_BLANKS;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010057 if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010058 SKIP(10);
10059 SKIP_BLANKS;
10060 if (RAW != '=') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010061 xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010062 return(standalone);
10063 }
10064 NEXT;
10065 SKIP_BLANKS;
10066 if (RAW == '\''){
10067 NEXT;
10068 if ((RAW == 'n') && (NXT(1) == 'o')) {
10069 standalone = 0;
10070 SKIP(2);
10071 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10072 (NXT(2) == 's')) {
10073 standalone = 1;
10074 SKIP(3);
10075 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010076 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010077 }
10078 if (RAW != '\'') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010079 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010080 } else
10081 NEXT;
10082 } else if (RAW == '"'){
10083 NEXT;
10084 if ((RAW == 'n') && (NXT(1) == 'o')) {
10085 standalone = 0;
10086 SKIP(2);
10087 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
10088 (NXT(2) == 's')) {
10089 standalone = 1;
10090 SKIP(3);
10091 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010092 xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010093 }
10094 if (RAW != '"') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010095 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010096 } else
10097 NEXT;
10098 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010099 xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010100 }
10101 }
10102 return(standalone);
10103}
10104
10105/**
10106 * xmlParseXMLDecl:
10107 * @ctxt: an XML parser context
10108 *
10109 * parse an XML declaration header
10110 *
10111 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10112 */
10113
10114void
10115xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10116 xmlChar *version;
10117
10118 /*
Daniel Veillardae487ba2005-11-17 07:25:52 +000010119 * This value for standalone indicates that the document has an
10120 * XML declaration but it does not have a standalone attribute.
10121 * It will be overwritten later if a standalone attribute is found.
10122 */
10123 ctxt->input->standalone = -2;
10124
10125 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010126 * We know that '<?xml' is here.
10127 */
10128 SKIP(5);
10129
William M. Brack76e95df2003-10-18 16:20:14 +000010130 if (!IS_BLANK_CH(RAW)) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010131 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10132 "Blank needed after '<?xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010133 }
10134 SKIP_BLANKS;
10135
10136 /*
Daniel Veillard19840942001-11-29 16:11:38 +000010137 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +000010138 */
10139 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +000010140 if (version == NULL) {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010141 xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
Daniel Veillard19840942001-11-29 16:11:38 +000010142 } else {
10143 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10144 /*
Daniel Veillard34e3f642008-07-29 09:02:27 +000010145 * Changed here for XML-1.0 5th edition
Daniel Veillard19840942001-11-29 16:11:38 +000010146 */
Daniel Veillard34e3f642008-07-29 09:02:27 +000010147 if (ctxt->options & XML_PARSE_OLD10) {
10148 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10149 "Unsupported version '%s'\n",
10150 version);
10151 } else {
10152 if ((version[0] == '1') && ((version[1] == '.'))) {
10153 xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10154 "Unsupported version '%s'\n",
10155 version, NULL);
10156 } else {
10157 xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10158 "Unsupported version '%s'\n",
10159 version);
10160 }
10161 }
Daniel Veillard19840942001-11-29 16:11:38 +000010162 }
10163 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +000010164 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +000010165 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +000010166 }
Owen Taylor3473f882001-02-23 17:55:21 +000010167
10168 /*
10169 * We may have the encoding declaration
10170 */
William M. Brack76e95df2003-10-18 16:20:14 +000010171 if (!IS_BLANK_CH(RAW)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010172 if ((RAW == '?') && (NXT(1) == '>')) {
10173 SKIP(2);
10174 return;
10175 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010176 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010177 }
10178 xmlParseEncodingDecl(ctxt);
10179 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10180 /*
10181 * The XML REC instructs us to stop parsing right here
10182 */
10183 return;
10184 }
10185
10186 /*
10187 * We may have the standalone status.
10188 */
William M. Brack76e95df2003-10-18 16:20:14 +000010189 if ((ctxt->input->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010190 if ((RAW == '?') && (NXT(1) == '>')) {
10191 SKIP(2);
10192 return;
10193 }
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010194 xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010195 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020010196
10197 /*
10198 * We can grow the input buffer freely at that point
10199 */
10200 GROW;
10201
Owen Taylor3473f882001-02-23 17:55:21 +000010202 SKIP_BLANKS;
10203 ctxt->input->standalone = xmlParseSDDecl(ctxt);
10204
10205 SKIP_BLANKS;
10206 if ((RAW == '?') && (NXT(1) == '>')) {
10207 SKIP(2);
10208 } else if (RAW == '>') {
10209 /* Deprecated old WD ... */
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010210 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010211 NEXT;
10212 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010213 xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010214 MOVETO_ENDTAG(CUR_PTR);
10215 NEXT;
10216 }
10217}
10218
10219/**
10220 * xmlParseMisc:
10221 * @ctxt: an XML parser context
10222 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010223 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +000010224 *
10225 * [27] Misc ::= Comment | PI | S
10226 */
10227
10228void
10229xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010230 while (((RAW == '<') && (NXT(1) == '?')) ||
Daniel Veillarda07050d2003-10-19 14:46:32 +000010231 (CMP4(CUR_PTR, '<', '!', '-', '-')) ||
William M. Brack76e95df2003-10-18 16:20:14 +000010232 IS_BLANK_CH(CUR)) {
Daniel Veillard561b7f82002-03-20 21:55:57 +000010233 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010234 xmlParsePI(ctxt);
William M. Brack76e95df2003-10-18 16:20:14 +000010235 } else if (IS_BLANK_CH(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +000010236 NEXT;
10237 } else
10238 xmlParseComment(ctxt);
10239 }
10240}
10241
10242/**
10243 * xmlParseDocument:
10244 * @ctxt: an XML parser context
10245 *
10246 * parse an XML document (and build a tree if using the standard SAX
10247 * interface).
10248 *
10249 * [1] document ::= prolog element Misc*
10250 *
10251 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
10252 *
10253 * Returns 0, -1 in case of error. the parser context is augmented
10254 * as a result of the parsing.
10255 */
10256
10257int
10258xmlParseDocument(xmlParserCtxtPtr ctxt) {
10259 xmlChar start[4];
10260 xmlCharEncoding enc;
10261
10262 xmlInitParser();
10263
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010264 if ((ctxt == NULL) || (ctxt->input == NULL))
10265 return(-1);
10266
Owen Taylor3473f882001-02-23 17:55:21 +000010267 GROW;
10268
10269 /*
Daniel Veillard0fb18932003-09-07 09:14:37 +000010270 * SAX: detecting the level.
10271 */
Daniel Veillarde57ec792003-09-10 10:50:59 +000010272 xmlDetectSAX2(ctxt);
Daniel Veillard0fb18932003-09-07 09:14:37 +000010273
10274 /*
Owen Taylor3473f882001-02-23 17:55:21 +000010275 * SAX: beginning of the document processing.
10276 */
10277 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10278 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10279
Nikolay Sivove6ad10a2010-11-01 11:35:14 +010010280 if ((ctxt->encoding == NULL) &&
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010281 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard4aafa792001-07-28 17:21:12 +000010282 /*
10283 * Get the 4 first bytes and decode the charset
10284 * if enc != XML_CHAR_ENCODING_NONE
10285 * plug some encoding conversion routines.
10286 */
10287 start[0] = RAW;
10288 start[1] = NXT(1);
10289 start[2] = NXT(2);
10290 start[3] = NXT(3);
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010291 enc = xmlDetectCharEncoding(&start[0], 4);
Daniel Veillard4aafa792001-07-28 17:21:12 +000010292 if (enc != XML_CHAR_ENCODING_NONE) {
10293 xmlSwitchEncoding(ctxt, enc);
10294 }
Owen Taylor3473f882001-02-23 17:55:21 +000010295 }
10296
10297
10298 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010299 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010300 }
10301
10302 /*
10303 * Check for the XMLDecl in the Prolog.
Daniel Veillard7e385bd2009-08-26 11:38:49 +020010304 * do not GROW here to avoid the detected encoder to decode more
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010305 * than just the first line, unless the amount of data is really
10306 * too small to hold "<?xml version="1.0" encoding="foo"
Owen Taylor3473f882001-02-23 17:55:21 +000010307 */
Daniel Veillard9d3d1412009-09-15 18:41:30 +020010308 if ((ctxt->input->end - ctxt->input->cur) < 35) {
10309 GROW;
10310 }
Daniel Veillarda07050d2003-10-19 14:46:32 +000010311 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010312
10313 /*
10314 * Note that we will switch encoding on the fly.
10315 */
10316 xmlParseXMLDecl(ctxt);
10317 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10318 /*
10319 * The XML REC instructs us to stop parsing right here
10320 */
10321 return(-1);
10322 }
10323 ctxt->standalone = ctxt->input->standalone;
10324 SKIP_BLANKS;
10325 } else {
10326 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10327 }
10328 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10329 ctxt->sax->startDocument(ctxt->userData);
10330
10331 /*
10332 * The Misc part of the Prolog
10333 */
10334 GROW;
10335 xmlParseMisc(ctxt);
10336
10337 /*
10338 * Then possibly doc type declaration(s) and more Misc
10339 * (doctypedecl Misc*)?
10340 */
10341 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010342 if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
Owen Taylor3473f882001-02-23 17:55:21 +000010343
10344 ctxt->inSubset = 1;
10345 xmlParseDocTypeDecl(ctxt);
10346 if (RAW == '[') {
10347 ctxt->instate = XML_PARSER_DTD;
10348 xmlParseInternalSubset(ctxt);
10349 }
10350
10351 /*
10352 * Create and update the external subset.
10353 */
10354 ctxt->inSubset = 2;
10355 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
10356 (!ctxt->disableSAX))
10357 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10358 ctxt->extSubSystem, ctxt->extSubURI);
10359 ctxt->inSubset = 0;
10360
Daniel Veillardac4118d2008-01-11 05:27:32 +000010361 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000010362
10363 ctxt->instate = XML_PARSER_PROLOG;
10364 xmlParseMisc(ctxt);
10365 }
10366
10367 /*
10368 * Time to start parsing the tree itself
10369 */
10370 GROW;
10371 if (RAW != '<') {
Daniel Veillardbdbe0d42003-09-14 19:56:14 +000010372 xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
10373 "Start tag expected, '<' not found\n");
Owen Taylor3473f882001-02-23 17:55:21 +000010374 } else {
10375 ctxt->instate = XML_PARSER_CONTENT;
10376 xmlParseElement(ctxt);
10377 ctxt->instate = XML_PARSER_EPILOG;
10378
10379
10380 /*
10381 * The Misc part at the end
10382 */
10383 xmlParseMisc(ctxt);
10384
Daniel Veillard561b7f82002-03-20 21:55:57 +000010385 if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010386 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010387 }
10388 ctxt->instate = XML_PARSER_EOF;
10389 }
10390
10391 /*
10392 * SAX: end of the document processing.
10393 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010394 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010395 ctxt->sax->endDocument(ctxt->userData);
10396
Daniel Veillard5997aca2002-03-18 18:36:20 +000010397 /*
10398 * Remove locally kept entity definitions if the tree was not built
10399 */
10400 if ((ctxt->myDoc != NULL) &&
10401 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
10402 xmlFreeDoc(ctxt->myDoc);
10403 ctxt->myDoc = NULL;
10404 }
10405
Daniel Veillardae0765b2008-07-31 19:54:59 +000010406 if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
10407 ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
10408 if (ctxt->valid)
10409 ctxt->myDoc->properties |= XML_DOC_DTDVALID;
10410 if (ctxt->nsWellFormed)
10411 ctxt->myDoc->properties |= XML_DOC_NSVALID;
10412 if (ctxt->options & XML_PARSE_OLD10)
10413 ctxt->myDoc->properties |= XML_DOC_OLD10;
10414 }
Daniel Veillardc7612992002-02-17 22:47:37 +000010415 if (! ctxt->wellFormed) {
10416 ctxt->valid = 0;
10417 return(-1);
10418 }
Owen Taylor3473f882001-02-23 17:55:21 +000010419 return(0);
10420}
10421
10422/**
10423 * xmlParseExtParsedEnt:
10424 * @ctxt: an XML parser context
10425 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000010426 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +000010427 * An external general parsed entity is well-formed if it matches the
10428 * production labeled extParsedEnt.
10429 *
10430 * [78] extParsedEnt ::= TextDecl? content
10431 *
10432 * Returns 0, -1 in case of error. the parser context is augmented
10433 * as a result of the parsing.
10434 */
10435
10436int
10437xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
10438 xmlChar start[4];
10439 xmlCharEncoding enc;
10440
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010441 if ((ctxt == NULL) || (ctxt->input == NULL))
10442 return(-1);
10443
Owen Taylor3473f882001-02-23 17:55:21 +000010444 xmlDefaultSAXHandlerInit();
10445
Daniel Veillard309f81d2003-09-23 09:02:53 +000010446 xmlDetectSAX2(ctxt);
10447
Owen Taylor3473f882001-02-23 17:55:21 +000010448 GROW;
10449
10450 /*
10451 * SAX: beginning of the document processing.
10452 */
10453 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10454 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
10455
10456 /*
10457 * Get the 4 first bytes and decode the charset
10458 * if enc != XML_CHAR_ENCODING_NONE
10459 * plug some encoding conversion routines.
10460 */
Daniel Veillard4aede2e2003-10-17 12:43:59 +000010461 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
10462 start[0] = RAW;
10463 start[1] = NXT(1);
10464 start[2] = NXT(2);
10465 start[3] = NXT(3);
10466 enc = xmlDetectCharEncoding(start, 4);
10467 if (enc != XML_CHAR_ENCODING_NONE) {
10468 xmlSwitchEncoding(ctxt, enc);
10469 }
Owen Taylor3473f882001-02-23 17:55:21 +000010470 }
10471
10472
10473 if (CUR == 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010474 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010475 }
10476
10477 /*
10478 * Check for the XMLDecl in the Prolog.
10479 */
10480 GROW;
Daniel Veillarda07050d2003-10-19 14:46:32 +000010481 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010482
10483 /*
10484 * Note that we will switch encoding on the fly.
10485 */
10486 xmlParseXMLDecl(ctxt);
10487 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10488 /*
10489 * The XML REC instructs us to stop parsing right here
10490 */
10491 return(-1);
10492 }
10493 SKIP_BLANKS;
10494 } else {
10495 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10496 }
10497 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
10498 ctxt->sax->startDocument(ctxt->userData);
10499
10500 /*
10501 * Doing validity checking on chunk doesn't make sense
10502 */
10503 ctxt->instate = XML_PARSER_CONTENT;
10504 ctxt->validate = 0;
10505 ctxt->loadsubset = 0;
10506 ctxt->depth = 0;
10507
10508 xmlParseContent(ctxt);
10509
10510 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010511 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010512 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010513 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010514 }
10515
10516 /*
10517 * SAX: end of the document processing.
10518 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +000010519 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000010520 ctxt->sax->endDocument(ctxt->userData);
10521
10522 if (! ctxt->wellFormed) return(-1);
10523 return(0);
10524}
10525
Daniel Veillard73b013f2003-09-30 12:36:01 +000010526#ifdef LIBXML_PUSH_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000010527/************************************************************************
10528 * *
10529 * Progressive parsing interfaces *
10530 * *
10531 ************************************************************************/
10532
10533/**
10534 * xmlParseLookupSequence:
10535 * @ctxt: an XML parser context
10536 * @first: the first char to lookup
10537 * @next: the next char to lookup or zero
10538 * @third: the next char to lookup or zero
10539 *
10540 * Try to find if a sequence (first, next, third) or just (first next) or
10541 * (first) is available in the input stream.
10542 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
10543 * to avoid rescanning sequences of bytes, it DOES change the state of the
10544 * parser, do not use liberally.
10545 *
10546 * Returns the index to the current parsing point if the full sequence
10547 * is available, -1 otherwise.
10548 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010549static int
Owen Taylor3473f882001-02-23 17:55:21 +000010550xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
10551 xmlChar next, xmlChar third) {
10552 int base, len;
10553 xmlParserInputPtr in;
10554 const xmlChar *buf;
10555
10556 in = ctxt->input;
10557 if (in == NULL) return(-1);
10558 base = in->cur - in->base;
10559 if (base < 0) return(-1);
10560 if (ctxt->checkIndex > base)
10561 base = ctxt->checkIndex;
10562 if (in->buf == NULL) {
10563 buf = in->base;
10564 len = in->length;
10565 } else {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010566 buf = xmlBufContent(in->buf->buffer);
10567 len = xmlBufUse(in->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000010568 }
10569 /* take into account the sequence length */
10570 if (third) len -= 2;
10571 else if (next) len --;
10572 for (;base < len;base++) {
10573 if (buf[base] == first) {
10574 if (third != 0) {
10575 if ((buf[base + 1] != next) ||
10576 (buf[base + 2] != third)) continue;
10577 } else if (next != 0) {
10578 if (buf[base + 1] != next) continue;
10579 }
10580 ctxt->checkIndex = 0;
10581#ifdef DEBUG_PUSH
10582 if (next == 0)
10583 xmlGenericError(xmlGenericErrorContext,
10584 "PP: lookup '%c' found at %d\n",
10585 first, base);
10586 else if (third == 0)
10587 xmlGenericError(xmlGenericErrorContext,
10588 "PP: lookup '%c%c' found at %d\n",
10589 first, next, base);
10590 else
10591 xmlGenericError(xmlGenericErrorContext,
10592 "PP: lookup '%c%c%c' found at %d\n",
10593 first, next, third, base);
10594#endif
10595 return(base - (in->cur - in->base));
10596 }
10597 }
10598 ctxt->checkIndex = base;
10599#ifdef DEBUG_PUSH
10600 if (next == 0)
10601 xmlGenericError(xmlGenericErrorContext,
10602 "PP: lookup '%c' failed\n", first);
10603 else if (third == 0)
10604 xmlGenericError(xmlGenericErrorContext,
10605 "PP: lookup '%c%c' failed\n", first, next);
10606 else
10607 xmlGenericError(xmlGenericErrorContext,
10608 "PP: lookup '%c%c%c' failed\n", first, next, third);
10609#endif
10610 return(-1);
10611}
10612
10613/**
Daniel Veillarda880b122003-04-21 21:36:41 +000010614 * xmlParseGetLasts:
10615 * @ctxt: an XML parser context
10616 * @lastlt: pointer to store the last '<' from the input
10617 * @lastgt: pointer to store the last '>' from the input
10618 *
10619 * Lookup the last < and > in the current chunk
10620 */
10621static void
10622xmlParseGetLasts(xmlParserCtxtPtr ctxt, const xmlChar **lastlt,
10623 const xmlChar **lastgt) {
10624 const xmlChar *tmp;
10625
10626 if ((ctxt == NULL) || (lastlt == NULL) || (lastgt == NULL)) {
10627 xmlGenericError(xmlGenericErrorContext,
10628 "Internal error: xmlParseGetLasts\n");
10629 return;
10630 }
Daniel Veillard0df3bc32004-06-08 12:03:41 +000010631 if ((ctxt->progressive != 0) && (ctxt->inputNr == 1)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010632 tmp = ctxt->input->end;
10633 tmp--;
Daniel Veillardeb70f932004-07-05 16:46:09 +000010634 while ((tmp >= ctxt->input->base) && (*tmp != '<')) tmp--;
Daniel Veillarda880b122003-04-21 21:36:41 +000010635 if (tmp < ctxt->input->base) {
10636 *lastlt = NULL;
10637 *lastgt = NULL;
Daniel Veillarda880b122003-04-21 21:36:41 +000010638 } else {
Daniel Veillardeb70f932004-07-05 16:46:09 +000010639 *lastlt = tmp;
10640 tmp++;
10641 while ((tmp < ctxt->input->end) && (*tmp != '>')) {
10642 if (*tmp == '\'') {
10643 tmp++;
10644 while ((tmp < ctxt->input->end) && (*tmp != '\'')) tmp++;
10645 if (tmp < ctxt->input->end) tmp++;
10646 } else if (*tmp == '"') {
10647 tmp++;
10648 while ((tmp < ctxt->input->end) && (*tmp != '"')) tmp++;
10649 if (tmp < ctxt->input->end) tmp++;
10650 } else
10651 tmp++;
10652 }
10653 if (tmp < ctxt->input->end)
10654 *lastgt = tmp;
10655 else {
10656 tmp = *lastlt;
10657 tmp--;
10658 while ((tmp >= ctxt->input->base) && (*tmp != '>')) tmp--;
10659 if (tmp >= ctxt->input->base)
10660 *lastgt = tmp;
10661 else
10662 *lastgt = NULL;
10663 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010664 }
Daniel Veillarda880b122003-04-21 21:36:41 +000010665 } else {
10666 *lastlt = NULL;
10667 *lastgt = NULL;
10668 }
10669}
10670/**
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010671 * xmlCheckCdataPush:
10672 * @cur: pointer to the bock of characters
10673 * @len: length of the block in bytes
10674 *
10675 * Check that the block of characters is okay as SCdata content [20]
10676 *
10677 * Returns the number of bytes to pass if okay, a negative index where an
10678 * UTF-8 error occured otherwise
10679 */
10680static int
10681xmlCheckCdataPush(const xmlChar *utf, int len) {
10682 int ix;
10683 unsigned char c;
10684 int codepoint;
10685
10686 if ((utf == NULL) || (len <= 0))
10687 return(0);
10688
10689 for (ix = 0; ix < len;) { /* string is 0-terminated */
10690 c = utf[ix];
10691 if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
10692 if (c >= 0x20)
10693 ix++;
10694 else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
10695 ix++;
10696 else
10697 return(-ix);
10698 } else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
10699 if (ix + 2 > len) return(ix);
10700 if ((utf[ix+1] & 0xc0 ) != 0x80)
10701 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010702 codepoint = (utf[ix] & 0x1f) << 6;
10703 codepoint |= utf[ix+1] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010704 if (!xmlIsCharQ(codepoint))
10705 return(-ix);
10706 ix += 2;
10707 } else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
10708 if (ix + 3 > len) return(ix);
10709 if (((utf[ix+1] & 0xc0) != 0x80) ||
10710 ((utf[ix+2] & 0xc0) != 0x80))
10711 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010712 codepoint = (utf[ix] & 0xf) << 12;
10713 codepoint |= (utf[ix+1] & 0x3f) << 6;
10714 codepoint |= utf[ix+2] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010715 if (!xmlIsCharQ(codepoint))
10716 return(-ix);
10717 ix += 3;
10718 } else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
10719 if (ix + 4 > len) return(ix);
10720 if (((utf[ix+1] & 0xc0) != 0x80) ||
10721 ((utf[ix+2] & 0xc0) != 0x80) ||
10722 ((utf[ix+3] & 0xc0) != 0x80))
10723 return(-ix);
Daniel Veillardeca59a22005-09-09 10:56:28 +000010724 codepoint = (utf[ix] & 0x7) << 18;
10725 codepoint |= (utf[ix+1] & 0x3f) << 12;
10726 codepoint |= (utf[ix+2] & 0x3f) << 6;
10727 codepoint |= utf[ix+3] & 0x3f;
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000010728 if (!xmlIsCharQ(codepoint))
10729 return(-ix);
10730 ix += 4;
10731 } else /* unknown encoding */
10732 return(-ix);
10733 }
10734 return(ix);
10735}
10736
10737/**
Owen Taylor3473f882001-02-23 17:55:21 +000010738 * xmlParseTryOrFinish:
10739 * @ctxt: an XML parser context
10740 * @terminate: last chunk indicator
10741 *
10742 * Try to progress on parsing
10743 *
10744 * Returns zero if no parsing was possible
10745 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +000010746static int
Owen Taylor3473f882001-02-23 17:55:21 +000010747xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
10748 int ret = 0;
Daniel Veillardc82c57e2004-01-12 16:24:34 +000010749 int avail, tlen;
Owen Taylor3473f882001-02-23 17:55:21 +000010750 xmlChar cur, next;
Daniel Veillarda880b122003-04-21 21:36:41 +000010751 const xmlChar *lastlt, *lastgt;
Owen Taylor3473f882001-02-23 17:55:21 +000010752
Daniel Veillard36e5cd52004-11-02 14:52:23 +000010753 if (ctxt->input == NULL)
10754 return(0);
10755
Owen Taylor3473f882001-02-23 17:55:21 +000010756#ifdef DEBUG_PUSH
10757 switch (ctxt->instate) {
10758 case XML_PARSER_EOF:
10759 xmlGenericError(xmlGenericErrorContext,
10760 "PP: try EOF\n"); break;
10761 case XML_PARSER_START:
10762 xmlGenericError(xmlGenericErrorContext,
10763 "PP: try START\n"); break;
10764 case XML_PARSER_MISC:
10765 xmlGenericError(xmlGenericErrorContext,
10766 "PP: try MISC\n");break;
10767 case XML_PARSER_COMMENT:
10768 xmlGenericError(xmlGenericErrorContext,
10769 "PP: try COMMENT\n");break;
10770 case XML_PARSER_PROLOG:
10771 xmlGenericError(xmlGenericErrorContext,
10772 "PP: try PROLOG\n");break;
10773 case XML_PARSER_START_TAG:
10774 xmlGenericError(xmlGenericErrorContext,
10775 "PP: try START_TAG\n");break;
10776 case XML_PARSER_CONTENT:
10777 xmlGenericError(xmlGenericErrorContext,
10778 "PP: try CONTENT\n");break;
10779 case XML_PARSER_CDATA_SECTION:
10780 xmlGenericError(xmlGenericErrorContext,
10781 "PP: try CDATA_SECTION\n");break;
10782 case XML_PARSER_END_TAG:
10783 xmlGenericError(xmlGenericErrorContext,
10784 "PP: try END_TAG\n");break;
10785 case XML_PARSER_ENTITY_DECL:
10786 xmlGenericError(xmlGenericErrorContext,
10787 "PP: try ENTITY_DECL\n");break;
10788 case XML_PARSER_ENTITY_VALUE:
10789 xmlGenericError(xmlGenericErrorContext,
10790 "PP: try ENTITY_VALUE\n");break;
10791 case XML_PARSER_ATTRIBUTE_VALUE:
10792 xmlGenericError(xmlGenericErrorContext,
10793 "PP: try ATTRIBUTE_VALUE\n");break;
10794 case XML_PARSER_DTD:
10795 xmlGenericError(xmlGenericErrorContext,
10796 "PP: try DTD\n");break;
10797 case XML_PARSER_EPILOG:
10798 xmlGenericError(xmlGenericErrorContext,
10799 "PP: try EPILOG\n");break;
10800 case XML_PARSER_PI:
10801 xmlGenericError(xmlGenericErrorContext,
10802 "PP: try PI\n");break;
10803 case XML_PARSER_IGNORE:
10804 xmlGenericError(xmlGenericErrorContext,
10805 "PP: try IGNORE\n");break;
10806 }
10807#endif
10808
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010809 if ((ctxt->input != NULL) &&
10810 (ctxt->input->cur - ctxt->input->base > 4096)) {
Daniel Veillarda880b122003-04-21 21:36:41 +000010811 xmlSHRINK(ctxt);
10812 ctxt->checkIndex = 0;
10813 }
10814 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Aleksey Sanine48a3182002-05-09 18:20:01 +000010815
Daniel Veillarda880b122003-04-21 21:36:41 +000010816 while (1) {
Daniel Veillard14412512005-01-21 23:53:26 +000010817 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000010818 return(0);
10819
10820
Owen Taylor3473f882001-02-23 17:55:21 +000010821 /*
10822 * Pop-up of finished entities.
10823 */
10824 while ((RAW == 0) && (ctxt->inputNr > 1))
10825 xmlPopInput(ctxt);
10826
Daniel Veillard198c1bf2003-10-20 17:07:41 +000010827 if (ctxt->input == NULL) break;
Owen Taylor3473f882001-02-23 17:55:21 +000010828 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000010829 avail = ctxt->input->length -
10830 (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010831 else {
10832 /*
10833 * If we are operating on converted input, try to flush
10834 * remainng chars to avoid them stalling in the non-converted
10835 * buffer.
10836 */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010837 if (xmlBufIsEmpty(ctxt->input->buf->buffer) == 0) {
10838 size_t base = ctxt->input->base -
10839 xmlBufContent(ctxt->input->buf->buffer);
10840 size_t current = ctxt->input->cur - ctxt->input->base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000010841
10842 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010843 ctxt->input->base = xmlBufContent(ctxt->input->buf->buffer) +
10844 base;
Daniel Veillard158a4d22002-02-20 22:17:58 +000010845 ctxt->input->cur = ctxt->input->base + current;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010846 ctxt->input->end = xmlBufEnd(ctxt->input->buf->buffer);
Daniel Veillard158a4d22002-02-20 22:17:58 +000010847 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080010848 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard158a4d22002-02-20 22:17:58 +000010849 (ctxt->input->cur - ctxt->input->base);
10850 }
Owen Taylor3473f882001-02-23 17:55:21 +000010851 if (avail < 1)
10852 goto done;
10853 switch (ctxt->instate) {
10854 case XML_PARSER_EOF:
10855 /*
10856 * Document parsing is done !
10857 */
10858 goto done;
10859 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010860 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
10861 xmlChar start[4];
10862 xmlCharEncoding enc;
10863
10864 /*
10865 * Very first chars read from the document flow.
10866 */
10867 if (avail < 4)
10868 goto done;
10869
10870 /*
10871 * Get the 4 first bytes and decode the charset
10872 * if enc != XML_CHAR_ENCODING_NONE
William M. Brack3a1cd212005-02-11 14:35:54 +000010873 * plug some encoding conversion routines,
10874 * else xmlSwitchEncoding will set to (default)
10875 * UTF8.
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010876 */
10877 start[0] = RAW;
10878 start[1] = NXT(1);
10879 start[2] = NXT(2);
10880 start[3] = NXT(3);
10881 enc = xmlDetectCharEncoding(start, 4);
William M. Brack3a1cd212005-02-11 14:35:54 +000010882 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard0e4cd172001-06-28 12:13:56 +000010883 break;
10884 }
Owen Taylor3473f882001-02-23 17:55:21 +000010885
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000010886 if (avail < 2)
10887 goto done;
Owen Taylor3473f882001-02-23 17:55:21 +000010888 cur = ctxt->input->cur[0];
10889 next = ctxt->input->cur[1];
10890 if (cur == 0) {
10891 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10892 ctxt->sax->setDocumentLocator(ctxt->userData,
10893 &xmlDefaultSAXLocator);
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010894 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010895 ctxt->instate = XML_PARSER_EOF;
10896#ifdef DEBUG_PUSH
10897 xmlGenericError(xmlGenericErrorContext,
10898 "PP: entering EOF\n");
10899#endif
10900 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10901 ctxt->sax->endDocument(ctxt->userData);
10902 goto done;
10903 }
10904 if ((cur == '<') && (next == '?')) {
10905 /* PI or XML decl */
10906 if (avail < 5) return(ret);
10907 if ((!terminate) &&
10908 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
10909 return(ret);
10910 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10911 ctxt->sax->setDocumentLocator(ctxt->userData,
10912 &xmlDefaultSAXLocator);
10913 if ((ctxt->input->cur[2] == 'x') &&
10914 (ctxt->input->cur[3] == 'm') &&
10915 (ctxt->input->cur[4] == 'l') &&
William M. Brack76e95df2003-10-18 16:20:14 +000010916 (IS_BLANK_CH(ctxt->input->cur[5]))) {
Owen Taylor3473f882001-02-23 17:55:21 +000010917 ret += 5;
10918#ifdef DEBUG_PUSH
10919 xmlGenericError(xmlGenericErrorContext,
10920 "PP: Parsing XML Decl\n");
10921#endif
10922 xmlParseXMLDecl(ctxt);
10923 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
10924 /*
10925 * The XML REC instructs us to stop parsing right
10926 * here
10927 */
10928 ctxt->instate = XML_PARSER_EOF;
10929 return(0);
10930 }
10931 ctxt->standalone = ctxt->input->standalone;
10932 if ((ctxt->encoding == NULL) &&
10933 (ctxt->input->encoding != NULL))
10934 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
10935 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10936 (!ctxt->disableSAX))
10937 ctxt->sax->startDocument(ctxt->userData);
10938 ctxt->instate = XML_PARSER_MISC;
10939#ifdef DEBUG_PUSH
10940 xmlGenericError(xmlGenericErrorContext,
10941 "PP: entering MISC\n");
10942#endif
10943 } else {
10944 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
10945 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10946 (!ctxt->disableSAX))
10947 ctxt->sax->startDocument(ctxt->userData);
10948 ctxt->instate = XML_PARSER_MISC;
10949#ifdef DEBUG_PUSH
10950 xmlGenericError(xmlGenericErrorContext,
10951 "PP: entering MISC\n");
10952#endif
10953 }
10954 } else {
10955 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
10956 ctxt->sax->setDocumentLocator(ctxt->userData,
10957 &xmlDefaultSAXLocator);
10958 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
William M. Bracka3215c72004-07-31 16:24:01 +000010959 if (ctxt->version == NULL) {
10960 xmlErrMemory(ctxt, NULL);
10961 break;
10962 }
Owen Taylor3473f882001-02-23 17:55:21 +000010963 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
10964 (!ctxt->disableSAX))
10965 ctxt->sax->startDocument(ctxt->userData);
10966 ctxt->instate = XML_PARSER_MISC;
10967#ifdef DEBUG_PUSH
10968 xmlGenericError(xmlGenericErrorContext,
10969 "PP: entering MISC\n");
10970#endif
10971 }
10972 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000010973 case XML_PARSER_START_TAG: {
Daniel Veillarde57ec792003-09-10 10:50:59 +000010974 const xmlChar *name;
Daniel Veillard15495612009-09-05 15:04:41 +020010975 const xmlChar *prefix = NULL;
10976 const xmlChar *URI = NULL;
Daniel Veillarde57ec792003-09-10 10:50:59 +000010977 int nsNr = ctxt->nsNr;
Daniel Veillarda880b122003-04-21 21:36:41 +000010978
10979 if ((avail < 2) && (ctxt->inputNr == 1))
10980 goto done;
10981 cur = ctxt->input->cur[0];
10982 if (cur != '<') {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000010983 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
Daniel Veillarda880b122003-04-21 21:36:41 +000010984 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000010985 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
10986 ctxt->sax->endDocument(ctxt->userData);
10987 goto done;
10988 }
10989 if (!terminate) {
10990 if (ctxt->progressive) {
Daniel Veillardb3744002004-02-18 14:28:22 +000010991 /* > can be found unescaped in attribute values */
Daniel Veillardeb70f932004-07-05 16:46:09 +000010992 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000010993 goto done;
10994 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
10995 goto done;
10996 }
10997 }
10998 if (ctxt->spaceNr == 0)
10999 spacePush(ctxt, -1);
Daniel Veillard1114d002006-10-12 16:24:35 +000011000 else if (*ctxt->space == -2)
11001 spacePush(ctxt, -1);
Daniel Veillarda880b122003-04-21 21:36:41 +000011002 else
11003 spacePush(ctxt, *ctxt->space);
Daniel Veillard81273902003-09-30 00:43:48 +000011004#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011005 if (ctxt->sax2)
Daniel Veillard81273902003-09-30 00:43:48 +000011006#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011007 name = xmlParseStartTag2(ctxt, &prefix, &URI, &tlen);
Daniel Veillard81273902003-09-30 00:43:48 +000011008#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011009 else
11010 name = xmlParseStartTag(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011011#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011012 if (ctxt->instate == XML_PARSER_EOF)
11013 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011014 if (name == NULL) {
11015 spacePop(ctxt);
11016 ctxt->instate = XML_PARSER_EOF;
Daniel Veillarda880b122003-04-21 21:36:41 +000011017 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11018 ctxt->sax->endDocument(ctxt->userData);
11019 goto done;
11020 }
Daniel Veillard4432df22003-09-28 18:58:27 +000011021#ifdef LIBXML_VALID_ENABLED
Daniel Veillarda880b122003-04-21 21:36:41 +000011022 /*
11023 * [ VC: Root Element Type ]
11024 * The Name in the document type declaration must match
11025 * the element type of the root element.
11026 */
11027 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11028 ctxt->node && (ctxt->node == ctxt->myDoc->children))
11029 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
Daniel Veillard4432df22003-09-28 18:58:27 +000011030#endif /* LIBXML_VALID_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011031
11032 /*
11033 * Check for an Empty Element.
11034 */
11035 if ((RAW == '/') && (NXT(1) == '>')) {
11036 SKIP(2);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011037
11038 if (ctxt->sax2) {
11039 if ((ctxt->sax != NULL) &&
11040 (ctxt->sax->endElementNs != NULL) &&
11041 (!ctxt->disableSAX))
11042 ctxt->sax->endElementNs(ctxt->userData, name,
11043 prefix, URI);
Daniel Veillard48df9612005-01-04 21:50:05 +000011044 if (ctxt->nsNr - nsNr > 0)
11045 nsPop(ctxt, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011046#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011047 } else {
11048 if ((ctxt->sax != NULL) &&
11049 (ctxt->sax->endElement != NULL) &&
11050 (!ctxt->disableSAX))
11051 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011052#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarda880b122003-04-21 21:36:41 +000011053 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011054 spacePop(ctxt);
11055 if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011056 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011057 } else {
11058 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011059 }
11060 break;
11061 }
11062 if (RAW == '>') {
11063 NEXT;
11064 } else {
Daniel Veillardbc92eca2003-09-15 09:48:06 +000011065 xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
Daniel Veillarda880b122003-04-21 21:36:41 +000011066 "Couldn't find end of Start Tag %s\n",
11067 name);
Daniel Veillarda880b122003-04-21 21:36:41 +000011068 nodePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011069 spacePop(ctxt);
Daniel Veillarda880b122003-04-21 21:36:41 +000011070 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011071 if (ctxt->sax2)
11072 nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
Daniel Veillard81273902003-09-30 00:43:48 +000011073#ifdef LIBXML_SAX1_ENABLED
Daniel Veillarde57ec792003-09-10 10:50:59 +000011074 else
11075 namePush(ctxt, name);
Daniel Veillard81273902003-09-30 00:43:48 +000011076#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde57ec792003-09-10 10:50:59 +000011077
Daniel Veillarda880b122003-04-21 21:36:41 +000011078 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011079 break;
11080 }
11081 case XML_PARSER_CONTENT: {
11082 const xmlChar *test;
11083 unsigned int cons;
11084 if ((avail < 2) && (ctxt->inputNr == 1))
11085 goto done;
11086 cur = ctxt->input->cur[0];
11087 next = ctxt->input->cur[1];
11088
11089 test = CUR_PTR;
11090 cons = ctxt->input->consumed;
11091 if ((cur == '<') && (next == '/')) {
11092 ctxt->instate = XML_PARSER_END_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011093 break;
11094 } else if ((cur == '<') && (next == '?')) {
11095 if ((!terminate) &&
11096 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11097 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011098 xmlParsePI(ctxt);
11099 } else if ((cur == '<') && (next != '!')) {
11100 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011101 break;
11102 } else if ((cur == '<') && (next == '!') &&
11103 (ctxt->input->cur[2] == '-') &&
11104 (ctxt->input->cur[3] == '-')) {
Daniel Veillard6974feb2006-02-05 02:43:36 +000011105 int term;
11106
11107 if (avail < 4)
11108 goto done;
11109 ctxt->input->cur += 4;
11110 term = xmlParseLookupSequence(ctxt, '-', '-', '>');
11111 ctxt->input->cur -= 4;
11112 if ((!terminate) && (term < 0))
Daniel Veillarda880b122003-04-21 21:36:41 +000011113 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011114 xmlParseComment(ctxt);
11115 ctxt->instate = XML_PARSER_CONTENT;
11116 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
11117 (ctxt->input->cur[2] == '[') &&
11118 (ctxt->input->cur[3] == 'C') &&
11119 (ctxt->input->cur[4] == 'D') &&
11120 (ctxt->input->cur[5] == 'A') &&
11121 (ctxt->input->cur[6] == 'T') &&
11122 (ctxt->input->cur[7] == 'A') &&
11123 (ctxt->input->cur[8] == '[')) {
11124 SKIP(9);
11125 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillarda880b122003-04-21 21:36:41 +000011126 break;
11127 } else if ((cur == '<') && (next == '!') &&
11128 (avail < 9)) {
11129 goto done;
11130 } else if (cur == '&') {
11131 if ((!terminate) &&
11132 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
11133 goto done;
Daniel Veillarda880b122003-04-21 21:36:41 +000011134 xmlParseReference(ctxt);
11135 } else {
11136 /* TODO Avoid the extra copy, handle directly !!! */
11137 /*
11138 * Goal of the following test is:
11139 * - minimize calls to the SAX 'character' callback
11140 * when they are mergeable
11141 * - handle an problem for isBlank when we only parse
11142 * a sequence of blank chars and the next one is
11143 * not available to check against '<' presence.
11144 * - tries to homogenize the differences in SAX
11145 * callbacks between the push and pull versions
11146 * of the parser.
11147 */
11148 if ((ctxt->inputNr == 1) &&
11149 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
11150 if (!terminate) {
11151 if (ctxt->progressive) {
11152 if ((lastlt == NULL) ||
11153 (ctxt->input->cur > lastlt))
11154 goto done;
11155 } else if (xmlParseLookupSequence(ctxt,
11156 '<', 0, 0) < 0) {
11157 goto done;
11158 }
11159 }
11160 }
11161 ctxt->checkIndex = 0;
Daniel Veillarda880b122003-04-21 21:36:41 +000011162 xmlParseCharData(ctxt, 0);
11163 }
11164 /*
11165 * Pop-up of finished entities.
11166 */
11167 while ((RAW == 0) && (ctxt->inputNr > 1))
11168 xmlPopInput(ctxt);
11169 if ((cons == ctxt->input->consumed) && (test == CUR_PTR)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011170 xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
11171 "detected an error in element content\n");
Daniel Veillarda880b122003-04-21 21:36:41 +000011172 ctxt->instate = XML_PARSER_EOF;
11173 break;
11174 }
11175 break;
11176 }
11177 case XML_PARSER_END_TAG:
11178 if (avail < 2)
11179 goto done;
11180 if (!terminate) {
11181 if (ctxt->progressive) {
Daniel Veillardeb70f932004-07-05 16:46:09 +000011182 /* > can be found unescaped in attribute values */
11183 if ((lastgt == NULL) || (ctxt->input->cur >= lastgt))
Daniel Veillarda880b122003-04-21 21:36:41 +000011184 goto done;
11185 } else if (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0) {
11186 goto done;
11187 }
11188 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000011189 if (ctxt->sax2) {
11190 xmlParseEndTag2(ctxt,
11191 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 3],
11192 (void *) ctxt->pushTab[ctxt->nameNr * 3 - 2], 0,
Daniel Veillardc82c57e2004-01-12 16:24:34 +000011193 (int) (long) ctxt->pushTab[ctxt->nameNr * 3 - 1], 0);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011194 nameNsPop(ctxt);
Daniel Veillard81273902003-09-30 00:43:48 +000011195 }
11196#ifdef LIBXML_SAX1_ENABLED
11197 else
Daniel Veillarde57ec792003-09-10 10:50:59 +000011198 xmlParseEndTag1(ctxt, 0);
Daniel Veillard81273902003-09-30 00:43:48 +000011199#endif /* LIBXML_SAX1_ENABLED */
Chris Evans77404b82011-12-14 16:18:25 +080011200 if (ctxt->instate == XML_PARSER_EOF) {
11201 /* Nothing */
11202 } else if (ctxt->nameNr == 0) {
Daniel Veillarda880b122003-04-21 21:36:41 +000011203 ctxt->instate = XML_PARSER_EPILOG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011204 } else {
11205 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillarda880b122003-04-21 21:36:41 +000011206 }
11207 break;
11208 case XML_PARSER_CDATA_SECTION: {
11209 /*
11210 * The Push mode need to have the SAX callback for
11211 * cdataBlock merge back contiguous callbacks.
11212 */
11213 int base;
11214
11215 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
11216 if (base < 0) {
11217 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011218 int tmp;
11219
11220 tmp = xmlCheckCdataPush(ctxt->input->cur,
11221 XML_PARSER_BIG_BUFFER_SIZE);
11222 if (tmp < 0) {
11223 tmp = -tmp;
11224 ctxt->input->cur += tmp;
11225 goto encoding_error;
11226 }
Daniel Veillarda880b122003-04-21 21:36:41 +000011227 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
11228 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011229 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011230 ctxt->input->cur, tmp);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011231 else if (ctxt->sax->characters != NULL)
11232 ctxt->sax->characters(ctxt->userData,
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011233 ctxt->input->cur, tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011234 }
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011235 SKIPL(tmp);
Daniel Veillarda880b122003-04-21 21:36:41 +000011236 ctxt->checkIndex = 0;
11237 }
11238 goto done;
11239 } else {
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011240 int tmp;
11241
11242 tmp = xmlCheckCdataPush(ctxt->input->cur, base);
11243 if ((tmp < 0) || (tmp != base)) {
11244 tmp = -tmp;
11245 ctxt->input->cur += tmp;
11246 goto encoding_error;
11247 }
Daniel Veillardd0d2f092008-03-07 16:50:21 +000011248 if ((ctxt->sax != NULL) && (base == 0) &&
11249 (ctxt->sax->cdataBlock != NULL) &&
11250 (!ctxt->disableSAX)) {
11251 /*
11252 * Special case to provide identical behaviour
11253 * between pull and push parsers on enpty CDATA
11254 * sections
11255 */
11256 if ((ctxt->input->cur - ctxt->input->base >= 9) &&
11257 (!strncmp((const char *)&ctxt->input->cur[-9],
11258 "<![CDATA[", 9)))
11259 ctxt->sax->cdataBlock(ctxt->userData,
11260 BAD_CAST "", 0);
11261 } else if ((ctxt->sax != NULL) && (base > 0) &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011262 (!ctxt->disableSAX)) {
11263 if (ctxt->sax->cdataBlock != NULL)
11264 ctxt->sax->cdataBlock(ctxt->userData,
11265 ctxt->input->cur, base);
Daniel Veillardd9d32ae2003-07-05 20:32:43 +000011266 else if (ctxt->sax->characters != NULL)
11267 ctxt->sax->characters(ctxt->userData,
11268 ctxt->input->cur, base);
Daniel Veillarda880b122003-04-21 21:36:41 +000011269 }
Daniel Veillard0b787f32004-03-26 17:29:53 +000011270 SKIPL(base + 3);
Daniel Veillarda880b122003-04-21 21:36:41 +000011271 ctxt->checkIndex = 0;
11272 ctxt->instate = XML_PARSER_CONTENT;
11273#ifdef DEBUG_PUSH
11274 xmlGenericError(xmlGenericErrorContext,
11275 "PP: entering CONTENT\n");
11276#endif
11277 }
11278 break;
11279 }
Owen Taylor3473f882001-02-23 17:55:21 +000011280 case XML_PARSER_MISC:
11281 SKIP_BLANKS;
11282 if (ctxt->input->buf == NULL)
Daniel Veillarda880b122003-04-21 21:36:41 +000011283 avail = ctxt->input->length -
11284 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011285 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011286 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillarda880b122003-04-21 21:36:41 +000011287 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011288 if (avail < 2)
11289 goto done;
11290 cur = ctxt->input->cur[0];
11291 next = ctxt->input->cur[1];
11292 if ((cur == '<') && (next == '?')) {
11293 if ((!terminate) &&
11294 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11295 goto done;
11296#ifdef DEBUG_PUSH
11297 xmlGenericError(xmlGenericErrorContext,
11298 "PP: Parsing PI\n");
11299#endif
11300 xmlParsePI(ctxt);
Daniel Veillard40e4b212007-06-12 14:46:40 +000011301 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011302 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011303 (ctxt->input->cur[2] == '-') &&
11304 (ctxt->input->cur[3] == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +000011305 if ((!terminate) &&
11306 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11307 goto done;
11308#ifdef DEBUG_PUSH
11309 xmlGenericError(xmlGenericErrorContext,
11310 "PP: Parsing Comment\n");
11311#endif
11312 xmlParseComment(ctxt);
11313 ctxt->instate = XML_PARSER_MISC;
Daniel Veillarddfac9462007-06-12 14:44:32 +000011314 ctxt->checkIndex = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000011315 } else if ((cur == '<') && (next == '!') &&
Daniel Veillarda880b122003-04-21 21:36:41 +000011316 (ctxt->input->cur[2] == 'D') &&
11317 (ctxt->input->cur[3] == 'O') &&
11318 (ctxt->input->cur[4] == 'C') &&
11319 (ctxt->input->cur[5] == 'T') &&
11320 (ctxt->input->cur[6] == 'Y') &&
11321 (ctxt->input->cur[7] == 'P') &&
Owen Taylor3473f882001-02-23 17:55:21 +000011322 (ctxt->input->cur[8] == 'E')) {
11323 if ((!terminate) &&
11324 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
11325 goto done;
11326#ifdef DEBUG_PUSH
11327 xmlGenericError(xmlGenericErrorContext,
11328 "PP: Parsing internal subset\n");
11329#endif
11330 ctxt->inSubset = 1;
11331 xmlParseDocTypeDecl(ctxt);
11332 if (RAW == '[') {
11333 ctxt->instate = XML_PARSER_DTD;
11334#ifdef DEBUG_PUSH
11335 xmlGenericError(xmlGenericErrorContext,
11336 "PP: entering DTD\n");
11337#endif
11338 } else {
11339 /*
11340 * Create and update the external subset.
11341 */
11342 ctxt->inSubset = 2;
11343 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11344 (ctxt->sax->externalSubset != NULL))
11345 ctxt->sax->externalSubset(ctxt->userData,
11346 ctxt->intSubName, ctxt->extSubSystem,
11347 ctxt->extSubURI);
11348 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011349 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011350 ctxt->instate = XML_PARSER_PROLOG;
11351#ifdef DEBUG_PUSH
11352 xmlGenericError(xmlGenericErrorContext,
11353 "PP: entering PROLOG\n");
11354#endif
11355 }
11356 } else if ((cur == '<') && (next == '!') &&
11357 (avail < 9)) {
11358 goto done;
11359 } else {
11360 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillarda880b122003-04-21 21:36:41 +000011361 ctxt->progressive = 1;
11362 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011363#ifdef DEBUG_PUSH
11364 xmlGenericError(xmlGenericErrorContext,
11365 "PP: entering START_TAG\n");
11366#endif
11367 }
11368 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011369 case XML_PARSER_PROLOG:
11370 SKIP_BLANKS;
11371 if (ctxt->input->buf == NULL)
11372 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11373 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011374 avail = xmlBufUse(ctxt->input->buf->buffer) -
11375 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011376 if (avail < 2)
11377 goto done;
11378 cur = ctxt->input->cur[0];
11379 next = ctxt->input->cur[1];
11380 if ((cur == '<') && (next == '?')) {
11381 if ((!terminate) &&
11382 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11383 goto done;
11384#ifdef DEBUG_PUSH
11385 xmlGenericError(xmlGenericErrorContext,
11386 "PP: Parsing PI\n");
11387#endif
11388 xmlParsePI(ctxt);
11389 } else if ((cur == '<') && (next == '!') &&
11390 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11391 if ((!terminate) &&
11392 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11393 goto done;
11394#ifdef DEBUG_PUSH
11395 xmlGenericError(xmlGenericErrorContext,
11396 "PP: Parsing Comment\n");
11397#endif
11398 xmlParseComment(ctxt);
11399 ctxt->instate = XML_PARSER_PROLOG;
11400 } else if ((cur == '<') && (next == '!') &&
11401 (avail < 4)) {
11402 goto done;
11403 } else {
11404 ctxt->instate = XML_PARSER_START_TAG;
Daniel Veillard0df3bc32004-06-08 12:03:41 +000011405 if (ctxt->progressive == 0)
11406 ctxt->progressive = 1;
Daniel Veillarda880b122003-04-21 21:36:41 +000011407 xmlParseGetLasts(ctxt, &lastlt, &lastgt);
Owen Taylor3473f882001-02-23 17:55:21 +000011408#ifdef DEBUG_PUSH
11409 xmlGenericError(xmlGenericErrorContext,
11410 "PP: entering START_TAG\n");
11411#endif
11412 }
11413 break;
11414 case XML_PARSER_EPILOG:
11415 SKIP_BLANKS;
11416 if (ctxt->input->buf == NULL)
11417 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
11418 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011419 avail = xmlBufUse(ctxt->input->buf->buffer) -
11420 (ctxt->input->cur - ctxt->input->base);
Owen Taylor3473f882001-02-23 17:55:21 +000011421 if (avail < 2)
11422 goto done;
11423 cur = ctxt->input->cur[0];
11424 next = ctxt->input->cur[1];
11425 if ((cur == '<') && (next == '?')) {
11426 if ((!terminate) &&
11427 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
11428 goto done;
11429#ifdef DEBUG_PUSH
11430 xmlGenericError(xmlGenericErrorContext,
11431 "PP: Parsing PI\n");
11432#endif
11433 xmlParsePI(ctxt);
11434 ctxt->instate = XML_PARSER_EPILOG;
11435 } else if ((cur == '<') && (next == '!') &&
11436 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
11437 if ((!terminate) &&
11438 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
11439 goto done;
11440#ifdef DEBUG_PUSH
11441 xmlGenericError(xmlGenericErrorContext,
11442 "PP: Parsing Comment\n");
11443#endif
11444 xmlParseComment(ctxt);
11445 ctxt->instate = XML_PARSER_EPILOG;
11446 } else if ((cur == '<') && (next == '!') &&
11447 (avail < 4)) {
11448 goto done;
11449 } else {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011450 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000011451 ctxt->instate = XML_PARSER_EOF;
11452#ifdef DEBUG_PUSH
11453 xmlGenericError(xmlGenericErrorContext,
11454 "PP: entering EOF\n");
11455#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011456 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011457 ctxt->sax->endDocument(ctxt->userData);
11458 goto done;
11459 }
11460 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011461 case XML_PARSER_DTD: {
11462 /*
11463 * Sorry but progressive parsing of the internal subset
11464 * is not expected to be supported. We first check that
11465 * the full content of the internal subset is available and
11466 * the parsing is launched only at that point.
11467 * Internal subset ends up with "']' S? '>'" in an unescaped
11468 * section and not in a ']]>' sequence which are conditional
11469 * sections (whoever argued to keep that crap in XML deserve
11470 * a place in hell !).
11471 */
11472 int base, i;
11473 xmlChar *buf;
11474 xmlChar quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011475 size_t use;
Owen Taylor3473f882001-02-23 17:55:21 +000011476
11477 base = ctxt->input->cur - ctxt->input->base;
11478 if (base < 0) return(0);
11479 if (ctxt->checkIndex > base)
11480 base = ctxt->checkIndex;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011481 buf = xmlBufContent(ctxt->input->buf->buffer);
11482 use = xmlBufUse(ctxt->input->buf->buffer);
11483 for (;(unsigned int) base < use; base++) {
Owen Taylor3473f882001-02-23 17:55:21 +000011484 if (quote != 0) {
11485 if (buf[base] == quote)
11486 quote = 0;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011487 continue;
Owen Taylor3473f882001-02-23 17:55:21 +000011488 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011489 if ((quote == 0) && (buf[base] == '<')) {
11490 int found = 0;
11491 /* special handling of comments */
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011492 if (((unsigned int) base + 4 < use) &&
Daniel Veillard036143b2004-02-12 11:57:52 +000011493 (buf[base + 1] == '!') &&
11494 (buf[base + 2] == '-') &&
11495 (buf[base + 3] == '-')) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011496 for (;(unsigned int) base + 3 < use; base++) {
Daniel Veillard036143b2004-02-12 11:57:52 +000011497 if ((buf[base] == '-') &&
11498 (buf[base + 1] == '-') &&
11499 (buf[base + 2] == '>')) {
11500 found = 1;
11501 base += 2;
11502 break;
11503 }
11504 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011505 if (!found) {
11506#if 0
11507 fprintf(stderr, "unfinished comment\n");
11508#endif
11509 break; /* for */
11510 }
Daniel Veillard036143b2004-02-12 11:57:52 +000011511 continue;
11512 }
11513 }
Owen Taylor3473f882001-02-23 17:55:21 +000011514 if (buf[base] == '"') {
11515 quote = '"';
11516 continue;
11517 }
11518 if (buf[base] == '\'') {
11519 quote = '\'';
11520 continue;
11521 }
11522 if (buf[base] == ']') {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011523#if 0
11524 fprintf(stderr, "%c%c%c%c: ", buf[base],
11525 buf[base + 1], buf[base + 2], buf[base + 3]);
11526#endif
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011527 if ((unsigned int) base +1 >= use)
Owen Taylor3473f882001-02-23 17:55:21 +000011528 break;
11529 if (buf[base + 1] == ']') {
11530 /* conditional crap, skip both ']' ! */
11531 base++;
11532 continue;
11533 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011534 for (i = 1; (unsigned int) base + i < use; i++) {
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011535 if (buf[base + i] == '>') {
11536#if 0
11537 fprintf(stderr, "found\n");
11538#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011539 goto found_end_int_subset;
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011540 }
11541 if (!IS_BLANK_CH(buf[base + i])) {
11542#if 0
11543 fprintf(stderr, "not found\n");
11544#endif
11545 goto not_end_of_int_subset;
11546 }
Owen Taylor3473f882001-02-23 17:55:21 +000011547 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011548#if 0
11549 fprintf(stderr, "end of stream\n");
11550#endif
Owen Taylor3473f882001-02-23 17:55:21 +000011551 break;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011552
Owen Taylor3473f882001-02-23 17:55:21 +000011553 }
Daniel Veillard8f8a9dd2005-01-25 21:41:42 +000011554not_end_of_int_subset:
11555 continue; /* for */
Owen Taylor3473f882001-02-23 17:55:21 +000011556 }
11557 /*
11558 * We didn't found the end of the Internal subset
11559 */
Owen Taylor3473f882001-02-23 17:55:21 +000011560#ifdef DEBUG_PUSH
11561 if (next == 0)
11562 xmlGenericError(xmlGenericErrorContext,
11563 "PP: lookup of int subset end filed\n");
11564#endif
11565 goto done;
11566
11567found_end_int_subset:
11568 xmlParseInternalSubset(ctxt);
11569 ctxt->inSubset = 2;
11570 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
11571 (ctxt->sax->externalSubset != NULL))
11572 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11573 ctxt->extSubSystem, ctxt->extSubURI);
11574 ctxt->inSubset = 0;
Daniel Veillardac4118d2008-01-11 05:27:32 +000011575 xmlCleanSpecialAttr(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011576 ctxt->instate = XML_PARSER_PROLOG;
11577 ctxt->checkIndex = 0;
11578#ifdef DEBUG_PUSH
11579 xmlGenericError(xmlGenericErrorContext,
11580 "PP: entering PROLOG\n");
11581#endif
11582 break;
11583 }
11584 case XML_PARSER_COMMENT:
11585 xmlGenericError(xmlGenericErrorContext,
11586 "PP: internal error, state == COMMENT\n");
11587 ctxt->instate = XML_PARSER_CONTENT;
11588#ifdef DEBUG_PUSH
11589 xmlGenericError(xmlGenericErrorContext,
11590 "PP: entering CONTENT\n");
11591#endif
11592 break;
Daniel Veillarda880b122003-04-21 21:36:41 +000011593 case XML_PARSER_IGNORE:
11594 xmlGenericError(xmlGenericErrorContext,
11595 "PP: internal error, state == IGNORE");
11596 ctxt->instate = XML_PARSER_DTD;
11597#ifdef DEBUG_PUSH
11598 xmlGenericError(xmlGenericErrorContext,
11599 "PP: entering DTD\n");
11600#endif
11601 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011602 case XML_PARSER_PI:
11603 xmlGenericError(xmlGenericErrorContext,
11604 "PP: internal error, state == PI\n");
11605 ctxt->instate = XML_PARSER_CONTENT;
11606#ifdef DEBUG_PUSH
11607 xmlGenericError(xmlGenericErrorContext,
11608 "PP: entering CONTENT\n");
11609#endif
11610 break;
11611 case XML_PARSER_ENTITY_DECL:
11612 xmlGenericError(xmlGenericErrorContext,
11613 "PP: internal error, state == ENTITY_DECL\n");
11614 ctxt->instate = XML_PARSER_DTD;
11615#ifdef DEBUG_PUSH
11616 xmlGenericError(xmlGenericErrorContext,
11617 "PP: entering DTD\n");
11618#endif
11619 break;
11620 case XML_PARSER_ENTITY_VALUE:
11621 xmlGenericError(xmlGenericErrorContext,
11622 "PP: internal error, state == ENTITY_VALUE\n");
11623 ctxt->instate = XML_PARSER_CONTENT;
11624#ifdef DEBUG_PUSH
11625 xmlGenericError(xmlGenericErrorContext,
11626 "PP: entering DTD\n");
11627#endif
11628 break;
11629 case XML_PARSER_ATTRIBUTE_VALUE:
11630 xmlGenericError(xmlGenericErrorContext,
11631 "PP: internal error, state == ATTRIBUTE_VALUE\n");
11632 ctxt->instate = XML_PARSER_START_TAG;
11633#ifdef DEBUG_PUSH
11634 xmlGenericError(xmlGenericErrorContext,
11635 "PP: entering START_TAG\n");
11636#endif
11637 break;
11638 case XML_PARSER_SYSTEM_LITERAL:
11639 xmlGenericError(xmlGenericErrorContext,
11640 "PP: internal error, state == SYSTEM_LITERAL\n");
11641 ctxt->instate = XML_PARSER_START_TAG;
11642#ifdef DEBUG_PUSH
11643 xmlGenericError(xmlGenericErrorContext,
11644 "PP: entering START_TAG\n");
11645#endif
11646 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +000011647 case XML_PARSER_PUBLIC_LITERAL:
11648 xmlGenericError(xmlGenericErrorContext,
11649 "PP: internal error, state == PUBLIC_LITERAL\n");
11650 ctxt->instate = XML_PARSER_START_TAG;
11651#ifdef DEBUG_PUSH
11652 xmlGenericError(xmlGenericErrorContext,
11653 "PP: entering START_TAG\n");
11654#endif
11655 break;
Owen Taylor3473f882001-02-23 17:55:21 +000011656 }
11657 }
11658done:
11659#ifdef DEBUG_PUSH
11660 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
11661#endif
11662 return(ret);
Daniel Veillard3fa5e7e2005-07-04 11:12:25 +000011663encoding_error:
11664 {
11665 char buffer[150];
11666
11667 snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
11668 ctxt->input->cur[0], ctxt->input->cur[1],
11669 ctxt->input->cur[2], ctxt->input->cur[3]);
11670 __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
11671 "Input is not proper UTF-8, indicate encoding !\n%s",
11672 BAD_CAST buffer, NULL);
11673 }
11674 return(0);
Owen Taylor3473f882001-02-23 17:55:21 +000011675}
11676
11677/**
Owen Taylor3473f882001-02-23 17:55:21 +000011678 * xmlParseChunk:
11679 * @ctxt: an XML parser context
11680 * @chunk: an char array
11681 * @size: the size in byte of the chunk
11682 * @terminate: last chunk indicator
11683 *
11684 * Parse a Chunk of memory
11685 *
11686 * Returns zero if no error, the xmlParserErrors otherwise.
11687 */
11688int
11689xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
11690 int terminate) {
Daniel Veillarda617e242006-01-09 14:38:44 +000011691 int end_in_lf = 0;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011692 int remain = 0;
Daniel Veillarda617e242006-01-09 14:38:44 +000011693
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011694 if (ctxt == NULL)
11695 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard14412512005-01-21 23:53:26 +000011696 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011697 return(ctxt->errNo);
Daniel Veillard309f81d2003-09-23 09:02:53 +000011698 if (ctxt->instate == XML_PARSER_START)
11699 xmlDetectSAX2(ctxt);
Daniel Veillarda617e242006-01-09 14:38:44 +000011700 if ((size > 0) && (chunk != NULL) && (!terminate) &&
11701 (chunk[size - 1] == '\r')) {
11702 end_in_lf = 1;
11703 size--;
11704 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011705
11706xmldecl_done:
11707
Owen Taylor3473f882001-02-23 17:55:21 +000011708 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
11709 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011710 int base = ctxt->input->base - xmlBufContent(ctxt->input->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000011711 int cur = ctxt->input->cur - ctxt->input->base;
William M. Bracka3215c72004-07-31 16:24:01 +000011712 int res;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011713
11714 /*
11715 * Specific handling if we autodetected an encoding, we should not
11716 * push more than the first line ... which depend on the encoding
11717 * And only push the rest once the final encoding was detected
11718 */
11719 if ((ctxt->instate == XML_PARSER_START) && (ctxt->input != NULL) &&
11720 (ctxt->input->buf != NULL) && (ctxt->input->buf->encoder != NULL)) {
Nikolay Sivov73046832010-01-19 15:38:05 +010011721 unsigned int len = 45;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011722
11723 if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11724 BAD_CAST "UTF-16")) ||
11725 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11726 BAD_CAST "UTF16")))
11727 len = 90;
11728 else if ((xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11729 BAD_CAST "UCS-4")) ||
11730 (xmlStrcasestr(BAD_CAST ctxt->input->buf->encoder->name,
11731 BAD_CAST "UCS4")))
11732 len = 180;
11733
11734 if (ctxt->input->buf->rawconsumed < len)
11735 len -= ctxt->input->buf->rawconsumed;
11736
Raul Hudeaba9716a2010-03-15 10:13:29 +010011737 /*
11738 * Change size for reading the initial declaration only
11739 * if size is greater than len. Otherwise, memmove in xmlBufferAdd
11740 * will blindly copy extra bytes from memory.
11741 */
Daniel Veillard60587d62010-11-04 15:16:27 +010011742 if ((unsigned int) size > len) {
Raul Hudeaba9716a2010-03-15 10:13:29 +010011743 remain = size - len;
11744 size = len;
11745 } else {
11746 remain = 0;
11747 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011748 }
William M. Bracka3215c72004-07-31 16:24:01 +000011749 res =xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
11750 if (res < 0) {
11751 ctxt->errNo = XML_PARSER_EOF;
11752 ctxt->disableSAX = 1;
11753 return (XML_PARSER_EOF);
11754 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011755 ctxt->input->base = xmlBufContent(ctxt->input->buf->buffer) + base;
Owen Taylor3473f882001-02-23 17:55:21 +000011756 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011757 ctxt->input->end = xmlBufEnd(ctxt->input->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000011758#ifdef DEBUG_PUSH
11759 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11760#endif
11761
Owen Taylor3473f882001-02-23 17:55:21 +000011762 } else if (ctxt->instate != XML_PARSER_EOF) {
11763 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
11764 xmlParserInputBufferPtr in = ctxt->input->buf;
11765 if ((in->encoder != NULL) && (in->buffer != NULL) &&
11766 (in->raw != NULL)) {
11767 int nbchars;
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011768
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011769 nbchars = xmlCharEncInput(in);
Owen Taylor3473f882001-02-23 17:55:21 +000011770 if (nbchars < 0) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011771 /* TODO 2.6.0 */
Owen Taylor3473f882001-02-23 17:55:21 +000011772 xmlGenericError(xmlGenericErrorContext,
11773 "xmlParseChunk: encoder error\n");
11774 return(XML_ERR_INVALID_ENCODING);
11775 }
11776 }
11777 }
11778 }
Daniel Veillarda6c76a22009-08-26 14:37:00 +020011779 if (remain != 0)
11780 xmlParseTryOrFinish(ctxt, 0);
11781 else
11782 xmlParseTryOrFinish(ctxt, terminate);
11783 if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11784 return(ctxt->errNo);
11785
11786 if (remain != 0) {
11787 chunk += size;
11788 size = remain;
11789 remain = 0;
11790 goto xmldecl_done;
11791 }
Daniel Veillarda617e242006-01-09 14:38:44 +000011792 if ((end_in_lf == 1) && (ctxt->input != NULL) &&
11793 (ctxt->input->buf != NULL)) {
11794 xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
11795 }
Owen Taylor3473f882001-02-23 17:55:21 +000011796 if (terminate) {
11797 /*
11798 * Check for termination
11799 */
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011800 int avail = 0;
11801
11802 if (ctxt->input != NULL) {
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011803 if (ctxt->input->buf == NULL)
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011804 avail = ctxt->input->length -
11805 (ctxt->input->cur - ctxt->input->base);
11806 else
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011807 avail = xmlBufUse(ctxt->input->buf->buffer) -
Daniel Veillard36e5cd52004-11-02 14:52:23 +000011808 (ctxt->input->cur - ctxt->input->base);
11809 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011810
Owen Taylor3473f882001-02-23 17:55:21 +000011811 if ((ctxt->instate != XML_PARSER_EOF) &&
11812 (ctxt->instate != XML_PARSER_EPILOG)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011813 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011814 }
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011815 if ((ctxt->instate == XML_PARSER_EPILOG) && (avail > 0)) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011816 xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
Daniel Veillard819d5cb2002-10-14 11:15:18 +000011817 }
Owen Taylor3473f882001-02-23 17:55:21 +000011818 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +000011819 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +000011820 ctxt->sax->endDocument(ctxt->userData);
11821 }
11822 ctxt->instate = XML_PARSER_EOF;
11823 }
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011824 return((xmlParserErrors) ctxt->errNo);
Owen Taylor3473f882001-02-23 17:55:21 +000011825}
11826
11827/************************************************************************
11828 * *
11829 * I/O front end functions to the parser *
11830 * *
11831 ************************************************************************/
11832
11833/**
Owen Taylor3473f882001-02-23 17:55:21 +000011834 * xmlCreatePushParserCtxt:
11835 * @sax: a SAX handler
11836 * @user_data: The user data returned on SAX callbacks
11837 * @chunk: a pointer to an array of chars
11838 * @size: number of chars in the array
11839 * @filename: an optional file name or URI
11840 *
Daniel Veillard176d99f2002-07-06 19:22:28 +000011841 * Create a parser context for using the XML parser in push mode.
11842 * If @chunk and @size are non-NULL, the data is used to detect
11843 * the encoding. The remaining characters will be parsed so they
11844 * don't need to be fed in again through xmlParseChunk.
Owen Taylor3473f882001-02-23 17:55:21 +000011845 * To allow content encoding detection, @size should be >= 4
11846 * The value of @filename is used for fetching external entities
11847 * and error/warning reports.
11848 *
11849 * Returns the new parser context or NULL
11850 */
Daniel Veillard176d99f2002-07-06 19:22:28 +000011851
Owen Taylor3473f882001-02-23 17:55:21 +000011852xmlParserCtxtPtr
11853xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11854 const char *chunk, int size, const char *filename) {
11855 xmlParserCtxtPtr ctxt;
11856 xmlParserInputPtr inputStream;
11857 xmlParserInputBufferPtr buf;
11858 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
11859
11860 /*
11861 * plug some encoding conversion routines
11862 */
11863 if ((chunk != NULL) && (size >= 4))
11864 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
11865
11866 buf = xmlAllocParserInputBuffer(enc);
11867 if (buf == NULL) return(NULL);
11868
11869 ctxt = xmlNewParserCtxt();
11870 if (ctxt == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000011871 xmlErrMemory(NULL, "creating parser: out of memory\n");
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011872 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011873 return(NULL);
11874 }
Daniel Veillard03a53c32004-10-26 16:06:51 +000011875 ctxt->dictNames = 1;
Daniel Veillarde57ec792003-09-10 10:50:59 +000011876 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
11877 if (ctxt->pushTab == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011878 xmlErrMemory(ctxt, NULL);
Daniel Veillarde57ec792003-09-10 10:50:59 +000011879 xmlFreeParserInputBuffer(buf);
11880 xmlFreeParserCtxt(ctxt);
11881 return(NULL);
11882 }
Owen Taylor3473f882001-02-23 17:55:21 +000011883 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000011884#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000011885 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000011886#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011887 xmlFree(ctxt->sax);
11888 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
11889 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000011890 xmlErrMemory(ctxt, NULL);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011891 xmlFreeParserInputBuffer(buf);
11892 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000011893 return(NULL);
11894 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000011895 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
11896 if (sax->initialized == XML_SAX2_MAGIC)
11897 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
11898 else
11899 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000011900 if (user_data != NULL)
11901 ctxt->userData = user_data;
11902 }
11903 if (filename == NULL) {
11904 ctxt->directory = NULL;
11905 } else {
11906 ctxt->directory = xmlParserGetDirectory(filename);
11907 }
11908
11909 inputStream = xmlNewInputStream(ctxt);
11910 if (inputStream == NULL) {
11911 xmlFreeParserCtxt(ctxt);
Daniel Veillarda76fe5c2003-04-24 16:06:47 +000011912 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000011913 return(NULL);
11914 }
11915
11916 if (filename == NULL)
11917 inputStream->filename = NULL;
William M. Bracka3215c72004-07-31 16:24:01 +000011918 else {
Daniel Veillardf4862f02002-09-10 11:13:43 +000011919 inputStream->filename = (char *)
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000011920 xmlCanonicPath((const xmlChar *) filename);
William M. Bracka3215c72004-07-31 16:24:01 +000011921 if (inputStream->filename == NULL) {
11922 xmlFreeParserCtxt(ctxt);
11923 xmlFreeParserInputBuffer(buf);
11924 return(NULL);
11925 }
11926 }
Owen Taylor3473f882001-02-23 17:55:21 +000011927 inputStream->buf = buf;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011928 inputStream->cur =
11929 inputStream->base = xmlBufContent(inputStream->buf->buffer);
11930 inputStream->end = xmlBufEnd(inputStream->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000011931
11932 inputPush(ctxt, inputStream);
11933
William M. Brack3a1cd212005-02-11 14:35:54 +000011934 /*
11935 * If the caller didn't provide an initial 'chunk' for determining
11936 * the encoding, we set the context to XML_CHAR_ENCODING_NONE so
11937 * that it can be automatically determined later
11938 */
11939 if ((size == 0) || (chunk == NULL)) {
11940 ctxt->charset = XML_CHAR_ENCODING_NONE;
11941 } else if ((ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011942 int base = ctxt->input->base - xmlBufContent(ctxt->input->buf->buffer);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011943 int cur = ctxt->input->cur - ctxt->input->base;
11944
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011945 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011946
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011947 ctxt->input->base = xmlBufContent(ctxt->input->buf->buffer) + base;
Daniel Veillardaa39a0f2002-01-06 12:47:22 +000011948 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080011949 ctxt->input->end = xmlBufEnd(ctxt->input->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000011950#ifdef DEBUG_PUSH
11951 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
11952#endif
11953 }
11954
Daniel Veillard0e4cd172001-06-28 12:13:56 +000011955 if (enc != XML_CHAR_ENCODING_NONE) {
11956 xmlSwitchEncoding(ctxt, enc);
11957 }
11958
Owen Taylor3473f882001-02-23 17:55:21 +000011959 return(ctxt);
11960}
Daniel Veillard73b013f2003-09-30 12:36:01 +000011961#endif /* LIBXML_PUSH_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000011962
11963/**
Daniel Veillard39e5c892005-07-03 22:48:50 +000011964 * xmlStopParser:
11965 * @ctxt: an XML parser context
11966 *
11967 * Blocks further parser processing
11968 */
11969void
11970xmlStopParser(xmlParserCtxtPtr ctxt) {
11971 if (ctxt == NULL)
11972 return;
11973 ctxt->instate = XML_PARSER_EOF;
11974 ctxt->disableSAX = 1;
11975 if (ctxt->input != NULL) {
11976 ctxt->input->cur = BAD_CAST"";
11977 ctxt->input->base = ctxt->input->cur;
11978 }
11979}
11980
11981/**
Owen Taylor3473f882001-02-23 17:55:21 +000011982 * xmlCreateIOParserCtxt:
11983 * @sax: a SAX handler
11984 * @user_data: The user data returned on SAX callbacks
11985 * @ioread: an I/O read function
11986 * @ioclose: an I/O close function
11987 * @ioctx: an I/O handler
11988 * @enc: the charset encoding if known
11989 *
11990 * Create a parser context for using the XML parser with an existing
11991 * I/O stream
11992 *
11993 * Returns the new parser context or NULL
11994 */
11995xmlParserCtxtPtr
11996xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
11997 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
11998 void *ioctx, xmlCharEncoding enc) {
11999 xmlParserCtxtPtr ctxt;
12000 xmlParserInputPtr inputStream;
12001 xmlParserInputBufferPtr buf;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012002
Daniel Veillard42595322004-11-08 10:52:06 +000012003 if (ioread == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012004
12005 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
Lin Yi-Li24464be2012-05-10 16:14:55 +080012006 if (buf == NULL) {
12007 if (ioclose != NULL)
12008 ioclose(ioctx);
12009 return (NULL);
12010 }
Owen Taylor3473f882001-02-23 17:55:21 +000012011
12012 ctxt = xmlNewParserCtxt();
12013 if (ctxt == NULL) {
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012014 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000012015 return(NULL);
12016 }
12017 if (sax != NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000012018#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard092643b2003-09-25 14:29:29 +000012019 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Daniel Veillard81273902003-09-30 00:43:48 +000012020#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012021 xmlFree(ctxt->sax);
12022 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
12023 if (ctxt->sax == NULL) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012024 xmlErrMemory(ctxt, NULL);
Daniel Veillardf0af8ec2005-07-08 17:27:33 +000012025 xmlFreeParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012026 return(NULL);
12027 }
Daniel Veillard5ea30d72004-11-08 11:54:28 +000012028 memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
12029 if (sax->initialized == XML_SAX2_MAGIC)
12030 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
12031 else
12032 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
Owen Taylor3473f882001-02-23 17:55:21 +000012033 if (user_data != NULL)
12034 ctxt->userData = user_data;
Lin Yi-Li24464be2012-05-10 16:14:55 +080012035 }
Owen Taylor3473f882001-02-23 17:55:21 +000012036
12037 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12038 if (inputStream == NULL) {
12039 xmlFreeParserCtxt(ctxt);
12040 return(NULL);
12041 }
12042 inputPush(ctxt, inputStream);
12043
12044 return(ctxt);
12045}
12046
Daniel Veillard4432df22003-09-28 18:58:27 +000012047#ifdef LIBXML_VALID_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012048/************************************************************************
12049 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +000012050 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +000012051 * *
12052 ************************************************************************/
12053
12054/**
12055 * xmlIOParseDTD:
12056 * @sax: the SAX handler block or NULL
12057 * @input: an Input Buffer
12058 * @enc: the charset encoding if known
12059 *
12060 * Load and parse a DTD
12061 *
12062 * Returns the resulting xmlDtdPtr or NULL in case of error.
Daniel Veillard402b3442006-10-13 10:28:21 +000012063 * @input will be freed by the function in any case.
Owen Taylor3473f882001-02-23 17:55:21 +000012064 */
12065
12066xmlDtdPtr
12067xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12068 xmlCharEncoding enc) {
12069 xmlDtdPtr ret = NULL;
12070 xmlParserCtxtPtr ctxt;
12071 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012072 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +000012073
12074 if (input == NULL)
12075 return(NULL);
12076
12077 ctxt = xmlNewParserCtxt();
12078 if (ctxt == NULL) {
Daniel Veillard402b3442006-10-13 10:28:21 +000012079 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012080 return(NULL);
12081 }
12082
12083 /*
12084 * Set-up the SAX context
12085 */
12086 if (sax != NULL) {
12087 if (ctxt->sax != NULL)
12088 xmlFree(ctxt->sax);
12089 ctxt->sax = sax;
Daniel Veillard500a1de2004-03-22 15:22:58 +000012090 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012091 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012092 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012093
12094 /*
12095 * generate a parser input from the I/O handler
12096 */
12097
Daniel Veillard43caefb2003-12-07 19:32:22 +000012098 pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
Owen Taylor3473f882001-02-23 17:55:21 +000012099 if (pinput == NULL) {
12100 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard402b3442006-10-13 10:28:21 +000012101 xmlFreeParserInputBuffer(input);
Owen Taylor3473f882001-02-23 17:55:21 +000012102 xmlFreeParserCtxt(ctxt);
12103 return(NULL);
12104 }
12105
12106 /*
12107 * plug some encoding conversion routines here.
12108 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012109 if (xmlPushInput(ctxt, pinput) < 0) {
12110 if (sax != NULL) ctxt->sax = NULL;
12111 xmlFreeParserCtxt(ctxt);
12112 return(NULL);
12113 }
Daniel Veillard43caefb2003-12-07 19:32:22 +000012114 if (enc != XML_CHAR_ENCODING_NONE) {
12115 xmlSwitchEncoding(ctxt, enc);
12116 }
Owen Taylor3473f882001-02-23 17:55:21 +000012117
12118 pinput->filename = NULL;
12119 pinput->line = 1;
12120 pinput->col = 1;
12121 pinput->base = ctxt->input->cur;
12122 pinput->cur = ctxt->input->cur;
12123 pinput->free = NULL;
12124
12125 /*
12126 * let's parse that entity knowing it's an external subset.
12127 */
12128 ctxt->inSubset = 2;
12129 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012130 if (ctxt->myDoc == NULL) {
12131 xmlErrMemory(ctxt, "New Doc failed");
12132 return(NULL);
12133 }
12134 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012135 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12136 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +000012137
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012138 if ((enc == XML_CHAR_ENCODING_NONE) &&
12139 ((ctxt->input->end - ctxt->input->cur) >= 4)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +000012140 /*
12141 * Get the 4 first bytes and decode the charset
12142 * if enc != XML_CHAR_ENCODING_NONE
12143 * plug some encoding conversion routines.
12144 */
12145 start[0] = RAW;
12146 start[1] = NXT(1);
12147 start[2] = NXT(2);
12148 start[3] = NXT(3);
12149 enc = xmlDetectCharEncoding(start, 4);
12150 if (enc != XML_CHAR_ENCODING_NONE) {
12151 xmlSwitchEncoding(ctxt, enc);
12152 }
12153 }
12154
Owen Taylor3473f882001-02-23 17:55:21 +000012155 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12156
12157 if (ctxt->myDoc != NULL) {
12158 if (ctxt->wellFormed) {
12159 ret = ctxt->myDoc->extSubset;
12160 ctxt->myDoc->extSubset = NULL;
Daniel Veillard329456a2003-04-26 21:21:00 +000012161 if (ret != NULL) {
12162 xmlNodePtr tmp;
12163
12164 ret->doc = NULL;
12165 tmp = ret->children;
12166 while (tmp != NULL) {
12167 tmp->doc = NULL;
12168 tmp = tmp->next;
12169 }
12170 }
Owen Taylor3473f882001-02-23 17:55:21 +000012171 } else {
12172 ret = NULL;
12173 }
12174 xmlFreeDoc(ctxt->myDoc);
12175 ctxt->myDoc = NULL;
12176 }
12177 if (sax != NULL) ctxt->sax = NULL;
12178 xmlFreeParserCtxt(ctxt);
12179
12180 return(ret);
12181}
12182
12183/**
12184 * xmlSAXParseDTD:
12185 * @sax: the SAX handler block
12186 * @ExternalID: a NAME* containing the External ID of the DTD
12187 * @SystemID: a NAME* containing the URL to the DTD
12188 *
12189 * Load and parse an external subset.
12190 *
12191 * Returns the resulting xmlDtdPtr or NULL in case of error.
12192 */
12193
12194xmlDtdPtr
12195xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12196 const xmlChar *SystemID) {
12197 xmlDtdPtr ret = NULL;
12198 xmlParserCtxtPtr ctxt;
12199 xmlParserInputPtr input = NULL;
12200 xmlCharEncoding enc;
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012201 xmlChar* systemIdCanonic;
Owen Taylor3473f882001-02-23 17:55:21 +000012202
12203 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12204
12205 ctxt = xmlNewParserCtxt();
12206 if (ctxt == NULL) {
12207 return(NULL);
12208 }
12209
12210 /*
12211 * Set-up the SAX context
12212 */
12213 if (sax != NULL) {
12214 if (ctxt->sax != NULL)
12215 xmlFree(ctxt->sax);
12216 ctxt->sax = sax;
Daniel Veillardbf1e3d82003-08-14 23:57:26 +000012217 ctxt->userData = ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000012218 }
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012219
12220 /*
12221 * Canonicalise the system ID
12222 */
12223 systemIdCanonic = xmlCanonicPath(SystemID);
Daniel Veillardc93a19f2004-10-04 11:53:20 +000012224 if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012225 xmlFreeParserCtxt(ctxt);
12226 return(NULL);
12227 }
Owen Taylor3473f882001-02-23 17:55:21 +000012228
12229 /*
12230 * Ask the Entity resolver to load the damn thing
12231 */
12232
12233 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
Daniel Veillarda9557952006-10-12 12:53:15 +000012234 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12235 systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012236 if (input == NULL) {
12237 if (sax != NULL) ctxt->sax = NULL;
12238 xmlFreeParserCtxt(ctxt);
Daniel Veillard34099b42004-11-04 17:34:35 +000012239 if (systemIdCanonic != NULL)
12240 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012241 return(NULL);
12242 }
12243
12244 /*
12245 * plug some encoding conversion routines here.
12246 */
Daniel Veillarda8f09ce2008-08-27 13:02:01 +000012247 if (xmlPushInput(ctxt, input) < 0) {
12248 if (sax != NULL) ctxt->sax = NULL;
12249 xmlFreeParserCtxt(ctxt);
12250 if (systemIdCanonic != NULL)
12251 xmlFree(systemIdCanonic);
12252 return(NULL);
12253 }
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012254 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12255 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
12256 xmlSwitchEncoding(ctxt, enc);
12257 }
Owen Taylor3473f882001-02-23 17:55:21 +000012258
12259 if (input->filename == NULL)
Igor Zlatkovic07d59762004-08-24 19:12:51 +000012260 input->filename = (char *) systemIdCanonic;
12261 else
12262 xmlFree(systemIdCanonic);
Owen Taylor3473f882001-02-23 17:55:21 +000012263 input->line = 1;
12264 input->col = 1;
12265 input->base = ctxt->input->cur;
12266 input->cur = ctxt->input->cur;
12267 input->free = NULL;
12268
12269 /*
12270 * let's parse that entity knowing it's an external subset.
12271 */
12272 ctxt->inSubset = 2;
12273 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillardae0765b2008-07-31 19:54:59 +000012274 if (ctxt->myDoc == NULL) {
12275 xmlErrMemory(ctxt, "New Doc failed");
12276 if (sax != NULL) ctxt->sax = NULL;
12277 xmlFreeParserCtxt(ctxt);
12278 return(NULL);
12279 }
12280 ctxt->myDoc->properties = XML_DOC_INTERNAL;
Owen Taylor3473f882001-02-23 17:55:21 +000012281 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12282 ExternalID, SystemID);
12283 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12284
12285 if (ctxt->myDoc != NULL) {
12286 if (ctxt->wellFormed) {
12287 ret = ctxt->myDoc->extSubset;
12288 ctxt->myDoc->extSubset = NULL;
Daniel Veillardc5573462003-04-25 16:43:49 +000012289 if (ret != NULL) {
12290 xmlNodePtr tmp;
12291
12292 ret->doc = NULL;
12293 tmp = ret->children;
12294 while (tmp != NULL) {
12295 tmp->doc = NULL;
12296 tmp = tmp->next;
12297 }
12298 }
Owen Taylor3473f882001-02-23 17:55:21 +000012299 } else {
12300 ret = NULL;
12301 }
12302 xmlFreeDoc(ctxt->myDoc);
12303 ctxt->myDoc = NULL;
12304 }
12305 if (sax != NULL) ctxt->sax = NULL;
12306 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012307
Owen Taylor3473f882001-02-23 17:55:21 +000012308 return(ret);
12309}
12310
Daniel Veillard4432df22003-09-28 18:58:27 +000012311
Owen Taylor3473f882001-02-23 17:55:21 +000012312/**
12313 * xmlParseDTD:
12314 * @ExternalID: a NAME* containing the External ID of the DTD
12315 * @SystemID: a NAME* containing the URL to the DTD
12316 *
12317 * Load and parse an external subset.
Daniel Veillard0161e632008-08-28 15:36:32 +000012318 *
Owen Taylor3473f882001-02-23 17:55:21 +000012319 * Returns the resulting xmlDtdPtr or NULL in case of error.
12320 */
12321
12322xmlDtdPtr
12323xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12324 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12325}
Daniel Veillard4432df22003-09-28 18:58:27 +000012326#endif /* LIBXML_VALID_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000012327
12328/************************************************************************
12329 * *
12330 * Front ends when parsing an Entity *
12331 * *
12332 ************************************************************************/
12333
12334/**
Owen Taylor3473f882001-02-23 17:55:21 +000012335 * xmlParseCtxtExternalEntity:
12336 * @ctx: the existing parsing context
12337 * @URL: the URL for the entity to load
12338 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012339 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012340 *
12341 * Parse an external general entity within an existing parsing context
12342 * An external general parsed entity is well-formed if it matches the
12343 * production labeled extParsedEnt.
12344 *
12345 * [78] extParsedEnt ::= TextDecl? content
12346 *
12347 * Returns 0 if the entity is well formed, -1 in case of args problem and
12348 * the parser error code otherwise
12349 */
12350
12351int
12352xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012353 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +000012354 xmlParserCtxtPtr ctxt;
12355 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012356 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012357 xmlSAXHandlerPtr oldsax = NULL;
12358 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012359 xmlChar start[4];
12360 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012361
Daniel Veillardce682bc2004-11-05 17:22:25 +000012362 if (ctx == NULL) return(-1);
12363
Daniel Veillard0161e632008-08-28 15:36:32 +000012364 if (((ctx->depth > 40) && ((ctx->options & XML_PARSE_HUGE) == 0)) ||
12365 (ctx->depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012366 return(XML_ERR_ENTITY_LOOP);
12367 }
12368
Daniel Veillardcda96922001-08-21 10:56:31 +000012369 if (lst != NULL)
12370 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012371 if ((URL == NULL) && (ID == NULL))
12372 return(-1);
12373 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
12374 return(-1);
12375
Rob Richards798743a2009-06-19 13:54:25 -040012376 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, ctx);
Daniel Veillard2937b3a2006-10-10 08:52:34 +000012377 if (ctxt == NULL) {
12378 return(-1);
12379 }
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012380
Owen Taylor3473f882001-02-23 17:55:21 +000012381 oldsax = ctxt->sax;
12382 ctxt->sax = ctx->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012383 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012384 newDoc = xmlNewDoc(BAD_CAST "1.0");
12385 if (newDoc == NULL) {
12386 xmlFreeParserCtxt(ctxt);
12387 return(-1);
12388 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012389 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012390 if (ctx->myDoc->dict) {
12391 newDoc->dict = ctx->myDoc->dict;
12392 xmlDictReference(newDoc->dict);
12393 }
Owen Taylor3473f882001-02-23 17:55:21 +000012394 if (ctx->myDoc != NULL) {
12395 newDoc->intSubset = ctx->myDoc->intSubset;
12396 newDoc->extSubset = ctx->myDoc->extSubset;
12397 }
12398 if (ctx->myDoc->URL != NULL) {
12399 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
12400 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012401 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12402 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012403 ctxt->sax = oldsax;
12404 xmlFreeParserCtxt(ctxt);
12405 newDoc->intSubset = NULL;
12406 newDoc->extSubset = NULL;
12407 xmlFreeDoc(newDoc);
12408 return(-1);
12409 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012410 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012411 nodePush(ctxt, newDoc->children);
12412 if (ctx->myDoc == NULL) {
12413 ctxt->myDoc = newDoc;
12414 } else {
12415 ctxt->myDoc = ctx->myDoc;
12416 newDoc->children->doc = ctx->myDoc;
12417 }
12418
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012419 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012420 * Get the 4 first bytes and decode the charset
12421 * if enc != XML_CHAR_ENCODING_NONE
12422 * plug some encoding conversion routines.
12423 */
12424 GROW
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012425 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12426 start[0] = RAW;
12427 start[1] = NXT(1);
12428 start[2] = NXT(2);
12429 start[3] = NXT(3);
12430 enc = xmlDetectCharEncoding(start, 4);
12431 if (enc != XML_CHAR_ENCODING_NONE) {
12432 xmlSwitchEncoding(ctxt, enc);
12433 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012434 }
12435
Owen Taylor3473f882001-02-23 17:55:21 +000012436 /*
12437 * Parse a possible text declaration first
12438 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012439 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012440 xmlParseTextDecl(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012441 /*
12442 * An XML-1.0 document can't reference an entity not XML-1.0
12443 */
12444 if ((xmlStrEqual(ctx->version, BAD_CAST "1.0")) &&
12445 (!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12446 xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12447 "Version mismatch between document and entity\n");
12448 }
Owen Taylor3473f882001-02-23 17:55:21 +000012449 }
12450
12451 /*
Daniel Veillard4aa68ab2012-04-02 17:50:54 +080012452 * If the user provided its own SAX callbacks then reuse the
12453 * useData callback field, otherwise the expected setup in a
12454 * DOM builder is to have userData == ctxt
12455 */
12456 if (ctx->userData == ctx)
12457 ctxt->userData = ctxt;
12458 else
12459 ctxt->userData = ctx->userData;
12460
12461 /*
Owen Taylor3473f882001-02-23 17:55:21 +000012462 * Doing validity checking on chunk doesn't make sense
12463 */
12464 ctxt->instate = XML_PARSER_CONTENT;
12465 ctxt->validate = ctx->validate;
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012466 ctxt->valid = ctx->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012467 ctxt->loadsubset = ctx->loadsubset;
12468 ctxt->depth = ctx->depth + 1;
12469 ctxt->replaceEntities = ctx->replaceEntities;
12470 if (ctxt->validate) {
12471 ctxt->vctxt.error = ctx->vctxt.error;
12472 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +000012473 } else {
12474 ctxt->vctxt.error = NULL;
12475 ctxt->vctxt.warning = NULL;
12476 }
Daniel Veillarda9142e72001-06-19 11:07:54 +000012477 ctxt->vctxt.nodeTab = NULL;
12478 ctxt->vctxt.nodeNr = 0;
12479 ctxt->vctxt.nodeMax = 0;
12480 ctxt->vctxt.node = NULL;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012481 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12482 ctxt->dict = ctx->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012483 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12484 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12485 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012486 ctxt->dictNames = ctx->dictNames;
12487 ctxt->attsDefault = ctx->attsDefault;
12488 ctxt->attsSpecial = ctx->attsSpecial;
William M. Brack503b6102004-08-19 02:17:27 +000012489 ctxt->linenumbers = ctx->linenumbers;
Owen Taylor3473f882001-02-23 17:55:21 +000012490
12491 xmlParseContent(ctxt);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012492
Daniel Veillard5f8d1a32003-03-23 21:02:00 +000012493 ctx->validate = ctxt->validate;
12494 ctx->valid = ctxt->valid;
Owen Taylor3473f882001-02-23 17:55:21 +000012495 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012496 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012497 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012498 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012499 }
12500 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012501 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012502 }
12503
12504 if (!ctxt->wellFormed) {
12505 if (ctxt->errNo == 0)
12506 ret = 1;
12507 else
12508 ret = ctxt->errNo;
12509 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +000012510 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012511 xmlNodePtr cur;
12512
12513 /*
12514 * Return the newly created nodeset after unlinking it from
12515 * they pseudo parent.
12516 */
12517 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +000012518 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +000012519 while (cur != NULL) {
12520 cur->parent = NULL;
12521 cur = cur->next;
12522 }
12523 newDoc->children->children = NULL;
12524 }
12525 ret = 0;
12526 }
12527 ctxt->sax = oldsax;
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000012528 ctxt->dict = NULL;
12529 ctxt->attsDefault = NULL;
12530 ctxt->attsSpecial = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012531 xmlFreeParserCtxt(ctxt);
12532 newDoc->intSubset = NULL;
12533 newDoc->extSubset = NULL;
12534 xmlFreeDoc(newDoc);
Daniel Veillard40ec29a2008-07-30 12:35:40 +000012535
Owen Taylor3473f882001-02-23 17:55:21 +000012536 return(ret);
12537}
12538
12539/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012540 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +000012541 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012542 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +000012543 * @sax: the SAX handler bloc (possibly NULL)
12544 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12545 * @depth: Used for loop detection, use 0
12546 * @URL: the URL for the entity to load
12547 * @ID: the System ID for the entity to load
12548 * @list: the return value for the set of parsed nodes
12549 *
Daniel Veillard257d9102001-05-08 10:41:44 +000012550 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +000012551 *
12552 * Returns 0 if the entity is well formed, -1 in case of args problem and
12553 * the parser error code otherwise
12554 */
12555
Daniel Veillard7d515752003-09-26 19:12:37 +000012556static xmlParserErrors
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012557xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12558 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000012559 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012560 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +000012561 xmlParserCtxtPtr ctxt;
12562 xmlDocPtr newDoc;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012563 xmlNodePtr newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000012564 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7d515752003-09-26 19:12:37 +000012565 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard87a764e2001-06-20 17:41:10 +000012566 xmlChar start[4];
12567 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +000012568
Daniel Veillard0161e632008-08-28 15:36:32 +000012569 if (((depth > 40) &&
12570 ((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12571 (depth > 1024)) {
Owen Taylor3473f882001-02-23 17:55:21 +000012572 return(XML_ERR_ENTITY_LOOP);
12573 }
12574
Owen Taylor3473f882001-02-23 17:55:21 +000012575 if (list != NULL)
12576 *list = NULL;
12577 if ((URL == NULL) && (ID == NULL))
Daniel Veillard7d515752003-09-26 19:12:37 +000012578 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard30e76072006-03-09 14:13:55 +000012579 if (doc == NULL)
Daniel Veillard7d515752003-09-26 19:12:37 +000012580 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012581
12582
Rob Richards9c0aa472009-03-26 18:10:19 +000012583 ctxt = xmlCreateEntityParserCtxtInternal(URL, ID, NULL, oldctxt);
William M. Brackb670e2e2003-09-27 01:05:55 +000012584 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Owen Taylor3473f882001-02-23 17:55:21 +000012585 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012586 if (oldctxt != NULL) {
12587 ctxt->_private = oldctxt->_private;
12588 ctxt->loadsubset = oldctxt->loadsubset;
12589 ctxt->validate = oldctxt->validate;
12590 ctxt->external = oldctxt->external;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012591 ctxt->record_info = oldctxt->record_info;
12592 ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12593 ctxt->node_seq.length = oldctxt->node_seq.length;
12594 ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012595 } else {
12596 /*
12597 * Doing validity checking on chunk without context
12598 * doesn't make sense
12599 */
12600 ctxt->_private = NULL;
12601 ctxt->validate = 0;
12602 ctxt->external = 2;
12603 ctxt->loadsubset = 0;
12604 }
Owen Taylor3473f882001-02-23 17:55:21 +000012605 if (sax != NULL) {
12606 oldsax = ctxt->sax;
12607 ctxt->sax = sax;
12608 if (user_data != NULL)
12609 ctxt->userData = user_data;
12610 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000012611 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000012612 newDoc = xmlNewDoc(BAD_CAST "1.0");
12613 if (newDoc == NULL) {
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012614 ctxt->node_seq.maximum = 0;
12615 ctxt->node_seq.length = 0;
12616 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012617 xmlFreeParserCtxt(ctxt);
Daniel Veillard7d515752003-09-26 19:12:37 +000012618 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012619 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012620 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard30e76072006-03-09 14:13:55 +000012621 newDoc->intSubset = doc->intSubset;
12622 newDoc->extSubset = doc->extSubset;
12623 newDoc->dict = doc->dict;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012624 xmlDictReference(newDoc->dict);
12625
Owen Taylor3473f882001-02-23 17:55:21 +000012626 if (doc->URL != NULL) {
12627 newDoc->URL = xmlStrdup(doc->URL);
12628 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012629 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12630 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000012631 if (sax != NULL)
12632 ctxt->sax = oldsax;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012633 ctxt->node_seq.maximum = 0;
12634 ctxt->node_seq.length = 0;
12635 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012636 xmlFreeParserCtxt(ctxt);
12637 newDoc->intSubset = NULL;
12638 newDoc->extSubset = NULL;
12639 xmlFreeDoc(newDoc);
Daniel Veillard7d515752003-09-26 19:12:37 +000012640 return(XML_ERR_INTERNAL_ERROR);
Owen Taylor3473f882001-02-23 17:55:21 +000012641 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012642 xmlAddChild((xmlNodePtr) newDoc, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000012643 nodePush(ctxt, newDoc->children);
Daniel Veillard30e76072006-03-09 14:13:55 +000012644 ctxt->myDoc = doc;
12645 newRoot->doc = doc;
Owen Taylor3473f882001-02-23 17:55:21 +000012646
Daniel Veillard0161e632008-08-28 15:36:32 +000012647 /*
Daniel Veillard87a764e2001-06-20 17:41:10 +000012648 * Get the 4 first bytes and decode the charset
12649 * if enc != XML_CHAR_ENCODING_NONE
12650 * plug some encoding conversion routines.
12651 */
12652 GROW;
Daniel Veillard4aede2e2003-10-17 12:43:59 +000012653 if ((ctxt->input->end - ctxt->input->cur) >= 4) {
12654 start[0] = RAW;
12655 start[1] = NXT(1);
12656 start[2] = NXT(2);
12657 start[3] = NXT(3);
12658 enc = xmlDetectCharEncoding(start, 4);
12659 if (enc != XML_CHAR_ENCODING_NONE) {
12660 xmlSwitchEncoding(ctxt, enc);
12661 }
Daniel Veillard87a764e2001-06-20 17:41:10 +000012662 }
12663
Owen Taylor3473f882001-02-23 17:55:21 +000012664 /*
12665 * Parse a possible text declaration first
12666 */
Daniel Veillarda07050d2003-10-19 14:46:32 +000012667 if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
Owen Taylor3473f882001-02-23 17:55:21 +000012668 xmlParseTextDecl(ctxt);
12669 }
12670
Owen Taylor3473f882001-02-23 17:55:21 +000012671 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +000012672 ctxt->depth = depth;
12673
12674 xmlParseContent(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000012675
Daniel Veillard561b7f82002-03-20 21:55:57 +000012676 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012677 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard561b7f82002-03-20 21:55:57 +000012678 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012679 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012680 }
12681 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012682 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000012683 }
12684
12685 if (!ctxt->wellFormed) {
12686 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012687 ret = XML_ERR_INTERNAL_ERROR;
Owen Taylor3473f882001-02-23 17:55:21 +000012688 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012689 ret = (xmlParserErrors)ctxt->errNo;
Owen Taylor3473f882001-02-23 17:55:21 +000012690 } else {
12691 if (list != NULL) {
12692 xmlNodePtr cur;
12693
12694 /*
12695 * Return the newly created nodeset after unlinking it from
12696 * they pseudo parent.
12697 */
12698 cur = newDoc->children->children;
12699 *list = cur;
12700 while (cur != NULL) {
12701 cur->parent = NULL;
12702 cur = cur->next;
12703 }
12704 newDoc->children->children = NULL;
12705 }
Daniel Veillard7d515752003-09-26 19:12:37 +000012706 ret = XML_ERR_OK;
Owen Taylor3473f882001-02-23 17:55:21 +000012707 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012708
12709 /*
12710 * Record in the parent context the number of entities replacement
12711 * done when parsing that reference.
12712 */
Daniel Veillard76d36452009-09-07 11:19:33 +020012713 if (oldctxt != NULL)
12714 oldctxt->nbentities += ctxt->nbentities;
12715
Daniel Veillard0161e632008-08-28 15:36:32 +000012716 /*
12717 * Also record the size of the entity parsed
12718 */
12719 if (ctxt->input != NULL) {
12720 oldctxt->sizeentities += ctxt->input->consumed;
12721 oldctxt->sizeentities += (ctxt->input->cur - ctxt->input->base);
12722 }
12723 /*
12724 * And record the last error if any
12725 */
12726 if (ctxt->lastError.code != XML_ERR_OK)
12727 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12728
Owen Taylor3473f882001-02-23 17:55:21 +000012729 if (sax != NULL)
12730 ctxt->sax = oldsax;
Daniel Veillard0046c0f2003-02-23 13:52:30 +000012731 oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12732 oldctxt->node_seq.length = ctxt->node_seq.length;
12733 oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
Daniel Veillard44e1dd02003-02-21 23:23:28 +000012734 ctxt->node_seq.maximum = 0;
12735 ctxt->node_seq.length = 0;
12736 ctxt->node_seq.buffer = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000012737 xmlFreeParserCtxt(ctxt);
12738 newDoc->intSubset = NULL;
12739 newDoc->extSubset = NULL;
12740 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000012741
Owen Taylor3473f882001-02-23 17:55:21 +000012742 return(ret);
12743}
12744
Daniel Veillard81273902003-09-30 00:43:48 +000012745#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000012746/**
Daniel Veillard257d9102001-05-08 10:41:44 +000012747 * xmlParseExternalEntity:
12748 * @doc: the document the chunk pertains to
12749 * @sax: the SAX handler bloc (possibly NULL)
12750 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12751 * @depth: Used for loop detection, use 0
12752 * @URL: the URL for the entity to load
12753 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +000012754 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +000012755 *
12756 * Parse an external general entity
12757 * An external general parsed entity is well-formed if it matches the
12758 * production labeled extParsedEnt.
12759 *
12760 * [78] extParsedEnt ::= TextDecl? content
12761 *
12762 * Returns 0 if the entity is well formed, -1 in case of args problem and
12763 * the parser error code otherwise
12764 */
12765
12766int
12767xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +000012768 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +000012769 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +000012770 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +000012771}
12772
12773/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +000012774 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +000012775 * @doc: the document the chunk pertains to
12776 * @sax: the SAX handler bloc (possibly NULL)
12777 * @user_data: The user data returned on SAX callbacks (possibly NULL)
12778 * @depth: Used for loop detection, use 0
12779 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +000012780 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +000012781 *
12782 * Parse a well-balanced chunk of an XML document
12783 * called by the parser
12784 * The allowed sequence for the Well Balanced Chunk is the one defined by
12785 * the content production in the XML grammar:
12786 *
12787 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12788 *
12789 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
12790 * the parser error code otherwise
12791 */
12792
12793int
12794xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +000012795 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Daniel Veillard58e44c92002-08-02 22:19:49 +000012796 return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12797 depth, string, lst, 0 );
12798}
Daniel Veillard81273902003-09-30 00:43:48 +000012799#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillard58e44c92002-08-02 22:19:49 +000012800
12801/**
Daniel Veillard328f48c2002-11-15 15:24:34 +000012802 * xmlParseBalancedChunkMemoryInternal:
12803 * @oldctxt: the existing parsing context
12804 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
12805 * @user_data: the user data field for the parser context
12806 * @lst: the return value for the set of parsed nodes
12807 *
12808 *
12809 * Parse a well-balanced chunk of an XML document
12810 * called by the parser
12811 * The allowed sequence for the Well Balanced Chunk is the one defined by
12812 * the content production in the XML grammar:
12813 *
12814 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12815 *
Daniel Veillard7d515752003-09-26 19:12:37 +000012816 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
12817 * error code otherwise
Daniel Veillard0161e632008-08-28 15:36:32 +000012818 *
Daniel Veillard328f48c2002-11-15 15:24:34 +000012819 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard0161e632008-08-28 15:36:32 +000012820 * the parsed chunk is not well balanced.
Daniel Veillard328f48c2002-11-15 15:24:34 +000012821 */
Daniel Veillard7d515752003-09-26 19:12:37 +000012822static xmlParserErrors
Daniel Veillard328f48c2002-11-15 15:24:34 +000012823xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
12824 const xmlChar *string, void *user_data, xmlNodePtr *lst) {
12825 xmlParserCtxtPtr ctxt;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012826 xmlDocPtr newDoc = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012827 xmlNodePtr newRoot;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012828 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012829 xmlNodePtr content = NULL;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012830 xmlNodePtr last = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012831 int size;
Daniel Veillard7d515752003-09-26 19:12:37 +000012832 xmlParserErrors ret = XML_ERR_OK;
Daniel Veillard74eaec12009-08-26 15:57:20 +020012833#ifdef SAX2
12834 int i;
12835#endif
Daniel Veillard328f48c2002-11-15 15:24:34 +000012836
Daniel Veillard0161e632008-08-28 15:36:32 +000012837 if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12838 (oldctxt->depth > 1024)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012839 return(XML_ERR_ENTITY_LOOP);
12840 }
12841
12842
12843 if (lst != NULL)
12844 *lst = NULL;
12845 if (string == NULL)
William M. Brack7b9154b2003-09-27 19:23:50 +000012846 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012847
12848 size = xmlStrlen(string);
12849
12850 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
William M. Brack7b9154b2003-09-27 19:23:50 +000012851 if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012852 if (user_data != NULL)
12853 ctxt->userData = user_data;
12854 else
12855 ctxt->userData = ctxt;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012856 if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
12857 ctxt->dict = oldctxt->dict;
Daniel Veillardfd343dc2003-10-31 10:55:22 +000012858 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12859 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12860 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012861
Daniel Veillard74eaec12009-08-26 15:57:20 +020012862#ifdef SAX2
12863 /* propagate namespaces down the entity */
12864 for (i = 0;i < oldctxt->nsNr;i += 2) {
12865 nsPush(ctxt, oldctxt->nsTab[i], oldctxt->nsTab[i+1]);
12866 }
12867#endif
12868
Daniel Veillard328f48c2002-11-15 15:24:34 +000012869 oldsax = ctxt->sax;
12870 ctxt->sax = oldctxt->sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000012871 xmlDetectSAX2(ctxt);
Daniel Veillard87ab1c12003-12-21 13:01:56 +000012872 ctxt->replaceEntities = oldctxt->replaceEntities;
12873 ctxt->options = oldctxt->options;
Daniel Veillard0161e632008-08-28 15:36:32 +000012874
Daniel Veillarde1ca5032002-12-09 14:13:43 +000012875 ctxt->_private = oldctxt->_private;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012876 if (oldctxt->myDoc == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012877 newDoc = xmlNewDoc(BAD_CAST "1.0");
12878 if (newDoc == NULL) {
12879 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012880 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012881 xmlFreeParserCtxt(ctxt);
William M. Brack7b9154b2003-09-27 19:23:50 +000012882 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012883 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000012884 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillard03a53c32004-10-26 16:06:51 +000012885 newDoc->dict = ctxt->dict;
12886 xmlDictReference(newDoc->dict);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012887 ctxt->myDoc = newDoc;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012888 } else {
12889 ctxt->myDoc = oldctxt->myDoc;
12890 content = ctxt->myDoc->children;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012891 last = ctxt->myDoc->last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012892 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012893 newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
12894 if (newRoot == NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012895 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012896 ctxt->dict = NULL;
Daniel Veillard68e9e742002-11-16 15:35:11 +000012897 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012898 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012899 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012900 }
William M. Brack7b9154b2003-09-27 19:23:50 +000012901 return(XML_ERR_INTERNAL_ERROR);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012902 }
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012903 ctxt->myDoc->children = NULL;
12904 ctxt->myDoc->last = NULL;
12905 xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
Daniel Veillard68e9e742002-11-16 15:35:11 +000012906 nodePush(ctxt, ctxt->myDoc->children);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012907 ctxt->instate = XML_PARSER_CONTENT;
12908 ctxt->depth = oldctxt->depth + 1;
12909
Daniel Veillard328f48c2002-11-15 15:24:34 +000012910 ctxt->validate = 0;
12911 ctxt->loadsubset = oldctxt->loadsubset;
Daniel Veillardef8dd7b2003-03-23 12:02:56 +000012912 if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
12913 /*
12914 * ID/IDREF registration will be done in xmlValidateElement below
12915 */
12916 ctxt->loadsubset |= XML_SKIP_IDS;
12917 }
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012918 ctxt->dictNames = oldctxt->dictNames;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012919 ctxt->attsDefault = oldctxt->attsDefault;
12920 ctxt->attsSpecial = oldctxt->attsSpecial;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012921
Daniel Veillard68e9e742002-11-16 15:35:11 +000012922 xmlParseContent(ctxt);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012923 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012924 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012925 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012926 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012927 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012928 if (ctxt->node != ctxt->myDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000012929 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Daniel Veillard328f48c2002-11-15 15:24:34 +000012930 }
12931
12932 if (!ctxt->wellFormed) {
12933 if (ctxt->errNo == 0)
Daniel Veillard7d515752003-09-26 19:12:37 +000012934 ret = XML_ERR_INTERNAL_ERROR;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012935 else
William M. Brack7b9154b2003-09-27 19:23:50 +000012936 ret = (xmlParserErrors)ctxt->errNo;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012937 } else {
William M. Brack7b9154b2003-09-27 19:23:50 +000012938 ret = XML_ERR_OK;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012939 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012940
William M. Brack7b9154b2003-09-27 19:23:50 +000012941 if ((lst != NULL) && (ret == XML_ERR_OK)) {
Daniel Veillard328f48c2002-11-15 15:24:34 +000012942 xmlNodePtr cur;
12943
12944 /*
12945 * Return the newly created nodeset after unlinking it from
12946 * they pseudo parent.
12947 */
Daniel Veillard68e9e742002-11-16 15:35:11 +000012948 cur = ctxt->myDoc->children->children;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012949 *lst = cur;
12950 while (cur != NULL) {
Daniel Veillard4432df22003-09-28 18:58:27 +000012951#ifdef LIBXML_VALID_ENABLED
Daniel Veillard8874b942005-08-25 13:19:21 +000012952 if ((oldctxt->validate) && (oldctxt->wellFormed) &&
12953 (oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
12954 (cur->type == XML_ELEMENT_NODE)) {
Daniel Veillard8d589042003-02-04 15:07:21 +000012955 oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
12956 oldctxt->myDoc, cur);
12957 }
Daniel Veillard4432df22003-09-28 18:58:27 +000012958#endif /* LIBXML_VALID_ENABLED */
Daniel Veillard328f48c2002-11-15 15:24:34 +000012959 cur->parent = NULL;
12960 cur = cur->next;
12961 }
Daniel Veillard68e9e742002-11-16 15:35:11 +000012962 ctxt->myDoc->children->children = NULL;
12963 }
12964 if (ctxt->myDoc != NULL) {
12965 xmlFreeNode(ctxt->myDoc->children);
12966 ctxt->myDoc->children = content;
Daniel Veillard8de5c0b2004-10-07 13:14:19 +000012967 ctxt->myDoc->last = last;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012968 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012969
12970 /*
12971 * Record in the parent context the number of entities replacement
12972 * done when parsing that reference.
12973 */
Daniel Veillardd44b9362009-09-07 12:15:08 +020012974 if (oldctxt != NULL)
12975 oldctxt->nbentities += ctxt->nbentities;
12976
Daniel Veillard0161e632008-08-28 15:36:32 +000012977 /*
12978 * Also record the last error if any
12979 */
12980 if (ctxt->lastError.code != XML_ERR_OK)
12981 xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12982
Daniel Veillard328f48c2002-11-15 15:24:34 +000012983 ctxt->sax = oldsax;
Daniel Veillard9f7eb0b2003-09-17 10:26:25 +000012984 ctxt->dict = NULL;
Daniel Veillard95d2d5b2003-10-27 14:54:49 +000012985 ctxt->attsDefault = NULL;
12986 ctxt->attsSpecial = NULL;
Daniel Veillard328f48c2002-11-15 15:24:34 +000012987 xmlFreeParserCtxt(ctxt);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012988 if (newDoc != NULL) {
Daniel Veillard68e9e742002-11-16 15:35:11 +000012989 xmlFreeDoc(newDoc);
Daniel Veillard03a53c32004-10-26 16:06:51 +000012990 }
Daniel Veillard0161e632008-08-28 15:36:32 +000012991
Daniel Veillard328f48c2002-11-15 15:24:34 +000012992 return(ret);
12993}
12994
Daniel Veillard29b17482004-08-16 00:39:03 +000012995/**
12996 * xmlParseInNodeContext:
12997 * @node: the context node
12998 * @data: the input string
12999 * @datalen: the input string length in bytes
13000 * @options: a combination of xmlParserOption
13001 * @lst: the return value for the set of parsed nodes
13002 *
13003 * Parse a well-balanced chunk of an XML document
13004 * within the context (DTD, namespaces, etc ...) of the given node.
13005 *
13006 * The allowed sequence for the data is a Well Balanced Chunk defined by
13007 * the content production in the XML grammar:
13008 *
13009 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13010 *
13011 * Returns XML_ERR_OK if the chunk is well balanced, and the parser
13012 * error code otherwise
13013 */
13014xmlParserErrors
13015xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13016 int options, xmlNodePtr *lst) {
13017#ifdef SAX2
13018 xmlParserCtxtPtr ctxt;
13019 xmlDocPtr doc = NULL;
13020 xmlNodePtr fake, cur;
13021 int nsnr = 0;
13022
13023 xmlParserErrors ret = XML_ERR_OK;
13024
13025 /*
13026 * check all input parameters, grab the document
13027 */
13028 if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13029 return(XML_ERR_INTERNAL_ERROR);
13030 switch (node->type) {
13031 case XML_ELEMENT_NODE:
13032 case XML_ATTRIBUTE_NODE:
13033 case XML_TEXT_NODE:
13034 case XML_CDATA_SECTION_NODE:
13035 case XML_ENTITY_REF_NODE:
13036 case XML_PI_NODE:
13037 case XML_COMMENT_NODE:
13038 case XML_DOCUMENT_NODE:
13039 case XML_HTML_DOCUMENT_NODE:
13040 break;
13041 default:
13042 return(XML_ERR_INTERNAL_ERROR);
13043
13044 }
13045 while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13046 (node->type != XML_DOCUMENT_NODE) &&
13047 (node->type != XML_HTML_DOCUMENT_NODE))
13048 node = node->parent;
13049 if (node == NULL)
13050 return(XML_ERR_INTERNAL_ERROR);
13051 if (node->type == XML_ELEMENT_NODE)
13052 doc = node->doc;
13053 else
13054 doc = (xmlDocPtr) node;
13055 if (doc == NULL)
13056 return(XML_ERR_INTERNAL_ERROR);
13057
13058 /*
13059 * allocate a context and set-up everything not related to the
13060 * node position in the tree
13061 */
13062 if (doc->type == XML_DOCUMENT_NODE)
13063 ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13064#ifdef LIBXML_HTML_ENABLED
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013065 else if (doc->type == XML_HTML_DOCUMENT_NODE) {
Daniel Veillard29b17482004-08-16 00:39:03 +000013066 ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
Daniel Veillarde20fb5a2010-01-29 20:47:08 +010013067 /*
13068 * When parsing in context, it makes no sense to add implied
13069 * elements like html/body/etc...
13070 */
13071 options |= HTML_PARSE_NOIMPLIED;
13072 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013073#endif
13074 else
13075 return(XML_ERR_INTERNAL_ERROR);
13076
13077 if (ctxt == NULL)
13078 return(XML_ERR_NO_MEMORY);
William M. Brackc3f81342004-10-03 01:22:44 +000013079
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013080 /*
William M. Brackc3f81342004-10-03 01:22:44 +000013081 * Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13082 * We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13083 * we must wait until the last moment to free the original one.
13084 */
Daniel Veillard29b17482004-08-16 00:39:03 +000013085 if (doc->dict != NULL) {
William M. Brackc3f81342004-10-03 01:22:44 +000013086 if (ctxt->dict != NULL)
Daniel Veillard29b17482004-08-16 00:39:03 +000013087 xmlDictFree(ctxt->dict);
13088 ctxt->dict = doc->dict;
William M. Brackc3f81342004-10-03 01:22:44 +000013089 } else
13090 options |= XML_PARSE_NODICT;
13091
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013092 if (doc->encoding != NULL) {
13093 xmlCharEncodingHandlerPtr hdlr;
13094
13095 if (ctxt->encoding != NULL)
13096 xmlFree((xmlChar *) ctxt->encoding);
13097 ctxt->encoding = xmlStrdup((const xmlChar *) doc->encoding);
13098
13099 hdlr = xmlFindCharEncodingHandler(doc->encoding);
13100 if (hdlr != NULL) {
13101 xmlSwitchToEncoding(ctxt, hdlr);
13102 } else {
13103 return(XML_ERR_UNSUPPORTED_ENCODING);
13104 }
13105 }
13106
Daniel Veillard37334572008-07-31 08:20:02 +000013107 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillard29b17482004-08-16 00:39:03 +000013108 xmlDetectSAX2(ctxt);
13109 ctxt->myDoc = doc;
13110
Daniel Veillard47cd14e2010-02-04 18:49:01 +010013111 fake = xmlNewComment(NULL);
13112 if (fake == NULL) {
13113 xmlFreeParserCtxt(ctxt);
13114 return(XML_ERR_NO_MEMORY);
13115 }
13116 xmlAddChild(node, fake);
13117
Daniel Veillard29b17482004-08-16 00:39:03 +000013118 if (node->type == XML_ELEMENT_NODE) {
13119 nodePush(ctxt, node);
13120 /*
13121 * initialize the SAX2 namespaces stack
13122 */
13123 cur = node;
13124 while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13125 xmlNsPtr ns = cur->nsDef;
13126 const xmlChar *iprefix, *ihref;
13127
13128 while (ns != NULL) {
13129 if (ctxt->dict) {
13130 iprefix = xmlDictLookup(ctxt->dict, ns->prefix, -1);
13131 ihref = xmlDictLookup(ctxt->dict, ns->href, -1);
13132 } else {
13133 iprefix = ns->prefix;
13134 ihref = ns->href;
13135 }
13136
13137 if (xmlGetNamespace(ctxt, iprefix) == NULL) {
13138 nsPush(ctxt, iprefix, ihref);
13139 nsnr++;
13140 }
13141 ns = ns->next;
13142 }
13143 cur = cur->parent;
13144 }
13145 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard0161e632008-08-28 15:36:32 +000013146 }
Daniel Veillard29b17482004-08-16 00:39:03 +000013147
13148 if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13149 /*
13150 * ID/IDREF registration will be done in xmlValidateElement below
13151 */
13152 ctxt->loadsubset |= XML_SKIP_IDS;
13153 }
13154
Daniel Veillard499cc922006-01-18 17:22:35 +000013155#ifdef LIBXML_HTML_ENABLED
13156 if (doc->type == XML_HTML_DOCUMENT_NODE)
13157 __htmlParseContent(ctxt);
13158 else
13159#endif
13160 xmlParseContent(ctxt);
13161
Daniel Veillard29b17482004-08-16 00:39:03 +000013162 nsPop(ctxt, nsnr);
13163 if ((RAW == '<') && (NXT(1) == '/')) {
13164 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13165 } else if (RAW != 0) {
13166 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13167 }
13168 if ((ctxt->node != NULL) && (ctxt->node != node)) {
13169 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13170 ctxt->wellFormed = 0;
13171 }
13172
13173 if (!ctxt->wellFormed) {
13174 if (ctxt->errNo == 0)
13175 ret = XML_ERR_INTERNAL_ERROR;
13176 else
13177 ret = (xmlParserErrors)ctxt->errNo;
13178 } else {
13179 ret = XML_ERR_OK;
13180 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013181
Daniel Veillard29b17482004-08-16 00:39:03 +000013182 /*
13183 * Return the newly created nodeset after unlinking it from
13184 * the pseudo sibling.
13185 */
Daniel Veillard0161e632008-08-28 15:36:32 +000013186
Daniel Veillard29b17482004-08-16 00:39:03 +000013187 cur = fake->next;
13188 fake->next = NULL;
13189 node->last = fake;
13190
13191 if (cur != NULL) {
13192 cur->prev = NULL;
13193 }
13194
13195 *lst = cur;
13196
13197 while (cur != NULL) {
13198 cur->parent = NULL;
13199 cur = cur->next;
13200 }
13201
13202 xmlUnlinkNode(fake);
13203 xmlFreeNode(fake);
13204
13205
13206 if (ret != XML_ERR_OK) {
13207 xmlFreeNodeList(*lst);
13208 *lst = NULL;
13209 }
William M. Brackc3f81342004-10-03 01:22:44 +000013210
William M. Brackb7b54de2004-10-06 16:38:01 +000013211 if (doc->dict != NULL)
13212 ctxt->dict = NULL;
Daniel Veillard29b17482004-08-16 00:39:03 +000013213 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013214
Daniel Veillard29b17482004-08-16 00:39:03 +000013215 return(ret);
13216#else /* !SAX2 */
13217 return(XML_ERR_INTERNAL_ERROR);
13218#endif
13219}
13220
Daniel Veillard81273902003-09-30 00:43:48 +000013221#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard328f48c2002-11-15 15:24:34 +000013222/**
Daniel Veillard58e44c92002-08-02 22:19:49 +000013223 * xmlParseBalancedChunkMemoryRecover:
13224 * @doc: the document the chunk pertains to
13225 * @sax: the SAX handler bloc (possibly NULL)
13226 * @user_data: The user data returned on SAX callbacks (possibly NULL)
13227 * @depth: Used for loop detection, use 0
13228 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
13229 * @lst: the return value for the set of parsed nodes
13230 * @recover: return nodes even if the data is broken (use 0)
13231 *
13232 *
13233 * Parse a well-balanced chunk of an XML document
13234 * called by the parser
13235 * The allowed sequence for the Well Balanced Chunk is the one defined by
13236 * the content production in the XML grammar:
13237 *
13238 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13239 *
13240 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
13241 * the parser error code otherwise
Daniel Veillard2135fc22008-04-04 16:10:51 +000013242 *
Daniel Veillard58e44c92002-08-02 22:19:49 +000013243 * In case recover is set to 1, the nodelist will not be empty even if
Daniel Veillard2135fc22008-04-04 16:10:51 +000013244 * the parsed chunk is not well balanced, assuming the parsing succeeded to
13245 * some extent.
Daniel Veillard58e44c92002-08-02 22:19:49 +000013246 */
13247int
13248xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillard2135fc22008-04-04 16:10:51 +000013249 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
Daniel Veillard58e44c92002-08-02 22:19:49 +000013250 int recover) {
Owen Taylor3473f882001-02-23 17:55:21 +000013251 xmlParserCtxtPtr ctxt;
13252 xmlDocPtr newDoc;
13253 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013254 xmlNodePtr content, newRoot;
Owen Taylor3473f882001-02-23 17:55:21 +000013255 int size;
13256 int ret = 0;
13257
Daniel Veillard0161e632008-08-28 15:36:32 +000013258 if (depth > 40) {
Owen Taylor3473f882001-02-23 17:55:21 +000013259 return(XML_ERR_ENTITY_LOOP);
13260 }
13261
13262
Daniel Veillardcda96922001-08-21 10:56:31 +000013263 if (lst != NULL)
13264 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013265 if (string == NULL)
13266 return(-1);
13267
13268 size = xmlStrlen(string);
13269
13270 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
13271 if (ctxt == NULL) return(-1);
13272 ctxt->userData = ctxt;
13273 if (sax != NULL) {
13274 oldsax = ctxt->sax;
13275 ctxt->sax = sax;
13276 if (user_data != NULL)
13277 ctxt->userData = user_data;
13278 }
13279 newDoc = xmlNewDoc(BAD_CAST "1.0");
13280 if (newDoc == NULL) {
13281 xmlFreeParserCtxt(ctxt);
13282 return(-1);
13283 }
Daniel Veillardae0765b2008-07-31 19:54:59 +000013284 newDoc->properties = XML_DOC_INTERNAL;
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013285 if ((doc != NULL) && (doc->dict != NULL)) {
13286 xmlDictFree(ctxt->dict);
13287 ctxt->dict = doc->dict;
13288 xmlDictReference(ctxt->dict);
13289 ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13290 ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13291 ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13292 ctxt->dictNames = 1;
13293 } else {
Daniel Veillard37334572008-07-31 08:20:02 +000013294 xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT, NULL);
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013295 }
Owen Taylor3473f882001-02-23 17:55:21 +000013296 if (doc != NULL) {
13297 newDoc->intSubset = doc->intSubset;
13298 newDoc->extSubset = doc->extSubset;
13299 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013300 newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13301 if (newRoot == NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +000013302 if (sax != NULL)
13303 ctxt->sax = oldsax;
13304 xmlFreeParserCtxt(ctxt);
13305 newDoc->intSubset = NULL;
13306 newDoc->extSubset = NULL;
13307 xmlFreeDoc(newDoc);
13308 return(-1);
13309 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013310 xmlAddChild((xmlNodePtr) newDoc, newRoot);
13311 nodePush(ctxt, newRoot);
Owen Taylor3473f882001-02-23 17:55:21 +000013312 if (doc == NULL) {
13313 ctxt->myDoc = newDoc;
13314 } else {
Daniel Veillard42766c02002-08-22 20:52:17 +000013315 ctxt->myDoc = newDoc;
Owen Taylor3473f882001-02-23 17:55:21 +000013316 newDoc->children->doc = doc;
Rob Richardsa02f1992006-09-16 14:04:26 +000013317 /* Ensure that doc has XML spec namespace */
13318 xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13319 newDoc->oldNs = doc->oldNs;
Owen Taylor3473f882001-02-23 17:55:21 +000013320 }
13321 ctxt->instate = XML_PARSER_CONTENT;
13322 ctxt->depth = depth;
13323
13324 /*
13325 * Doing validity checking on chunk doesn't make sense
13326 */
13327 ctxt->validate = 0;
13328 ctxt->loadsubset = 0;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013329 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013330
Daniel Veillardb39bc392002-10-26 19:29:51 +000013331 if ( doc != NULL ){
13332 content = doc->children;
13333 doc->children = NULL;
13334 xmlParseContent(ctxt);
13335 doc->children = content;
13336 }
13337 else {
13338 xmlParseContent(ctxt);
13339 }
Owen Taylor3473f882001-02-23 17:55:21 +000013340 if ((RAW == '<') && (NXT(1) == '/')) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013341 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013342 } else if (RAW != 0) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013343 xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013344 }
13345 if (ctxt->node != newDoc->children) {
Daniel Veillard1afc9f32003-09-13 12:44:05 +000013346 xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013347 }
13348
13349 if (!ctxt->wellFormed) {
13350 if (ctxt->errNo == 0)
13351 ret = 1;
13352 else
13353 ret = ctxt->errNo;
13354 } else {
Daniel Veillard58e44c92002-08-02 22:19:49 +000013355 ret = 0;
13356 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013357
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013358 if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13359 xmlNodePtr cur;
Owen Taylor3473f882001-02-23 17:55:21 +000013360
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013361 /*
13362 * Return the newly created nodeset after unlinking it from
13363 * they pseudo parent.
13364 */
13365 cur = newDoc->children->children;
13366 *lst = cur;
13367 while (cur != NULL) {
13368 xmlSetTreeDoc(cur, doc);
13369 cur->parent = NULL;
13370 cur = cur->next;
Owen Taylor3473f882001-02-23 17:55:21 +000013371 }
Daniel Veillardacbe6cf2004-10-31 21:04:50 +000013372 newDoc->children->children = NULL;
13373 }
Daniel Veillard0161e632008-08-28 15:36:32 +000013374
13375 if (sax != NULL)
Owen Taylor3473f882001-02-23 17:55:21 +000013376 ctxt->sax = oldsax;
13377 xmlFreeParserCtxt(ctxt);
13378 newDoc->intSubset = NULL;
13379 newDoc->extSubset = NULL;
Rob Richardsa02f1992006-09-16 14:04:26 +000013380 newDoc->oldNs = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000013381 xmlFreeDoc(newDoc);
Daniel Veillard0161e632008-08-28 15:36:32 +000013382
Owen Taylor3473f882001-02-23 17:55:21 +000013383 return(ret);
13384}
13385
13386/**
13387 * xmlSAXParseEntity:
13388 * @sax: the SAX handler block
13389 * @filename: the filename
13390 *
13391 * parse an XML external entity out of context and build a tree.
13392 * It use the given SAX function block to handle the parsing callback.
13393 * If sax is NULL, fallback to the default DOM tree building routines.
13394 *
13395 * [78] extParsedEnt ::= TextDecl? content
13396 *
13397 * This correspond to a "Well Balanced" chunk
13398 *
13399 * Returns the resulting document tree
13400 */
13401
13402xmlDocPtr
13403xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13404 xmlDocPtr ret;
13405 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013406
13407 ctxt = xmlCreateFileParserCtxt(filename);
13408 if (ctxt == NULL) {
13409 return(NULL);
13410 }
13411 if (sax != NULL) {
13412 if (ctxt->sax != NULL)
13413 xmlFree(ctxt->sax);
13414 ctxt->sax = sax;
13415 ctxt->userData = NULL;
13416 }
13417
Owen Taylor3473f882001-02-23 17:55:21 +000013418 xmlParseExtParsedEnt(ctxt);
13419
13420 if (ctxt->wellFormed)
13421 ret = ctxt->myDoc;
13422 else {
13423 ret = NULL;
13424 xmlFreeDoc(ctxt->myDoc);
13425 ctxt->myDoc = NULL;
13426 }
13427 if (sax != NULL)
13428 ctxt->sax = NULL;
13429 xmlFreeParserCtxt(ctxt);
Daniel Veillard0161e632008-08-28 15:36:32 +000013430
Owen Taylor3473f882001-02-23 17:55:21 +000013431 return(ret);
13432}
13433
13434/**
13435 * xmlParseEntity:
13436 * @filename: the filename
13437 *
13438 * parse an XML external entity out of context and build a tree.
13439 *
13440 * [78] extParsedEnt ::= TextDecl? content
13441 *
13442 * This correspond to a "Well Balanced" chunk
13443 *
13444 * Returns the resulting document tree
13445 */
13446
13447xmlDocPtr
13448xmlParseEntity(const char *filename) {
13449 return(xmlSAXParseEntity(NULL, filename));
13450}
Daniel Veillard81273902003-09-30 00:43:48 +000013451#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013452
13453/**
Rob Richards9c0aa472009-03-26 18:10:19 +000013454 * xmlCreateEntityParserCtxtInternal:
Owen Taylor3473f882001-02-23 17:55:21 +000013455 * @URL: the entity URL
13456 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +000013457 * @base: a possible base for the target URI
Rob Richards9c0aa472009-03-26 18:10:19 +000013458 * @pctx: parser context used to set options on new context
Owen Taylor3473f882001-02-23 17:55:21 +000013459 *
13460 * Create a parser context for an external entity
13461 * Automatic support for ZLIB/Compress compressed document is provided
13462 * by default if found at compile-time.
13463 *
13464 * Returns the new parser context or NULL
13465 */
Rob Richards9c0aa472009-03-26 18:10:19 +000013466static xmlParserCtxtPtr
13467xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
13468 const xmlChar *base, xmlParserCtxtPtr pctx) {
Owen Taylor3473f882001-02-23 17:55:21 +000013469 xmlParserCtxtPtr ctxt;
13470 xmlParserInputPtr inputStream;
13471 char *directory = NULL;
13472 xmlChar *uri;
Daniel Veillard0161e632008-08-28 15:36:32 +000013473
Owen Taylor3473f882001-02-23 17:55:21 +000013474 ctxt = xmlNewParserCtxt();
13475 if (ctxt == NULL) {
13476 return(NULL);
13477 }
13478
Daniel Veillard48247b42009-07-10 16:12:46 +020013479 if (pctx != NULL) {
13480 ctxt->options = pctx->options;
13481 ctxt->_private = pctx->_private;
Rob Richards9c0aa472009-03-26 18:10:19 +000013482 }
13483
Owen Taylor3473f882001-02-23 17:55:21 +000013484 uri = xmlBuildURI(URL, base);
13485
13486 if (uri == NULL) {
13487 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13488 if (inputStream == NULL) {
13489 xmlFreeParserCtxt(ctxt);
13490 return(NULL);
13491 }
13492
13493 inputPush(ctxt, inputStream);
13494
13495 if ((ctxt->directory == NULL) && (directory == NULL))
13496 directory = xmlParserGetDirectory((char *)URL);
13497 if ((ctxt->directory == NULL) && (directory != NULL))
13498 ctxt->directory = directory;
13499 } else {
13500 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13501 if (inputStream == NULL) {
13502 xmlFree(uri);
13503 xmlFreeParserCtxt(ctxt);
13504 return(NULL);
13505 }
13506
13507 inputPush(ctxt, inputStream);
13508
13509 if ((ctxt->directory == NULL) && (directory == NULL))
13510 directory = xmlParserGetDirectory((char *)uri);
13511 if ((ctxt->directory == NULL) && (directory != NULL))
13512 ctxt->directory = directory;
13513 xmlFree(uri);
13514 }
Owen Taylor3473f882001-02-23 17:55:21 +000013515 return(ctxt);
13516}
13517
Rob Richards9c0aa472009-03-26 18:10:19 +000013518/**
13519 * xmlCreateEntityParserCtxt:
13520 * @URL: the entity URL
13521 * @ID: the entity PUBLIC ID
13522 * @base: a possible base for the target URI
13523 *
13524 * Create a parser context for an external entity
13525 * Automatic support for ZLIB/Compress compressed document is provided
13526 * by default if found at compile-time.
13527 *
13528 * Returns the new parser context or NULL
13529 */
13530xmlParserCtxtPtr
13531xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13532 const xmlChar *base) {
13533 return xmlCreateEntityParserCtxtInternal(URL, ID, base, NULL);
13534
13535}
13536
Owen Taylor3473f882001-02-23 17:55:21 +000013537/************************************************************************
13538 * *
Daniel Veillard0161e632008-08-28 15:36:32 +000013539 * Front ends when parsing from a file *
Owen Taylor3473f882001-02-23 17:55:21 +000013540 * *
13541 ************************************************************************/
13542
13543/**
Daniel Veillard61b93382003-11-03 14:28:31 +000013544 * xmlCreateURLParserCtxt:
13545 * @filename: the filename or URL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000013546 * @options: a combination of xmlParserOption
Owen Taylor3473f882001-02-23 17:55:21 +000013547 *
Daniel Veillard61b93382003-11-03 14:28:31 +000013548 * Create a parser context for a file or URL content.
Owen Taylor3473f882001-02-23 17:55:21 +000013549 * Automatic support for ZLIB/Compress compressed document is provided
Daniel Veillard61b93382003-11-03 14:28:31 +000013550 * by default if found at compile-time and for file accesses
Owen Taylor3473f882001-02-23 17:55:21 +000013551 *
13552 * Returns the new parser context or NULL
13553 */
13554xmlParserCtxtPtr
Daniel Veillard61b93382003-11-03 14:28:31 +000013555xmlCreateURLParserCtxt(const char *filename, int options)
Owen Taylor3473f882001-02-23 17:55:21 +000013556{
13557 xmlParserCtxtPtr ctxt;
13558 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +000013559 char *directory = NULL;
13560
Owen Taylor3473f882001-02-23 17:55:21 +000013561 ctxt = xmlNewParserCtxt();
13562 if (ctxt == NULL) {
Daniel Veillard81273902003-09-30 00:43:48 +000013563 xmlErrMemory(NULL, "cannot allocate parser context");
Owen Taylor3473f882001-02-23 17:55:21 +000013564 return(NULL);
13565 }
13566
Daniel Veillarddf292f72005-01-16 19:00:15 +000013567 if (options)
Daniel Veillard37334572008-07-31 08:20:02 +000013568 xmlCtxtUseOptionsInternal(ctxt, options, NULL);
Daniel Veillarddf292f72005-01-16 19:00:15 +000013569 ctxt->linenumbers = 1;
Daniel Veillard37334572008-07-31 08:20:02 +000013570
Daniel Veillard4e9b1bc2003-06-09 10:30:33 +000013571 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013572 if (inputStream == NULL) {
13573 xmlFreeParserCtxt(ctxt);
13574 return(NULL);
13575 }
13576
Owen Taylor3473f882001-02-23 17:55:21 +000013577 inputPush(ctxt, inputStream);
13578 if ((ctxt->directory == NULL) && (directory == NULL))
Igor Zlatkovic5f9fada2003-02-19 14:51:00 +000013579 directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013580 if ((ctxt->directory == NULL) && (directory != NULL))
13581 ctxt->directory = directory;
13582
13583 return(ctxt);
13584}
13585
Daniel Veillard61b93382003-11-03 14:28:31 +000013586/**
13587 * xmlCreateFileParserCtxt:
13588 * @filename: the filename
13589 *
13590 * Create a parser context for a file content.
13591 * Automatic support for ZLIB/Compress compressed document is provided
13592 * by default if found at compile-time.
13593 *
13594 * Returns the new parser context or NULL
13595 */
13596xmlParserCtxtPtr
13597xmlCreateFileParserCtxt(const char *filename)
13598{
13599 return(xmlCreateURLParserCtxt(filename, 0));
13600}
13601
Daniel Veillard81273902003-09-30 00:43:48 +000013602#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013603/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013604 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +000013605 * @sax: the SAX handler block
13606 * @filename: the filename
13607 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13608 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +000013609 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +000013610 *
13611 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13612 * compressed document is provided by default if found at compile-time.
13613 * It use the given SAX function block to handle the parsing callback.
13614 * If sax is NULL, fallback to the default DOM tree building routines.
13615 *
Daniel Veillarde19fc232002-04-22 16:01:24 +000013616 * User data (void *) is stored within the parser context in the
13617 * context's _private member, so it is available nearly everywhere in libxml
Daniel Veillarda293c322001-10-02 13:54:14 +000013618 *
Owen Taylor3473f882001-02-23 17:55:21 +000013619 * Returns the resulting document tree
13620 */
13621
13622xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +000013623xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13624 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +000013625 xmlDocPtr ret;
13626 xmlParserCtxtPtr ctxt;
Owen Taylor3473f882001-02-23 17:55:21 +000013627
Daniel Veillard635ef722001-10-29 11:48:19 +000013628 xmlInitParser();
13629
Owen Taylor3473f882001-02-23 17:55:21 +000013630 ctxt = xmlCreateFileParserCtxt(filename);
13631 if (ctxt == NULL) {
13632 return(NULL);
13633 }
13634 if (sax != NULL) {
13635 if (ctxt->sax != NULL)
13636 xmlFree(ctxt->sax);
13637 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000013638 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013639 xmlDetectSAX2(ctxt);
Daniel Veillarda293c322001-10-02 13:54:14 +000013640 if (data!=NULL) {
Daniel Veillardc790bf42003-10-11 10:50:10 +000013641 ctxt->_private = data;
Daniel Veillarda293c322001-10-02 13:54:14 +000013642 }
Owen Taylor3473f882001-02-23 17:55:21 +000013643
Daniel Veillard37d2d162008-03-14 10:54:00 +000013644 if (ctxt->directory == NULL)
13645 ctxt->directory = xmlParserGetDirectory(filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013646
Daniel Veillarddad3f682002-11-17 16:47:27 +000013647 ctxt->recovery = recovery;
13648
Owen Taylor3473f882001-02-23 17:55:21 +000013649 xmlParseDocument(ctxt);
13650
William M. Brackc07329e2003-09-08 01:57:30 +000013651 if ((ctxt->wellFormed) || recovery) {
13652 ret = ctxt->myDoc;
Daniel Veillardb65e12e2003-10-08 21:33:28 +000013653 if (ret != NULL) {
13654 if (ctxt->input->buf->compressed > 0)
13655 ret->compression = 9;
13656 else
13657 ret->compression = ctxt->input->buf->compressed;
13658 }
William M. Brackc07329e2003-09-08 01:57:30 +000013659 }
Owen Taylor3473f882001-02-23 17:55:21 +000013660 else {
13661 ret = NULL;
13662 xmlFreeDoc(ctxt->myDoc);
13663 ctxt->myDoc = NULL;
13664 }
13665 if (sax != NULL)
13666 ctxt->sax = NULL;
13667 xmlFreeParserCtxt(ctxt);
13668
13669 return(ret);
13670}
13671
13672/**
Daniel Veillarda293c322001-10-02 13:54:14 +000013673 * xmlSAXParseFile:
13674 * @sax: the SAX handler block
13675 * @filename: the filename
13676 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13677 * documents
13678 *
13679 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13680 * compressed document is provided by default if found at compile-time.
13681 * It use the given SAX function block to handle the parsing callback.
13682 * If sax is NULL, fallback to the default DOM tree building routines.
13683 *
13684 * Returns the resulting document tree
13685 */
13686
13687xmlDocPtr
13688xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13689 int recovery) {
13690 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13691}
13692
13693/**
Owen Taylor3473f882001-02-23 17:55:21 +000013694 * xmlRecoverDoc:
13695 * @cur: a pointer to an array of xmlChar
13696 *
13697 * parse an XML in-memory document and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013698 * In the case the document is not Well Formed, a attempt to build a
13699 * tree is tried anyway
13700 *
13701 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013702 */
13703
13704xmlDocPtr
Daniel Veillardf39eafa2009-08-20 19:15:08 +020013705xmlRecoverDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000013706 return(xmlSAXParseDoc(NULL, cur, 1));
13707}
13708
13709/**
13710 * xmlParseFile:
13711 * @filename: the filename
13712 *
13713 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13714 * compressed document is provided by default if found at compile-time.
13715 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000013716 * Returns the resulting document tree if the file was wellformed,
13717 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000013718 */
13719
13720xmlDocPtr
13721xmlParseFile(const char *filename) {
13722 return(xmlSAXParseFile(NULL, filename, 0));
13723}
13724
13725/**
13726 * xmlRecoverFile:
13727 * @filename: the filename
13728 *
13729 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
13730 * compressed document is provided by default if found at compile-time.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013731 * In the case the document is not Well Formed, it attempts to build
13732 * a tree anyway
Owen Taylor3473f882001-02-23 17:55:21 +000013733 *
Daniel Veillard2135fc22008-04-04 16:10:51 +000013734 * Returns the resulting document tree or NULL in case of failure
Owen Taylor3473f882001-02-23 17:55:21 +000013735 */
13736
13737xmlDocPtr
13738xmlRecoverFile(const char *filename) {
13739 return(xmlSAXParseFile(NULL, filename, 1));
13740}
13741
13742
13743/**
13744 * xmlSetupParserForBuffer:
13745 * @ctxt: an XML parser context
13746 * @buffer: a xmlChar * buffer
13747 * @filename: a file name
13748 *
13749 * Setup the parser context to parse a new buffer; Clears any prior
13750 * contents from the parser context. The buffer parameter must not be
13751 * NULL, but the filename parameter can be
13752 */
13753void
13754xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13755 const char* filename)
13756{
13757 xmlParserInputPtr input;
13758
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013759 if ((ctxt == NULL) || (buffer == NULL))
13760 return;
13761
Owen Taylor3473f882001-02-23 17:55:21 +000013762 input = xmlNewInputStream(ctxt);
13763 if (input == NULL) {
Daniel Veillard24eb9782003-10-04 21:08:09 +000013764 xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
Daniel Veillard36e5cd52004-11-02 14:52:23 +000013765 xmlClearParserCtxt(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000013766 return;
13767 }
13768
13769 xmlClearParserCtxt(ctxt);
13770 if (filename != NULL)
Daniel Veillardc3ca5ba2003-05-09 22:26:28 +000013771 input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
Owen Taylor3473f882001-02-23 17:55:21 +000013772 input->base = buffer;
13773 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000013774 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000013775 inputPush(ctxt, input);
13776}
13777
13778/**
13779 * xmlSAXUserParseFile:
13780 * @sax: a SAX handler
13781 * @user_data: The user data returned on SAX callbacks
13782 * @filename: a file name
13783 *
13784 * parse an XML file and call the given SAX handler routines.
13785 * Automatic support for ZLIB/Compress compressed document is provided
13786 *
13787 * Returns 0 in case of success or a error number otherwise
13788 */
13789int
13790xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
13791 const char *filename) {
13792 int ret = 0;
13793 xmlParserCtxtPtr ctxt;
13794
13795 ctxt = xmlCreateFileParserCtxt(filename);
13796 if (ctxt == NULL) return -1;
Daniel Veillard092643b2003-09-25 14:29:29 +000013797 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
Owen Taylor3473f882001-02-23 17:55:21 +000013798 xmlFree(ctxt->sax);
13799 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000013800 xmlDetectSAX2(ctxt);
13801
Owen Taylor3473f882001-02-23 17:55:21 +000013802 if (user_data != NULL)
13803 ctxt->userData = user_data;
13804
13805 xmlParseDocument(ctxt);
13806
13807 if (ctxt->wellFormed)
13808 ret = 0;
13809 else {
13810 if (ctxt->errNo != 0)
13811 ret = ctxt->errNo;
13812 else
13813 ret = -1;
13814 }
13815 if (sax != NULL)
13816 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000013817 if (ctxt->myDoc != NULL) {
13818 xmlFreeDoc(ctxt->myDoc);
13819 ctxt->myDoc = NULL;
13820 }
Owen Taylor3473f882001-02-23 17:55:21 +000013821 xmlFreeParserCtxt(ctxt);
13822
13823 return ret;
13824}
Daniel Veillard81273902003-09-30 00:43:48 +000013825#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000013826
13827/************************************************************************
13828 * *
13829 * Front ends when parsing from memory *
13830 * *
13831 ************************************************************************/
13832
13833/**
13834 * xmlCreateMemoryParserCtxt:
13835 * @buffer: a pointer to a char array
13836 * @size: the size of the array
13837 *
13838 * Create a parser context for an XML in-memory document.
13839 *
13840 * Returns the new parser context or NULL
13841 */
13842xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000013843xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013844 xmlParserCtxtPtr ctxt;
13845 xmlParserInputPtr input;
13846 xmlParserInputBufferPtr buf;
13847
13848 if (buffer == NULL)
13849 return(NULL);
13850 if (size <= 0)
13851 return(NULL);
13852
13853 ctxt = xmlNewParserCtxt();
13854 if (ctxt == NULL)
13855 return(NULL);
13856
Daniel Veillard53350552003-09-18 13:35:51 +000013857 /* TODO: xmlParserInputBufferCreateStatic, requires some serious changes */
Owen Taylor3473f882001-02-23 17:55:21 +000013858 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013859 if (buf == NULL) {
13860 xmlFreeParserCtxt(ctxt);
13861 return(NULL);
13862 }
Owen Taylor3473f882001-02-23 17:55:21 +000013863
13864 input = xmlNewInputStream(ctxt);
13865 if (input == NULL) {
Daniel Veillarda7e05b42002-11-19 08:11:14 +000013866 xmlFreeParserInputBuffer(buf);
Owen Taylor3473f882001-02-23 17:55:21 +000013867 xmlFreeParserCtxt(ctxt);
13868 return(NULL);
13869 }
13870
13871 input->filename = NULL;
13872 input->buf = buf;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080013873 input->cur =
13874 input->base = xmlBufContent(input->buf->buffer);
13875 input->end = xmlBufEnd(input->buf->buffer);
Owen Taylor3473f882001-02-23 17:55:21 +000013876
13877 inputPush(ctxt, input);
13878 return(ctxt);
13879}
13880
Daniel Veillard81273902003-09-30 00:43:48 +000013881#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000013882/**
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013883 * xmlSAXParseMemoryWithData:
13884 * @sax: the SAX handler block
13885 * @buffer: an pointer to a char array
13886 * @size: the size of the array
13887 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
13888 * documents
13889 * @data: the userdata
13890 *
13891 * parse an XML in-memory block and use the given SAX function block
13892 * to handle the parsing callback. If sax is NULL, fallback to the default
13893 * DOM tree building routines.
13894 *
13895 * User data (void *) is stored within the parser context in the
13896 * context's _private member, so it is available nearly everywhere in libxml
13897 *
13898 * Returns the resulting document tree
13899 */
13900
13901xmlDocPtr
13902xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
13903 int size, int recovery, void *data) {
13904 xmlDocPtr ret;
13905 xmlParserCtxtPtr ctxt;
13906
Daniel Veillardab2a7632009-07-09 08:45:03 +020013907 xmlInitParser();
13908
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013909 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
13910 if (ctxt == NULL) return(NULL);
13911 if (sax != NULL) {
13912 if (ctxt->sax != NULL)
13913 xmlFree(ctxt->sax);
13914 ctxt->sax = sax;
13915 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000013916 xmlDetectSAX2(ctxt);
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013917 if (data!=NULL) {
13918 ctxt->_private=data;
13919 }
13920
Daniel Veillardadba5f12003-04-04 16:09:01 +000013921 ctxt->recovery = recovery;
13922
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013923 xmlParseDocument(ctxt);
13924
13925 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
13926 else {
13927 ret = NULL;
13928 xmlFreeDoc(ctxt->myDoc);
13929 ctxt->myDoc = NULL;
13930 }
13931 if (sax != NULL)
13932 ctxt->sax = NULL;
13933 xmlFreeParserCtxt(ctxt);
Daniel Veillardab2a7632009-07-09 08:45:03 +020013934
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013935 return(ret);
13936}
13937
13938/**
Owen Taylor3473f882001-02-23 17:55:21 +000013939 * xmlSAXParseMemory:
13940 * @sax: the SAX handler block
13941 * @buffer: an pointer to a char array
13942 * @size: the size of the array
13943 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
13944 * documents
13945 *
13946 * parse an XML in-memory block and use the given SAX function block
13947 * to handle the parsing callback. If sax is NULL, fallback to the default
13948 * DOM tree building routines.
13949 *
13950 * Returns the resulting document tree
13951 */
13952xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000013953xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
13954 int size, int recovery) {
Daniel Veillard8606bbb2002-11-12 12:36:52 +000013955 return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000013956}
13957
13958/**
13959 * xmlParseMemory:
13960 * @buffer: an pointer to a char array
13961 * @size: the size of the array
13962 *
13963 * parse an XML in-memory block and build a tree.
13964 *
13965 * Returns the resulting document tree
13966 */
13967
Daniel Veillard50822cb2001-07-26 20:05:51 +000013968xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013969 return(xmlSAXParseMemory(NULL, buffer, size, 0));
13970}
13971
13972/**
13973 * xmlRecoverMemory:
13974 * @buffer: an pointer to a char array
13975 * @size: the size of the array
13976 *
13977 * parse an XML in-memory block and build a tree.
Daniel Veillard2135fc22008-04-04 16:10:51 +000013978 * In the case the document is not Well Formed, an attempt to
13979 * build a tree is tried anyway
13980 *
13981 * Returns the resulting document tree or NULL in case of error
Owen Taylor3473f882001-02-23 17:55:21 +000013982 */
13983
Daniel Veillard50822cb2001-07-26 20:05:51 +000013984xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000013985 return(xmlSAXParseMemory(NULL, buffer, size, 1));
13986}
13987
13988/**
13989 * xmlSAXUserParseMemory:
13990 * @sax: a SAX handler
13991 * @user_data: The user data returned on SAX callbacks
13992 * @buffer: an in-memory XML document input
13993 * @size: the length of the XML document in bytes
13994 *
13995 * A better SAX parsing routine.
13996 * parse an XML in-memory buffer and call the given SAX handler routines.
Daniel Veillardab2a7632009-07-09 08:45:03 +020013997 *
Owen Taylor3473f882001-02-23 17:55:21 +000013998 * Returns 0 in case of success or a error number otherwise
13999 */
14000int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000014001 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000014002 int ret = 0;
14003 xmlParserCtxtPtr ctxt;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014004
14005 xmlInitParser();
14006
Owen Taylor3473f882001-02-23 17:55:21 +000014007 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14008 if (ctxt == NULL) return -1;
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014009 if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14010 xmlFree(ctxt->sax);
Daniel Veillard9e923512002-08-14 08:48:52 +000014011 ctxt->sax = sax;
Daniel Veillarde57ec792003-09-10 10:50:59 +000014012 xmlDetectSAX2(ctxt);
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014013
Daniel Veillard30211a02001-04-26 09:33:18 +000014014 if (user_data != NULL)
14015 ctxt->userData = user_data;
Daniel Veillardab2a7632009-07-09 08:45:03 +020014016
Owen Taylor3473f882001-02-23 17:55:21 +000014017 xmlParseDocument(ctxt);
14018
14019 if (ctxt->wellFormed)
14020 ret = 0;
14021 else {
14022 if (ctxt->errNo != 0)
14023 ret = ctxt->errNo;
14024 else
14025 ret = -1;
14026 }
Daniel Veillard3dcd3192007-08-14 13:46:54 +000014027 if (sax != NULL)
14028 ctxt->sax = NULL;
Daniel Veillard34099b42004-11-04 17:34:35 +000014029 if (ctxt->myDoc != NULL) {
14030 xmlFreeDoc(ctxt->myDoc);
14031 ctxt->myDoc = NULL;
14032 }
Owen Taylor3473f882001-02-23 17:55:21 +000014033 xmlFreeParserCtxt(ctxt);
14034
14035 return ret;
14036}
Daniel Veillard81273902003-09-30 00:43:48 +000014037#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014038
14039/**
14040 * xmlCreateDocParserCtxt:
14041 * @cur: a pointer to an array of xmlChar
14042 *
14043 * Creates a parser context for an XML in-memory document.
14044 *
14045 * Returns the new parser context or NULL
14046 */
14047xmlParserCtxtPtr
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014048xmlCreateDocParserCtxt(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014049 int len;
14050
14051 if (cur == NULL)
14052 return(NULL);
14053 len = xmlStrlen(cur);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014054 return(xmlCreateMemoryParserCtxt((const char *)cur, len));
Owen Taylor3473f882001-02-23 17:55:21 +000014055}
14056
Daniel Veillard81273902003-09-30 00:43:48 +000014057#ifdef LIBXML_SAX1_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000014058/**
14059 * xmlSAXParseDoc:
14060 * @sax: the SAX handler block
14061 * @cur: a pointer to an array of xmlChar
14062 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
14063 * documents
14064 *
14065 * parse an XML in-memory document and build a tree.
14066 * It use the given SAX function block to handle the parsing callback.
14067 * If sax is NULL, fallback to the default DOM tree building routines.
14068 *
14069 * Returns the resulting document tree
14070 */
14071
14072xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014073xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000014074 xmlDocPtr ret;
14075 xmlParserCtxtPtr ctxt;
Daniel Veillard38936062004-11-04 17:45:11 +000014076 xmlSAXHandlerPtr oldsax = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +000014077
Daniel Veillard38936062004-11-04 17:45:11 +000014078 if (cur == NULL) return(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000014079
14080
14081 ctxt = xmlCreateDocParserCtxt(cur);
14082 if (ctxt == NULL) return(NULL);
14083 if (sax != NULL) {
Daniel Veillard38936062004-11-04 17:45:11 +000014084 oldsax = ctxt->sax;
Owen Taylor3473f882001-02-23 17:55:21 +000014085 ctxt->sax = sax;
14086 ctxt->userData = NULL;
14087 }
Daniel Veillarde57ec792003-09-10 10:50:59 +000014088 xmlDetectSAX2(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +000014089
14090 xmlParseDocument(ctxt);
14091 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14092 else {
14093 ret = NULL;
14094 xmlFreeDoc(ctxt->myDoc);
14095 ctxt->myDoc = NULL;
14096 }
Daniel Veillard34099b42004-11-04 17:34:35 +000014097 if (sax != NULL)
Daniel Veillard38936062004-11-04 17:45:11 +000014098 ctxt->sax = oldsax;
Owen Taylor3473f882001-02-23 17:55:21 +000014099 xmlFreeParserCtxt(ctxt);
14100
14101 return(ret);
14102}
14103
14104/**
14105 * xmlParseDoc:
14106 * @cur: a pointer to an array of xmlChar
14107 *
14108 * parse an XML in-memory document and build a tree.
14109 *
14110 * Returns the resulting document tree
14111 */
14112
14113xmlDocPtr
Daniel Veillard7331e5c2005-03-31 14:59:00 +000014114xmlParseDoc(const xmlChar *cur) {
Owen Taylor3473f882001-02-23 17:55:21 +000014115 return(xmlSAXParseDoc(NULL, cur, 0));
14116}
Daniel Veillard81273902003-09-30 00:43:48 +000014117#endif /* LIBXML_SAX1_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014118
Daniel Veillard81273902003-09-30 00:43:48 +000014119#ifdef LIBXML_LEGACY_ENABLED
Daniel Veillard8107a222002-01-13 14:10:10 +000014120/************************************************************************
14121 * *
14122 * Specific function to keep track of entities references *
14123 * and used by the XSLT debugger *
14124 * *
14125 ************************************************************************/
14126
14127static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14128
14129/**
14130 * xmlAddEntityReference:
14131 * @ent : A valid entity
14132 * @firstNode : A valid first node for children of entity
14133 * @lastNode : A valid last node of children entity
14134 *
14135 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14136 */
14137static void
14138xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14139 xmlNodePtr lastNode)
14140{
14141 if (xmlEntityRefFunc != NULL) {
14142 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
14143 }
14144}
14145
14146
14147/**
14148 * xmlSetEntityReferenceFunc:
Daniel Veillard01c13b52002-12-10 15:19:08 +000014149 * @func: A valid function
Daniel Veillard8107a222002-01-13 14:10:10 +000014150 *
14151 * Set the function to call call back when a xml reference has been made
14152 */
14153void
14154xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14155{
14156 xmlEntityRefFunc = func;
14157}
Daniel Veillard81273902003-09-30 00:43:48 +000014158#endif /* LIBXML_LEGACY_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014159
14160/************************************************************************
14161 * *
14162 * Miscellaneous *
14163 * *
14164 ************************************************************************/
14165
14166#ifdef LIBXML_XPATH_ENABLED
14167#include <libxml/xpath.h>
14168#endif
14169
Daniel Veillardffa3c742005-07-21 13:24:09 +000014170extern void XMLCDECL xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000014171static int xmlParserInitialized = 0;
14172
14173/**
14174 * xmlInitParser:
14175 *
14176 * Initialization function for the XML parser.
14177 * This is not reentrant. Call once before processing in case of
14178 * use in multithreaded programs.
14179 */
14180
14181void
14182xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000014183 if (xmlParserInitialized != 0)
14184 return;
Owen Taylor3473f882001-02-23 17:55:21 +000014185
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014186#ifdef LIBXML_THREAD_ENABLED
14187 __xmlGlobalInitMutexLock();
14188 if (xmlParserInitialized == 0) {
14189#endif
Daniel Veillard7dd70802009-06-04 11:08:39 +020014190 xmlInitThreads();
Mike Hommeye6f05092010-10-15 19:50:03 +020014191 xmlInitGlobals();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014192 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
14193 (xmlGenericError == NULL))
14194 initGenericErrorDefaultFunc(NULL);
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014195 xmlInitMemory();
Daniel Veillard379ebc12012-05-18 15:41:31 +080014196 xmlInitializeDict();
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014197 xmlInitCharEncodingHandlers();
14198 xmlDefaultSAXHandlerInit();
14199 xmlRegisterDefaultInputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014200#ifdef LIBXML_OUTPUT_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014201 xmlRegisterDefaultOutputCallbacks();
Daniel Veillarda9cce9c2003-09-29 13:20:24 +000014202#endif /* LIBXML_OUTPUT_ENABLED */
Owen Taylor3473f882001-02-23 17:55:21 +000014203#ifdef LIBXML_HTML_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014204 htmlInitAutoClose();
14205 htmlDefaultSAXHandlerInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014206#endif
14207#ifdef LIBXML_XPATH_ENABLED
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014208 xmlXPathInit();
Owen Taylor3473f882001-02-23 17:55:21 +000014209#endif
Daniel Veillardfde5b0b2007-02-12 17:31:53 +000014210 xmlParserInitialized = 1;
14211#ifdef LIBXML_THREAD_ENABLED
14212 }
14213 __xmlGlobalInitMutexUnlock();
14214#endif
Owen Taylor3473f882001-02-23 17:55:21 +000014215}
14216
14217/**
14218 * xmlCleanupParser:
14219 *
Daniel Veillard05b37c62008-03-31 08:27:07 +000014220 * This function name is somewhat misleading. It does not clean up
14221 * parser state, it cleans up memory allocated by the library itself.
14222 * It is a cleanup function for the XML library. It tries to reclaim all
14223 * related global memory allocated for the library processing.
14224 * It doesn't deallocate any document related memory. One should
14225 * call xmlCleanupParser() only when the process has finished using
14226 * the library and all XML/HTML documents built with it.
14227 * See also xmlInitParser() which has the opposite function of preparing
14228 * the library for operations.
Daniel Veillard01101202009-02-21 09:22:04 +000014229 *
14230 * WARNING: if your application is multithreaded or has plugin support
14231 * calling this may crash the application if another thread or
14232 * a plugin is still using libxml2. It's sometimes very hard to
14233 * guess if libxml2 is in use in the application, some libraries
14234 * or plugins may use it without notice. In case of doubt abstain
14235 * from calling this function or do it just before calling exit()
14236 * to avoid leak reports from valgrind !
Owen Taylor3473f882001-02-23 17:55:21 +000014237 */
14238
14239void
14240xmlCleanupParser(void) {
Daniel Veillard7fb801f2003-08-17 21:07:26 +000014241 if (!xmlParserInitialized)
14242 return;
14243
Owen Taylor3473f882001-02-23 17:55:21 +000014244 xmlCleanupCharEncodingHandlers();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000014245#ifdef LIBXML_CATALOG_ENABLED
14246 xmlCatalogCleanup();
14247#endif
Daniel Veillard14412512005-01-21 23:53:26 +000014248 xmlDictCleanup();
Daniel Veillard04054be2003-10-15 10:48:54 +000014249 xmlCleanupInputCallbacks();
14250#ifdef LIBXML_OUTPUT_ENABLED
14251 xmlCleanupOutputCallbacks();
14252#endif
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014253#ifdef LIBXML_SCHEMAS_ENABLED
14254 xmlSchemaCleanupTypes();
Daniel Veillarddd6d3002004-11-03 14:20:29 +000014255 xmlRelaxNGCleanupTypes();
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014256#endif
Daniel Veillard781ac8b2003-05-15 22:11:36 +000014257 xmlCleanupGlobals();
Daniel Veillard2b8c4a12003-10-02 22:28:19 +000014258 xmlResetLastError();
Daniel Veillard74c0e592003-11-25 07:01:38 +000014259 xmlCleanupThreads(); /* must be last if called not from the main thread */
William M. Brack72ee48d2003-12-30 08:30:19 +000014260 xmlCleanupMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000014261 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000014262}
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014263
14264/************************************************************************
14265 * *
14266 * New set (2.6.0) of simpler and more flexible APIs *
14267 * *
14268 ************************************************************************/
14269
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named dict must be visible at the point
 * of expansion; a NULL dict means the string is always freed.
 *
 * The body is wrapped in do { ... } while (0) so the macro expands to a
 * single statement and cannot capture a following "else". Invoke it
 * like a function call, terminated by a semicolon.
 */
#define DICT_FREE(str)						\
    do {							\
        if ((str) && ((!dict) ||				\
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))	\
            xmlFree((char *)(str));				\
    } while (0)
14281
14282/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014283 * xmlCtxtReset:
14284 * @ctxt: an XML parser context
14285 *
14286 * Reset a parser context
14287 */
14288void
14289xmlCtxtReset(xmlParserCtxtPtr ctxt)
14290{
14291 xmlParserInputPtr input;
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014292 xmlDictPtr dict;
14293
14294 if (ctxt == NULL)
14295 return;
14296
14297 dict = ctxt->dict;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014298
14299 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14300 xmlFreeInputStream(input);
14301 }
14302 ctxt->inputNr = 0;
14303 ctxt->input = NULL;
14304
14305 ctxt->spaceNr = 0;
Daniel Veillard2e620862007-06-12 08:18:21 +000014306 if (ctxt->spaceTab != NULL) {
14307 ctxt->spaceTab[0] = -1;
14308 ctxt->space = &ctxt->spaceTab[0];
14309 } else {
14310 ctxt->space = NULL;
14311 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014312
14313
14314 ctxt->nodeNr = 0;
14315 ctxt->node = NULL;
14316
14317 ctxt->nameNr = 0;
14318 ctxt->name = NULL;
14319
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014320 DICT_FREE(ctxt->version);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014321 ctxt->version = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014322 DICT_FREE(ctxt->encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014323 ctxt->encoding = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014324 DICT_FREE(ctxt->directory);
14325 ctxt->directory = NULL;
14326 DICT_FREE(ctxt->extSubURI);
14327 ctxt->extSubURI = NULL;
14328 DICT_FREE(ctxt->extSubSystem);
14329 ctxt->extSubSystem = NULL;
14330 if (ctxt->myDoc != NULL)
14331 xmlFreeDoc(ctxt->myDoc);
14332 ctxt->myDoc = NULL;
14333
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014334 ctxt->standalone = -1;
14335 ctxt->hasExternalSubset = 0;
14336 ctxt->hasPErefs = 0;
14337 ctxt->html = 0;
14338 ctxt->external = 0;
14339 ctxt->instate = XML_PARSER_START;
14340 ctxt->token = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014341
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014342 ctxt->wellFormed = 1;
14343 ctxt->nsWellFormed = 1;
Daniel Veillardae289182004-01-21 16:00:43 +000014344 ctxt->disableSAX = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014345 ctxt->valid = 1;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014346#if 0
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014347 ctxt->vctxt.userData = ctxt;
14348 ctxt->vctxt.error = xmlParserValidityError;
14349 ctxt->vctxt.warning = xmlParserValidityWarning;
Daniel Veillard766c4f92004-03-26 10:48:29 +000014350#endif
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014351 ctxt->record_info = 0;
14352 ctxt->nbChars = 0;
14353 ctxt->checkIndex = 0;
14354 ctxt->inSubset = 0;
14355 ctxt->errNo = XML_ERR_OK;
14356 ctxt->depth = 0;
14357 ctxt->charset = XML_CHAR_ENCODING_UTF8;
14358 ctxt->catalogs = NULL;
Daniel Veillard4bf899b2008-08-20 17:04:30 +000014359 ctxt->nbentities = 0;
Daniel Veillard0161e632008-08-28 15:36:32 +000014360 ctxt->sizeentities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014361 xmlInitNodeInfoSeq(&ctxt->node_seq);
14362
14363 if (ctxt->attsDefault != NULL) {
14364 xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
14365 ctxt->attsDefault = NULL;
14366 }
14367 if (ctxt->attsSpecial != NULL) {
14368 xmlHashFree(ctxt->attsSpecial, NULL);
14369 ctxt->attsSpecial = NULL;
14370 }
14371
Daniel Veillard4432df22003-09-28 18:58:27 +000014372#ifdef LIBXML_CATALOG_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014373 if (ctxt->catalogs != NULL)
14374 xmlCatalogFreeLocal(ctxt->catalogs);
Daniel Veillard4432df22003-09-28 18:58:27 +000014375#endif
Daniel Veillardcc199e02003-10-24 21:11:48 +000014376 if (ctxt->lastError.code != XML_ERR_OK)
14377 xmlResetError(&ctxt->lastError);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014378}
14379
14380/**
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014381 * xmlCtxtResetPush:
14382 * @ctxt: an XML parser context
14383 * @chunk: a pointer to an array of chars
14384 * @size: number of chars in the array
14385 * @filename: an optional file name or URI
14386 * @encoding: the document encoding, or NULL
14387 *
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014388 * Reset a push parser context
14389 *
14390 * Returns 0 in case of success and 1 in case of error
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014391 */
14392int
14393xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14394 int size, const char *filename, const char *encoding)
14395{
14396 xmlParserInputPtr inputStream;
14397 xmlParserInputBufferPtr buf;
14398 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
14399
Daniel Veillarde4e3f5d2003-10-28 23:06:32 +000014400 if (ctxt == NULL)
14401 return(1);
14402
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014403 if ((encoding == NULL) && (chunk != NULL) && (size >= 4))
14404 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
14405
14406 buf = xmlAllocParserInputBuffer(enc);
14407 if (buf == NULL)
14408 return(1);
14409
14410 if (ctxt == NULL) {
14411 xmlFreeParserInputBuffer(buf);
14412 return(1);
14413 }
14414
14415 xmlCtxtReset(ctxt);
14416
14417 if (ctxt->pushTab == NULL) {
14418 ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
14419 sizeof(xmlChar *));
14420 if (ctxt->pushTab == NULL) {
14421 xmlErrMemory(ctxt, NULL);
14422 xmlFreeParserInputBuffer(buf);
14423 return(1);
14424 }
14425 }
14426
14427 if (filename == NULL) {
14428 ctxt->directory = NULL;
14429 } else {
14430 ctxt->directory = xmlParserGetDirectory(filename);
14431 }
14432
14433 inputStream = xmlNewInputStream(ctxt);
14434 if (inputStream == NULL) {
14435 xmlFreeParserInputBuffer(buf);
14436 return(1);
14437 }
14438
14439 if (filename == NULL)
14440 inputStream->filename = NULL;
14441 else
14442 inputStream->filename = (char *)
14443 xmlCanonicPath((const xmlChar *) filename);
14444 inputStream->buf = buf;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080014445 inputStream->cur =
14446 inputStream->base = xmlBufContent(buf->buffer);
14447 inputStream->end = xmlBufEnd(buf->buffer);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014448
14449 inputPush(ctxt, inputStream);
14450
14451 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14452 (ctxt->input->buf != NULL)) {
Daniel Veillard768eb3b2012-07-16 14:19:49 +080014453 int base = ctxt->input->base - xmlBufContent(ctxt->input->buf->buffer);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014454 int cur = ctxt->input->cur - ctxt->input->base;
14455
14456 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14457
Daniel Veillard768eb3b2012-07-16 14:19:49 +080014458 ctxt->input->base = xmlBufContent(ctxt->input->buf->buffer) + base;
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014459 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard768eb3b2012-07-16 14:19:49 +080014460 ctxt->input->end = xmlBufEnd(ctxt->input->buf->buffer);
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014461#ifdef DEBUG_PUSH
14462 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
14463#endif
14464 }
14465
14466 if (encoding != NULL) {
14467 xmlCharEncodingHandlerPtr hdlr;
14468
Daniel Veillard37334572008-07-31 08:20:02 +000014469 if (ctxt->encoding != NULL)
14470 xmlFree((xmlChar *) ctxt->encoding);
14471 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14472
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014473 hdlr = xmlFindCharEncodingHandler(encoding);
14474 if (hdlr != NULL) {
14475 xmlSwitchToEncoding(ctxt, hdlr);
14476 } else {
14477 xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14478 "Unsupported encoding %s\n", BAD_CAST encoding);
14479 }
14480 } else if (enc != XML_CHAR_ENCODING_NONE) {
14481 xmlSwitchEncoding(ctxt, enc);
14482 }
14483
14484 return(0);
14485}
14486
Daniel Veillard37334572008-07-31 08:20:02 +000014487
Daniel Veillard9ba8e382003-10-28 21:31:45 +000014488/**
Daniel Veillard37334572008-07-31 08:20:02 +000014489 * xmlCtxtUseOptionsInternal:
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014490 * @ctxt: an XML parser context
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014491 * @options: a combination of xmlParserOption
Daniel Veillard37334572008-07-31 08:20:02 +000014492 * @encoding: the user provided encoding to use
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014493 *
14494 * Applies the options to the parser context
14495 *
14496 * Returns 0 in case of success, the set of unknown or unimplemented options
14497 * in case of error.
14498 */
Daniel Veillard37334572008-07-31 08:20:02 +000014499static int
14500xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options, const char *encoding)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014501{
Daniel Veillard36e5cd52004-11-02 14:52:23 +000014502 if (ctxt == NULL)
14503 return(-1);
Daniel Veillard37334572008-07-31 08:20:02 +000014504 if (encoding != NULL) {
14505 if (ctxt->encoding != NULL)
14506 xmlFree((xmlChar *) ctxt->encoding);
14507 ctxt->encoding = xmlStrdup((const xmlChar *) encoding);
14508 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014509 if (options & XML_PARSE_RECOVER) {
14510 ctxt->recovery = 1;
14511 options -= XML_PARSE_RECOVER;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014512 ctxt->options |= XML_PARSE_RECOVER;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014513 } else
14514 ctxt->recovery = 0;
14515 if (options & XML_PARSE_DTDLOAD) {
14516 ctxt->loadsubset = XML_DETECT_IDS;
14517 options -= XML_PARSE_DTDLOAD;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014518 ctxt->options |= XML_PARSE_DTDLOAD;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014519 } else
14520 ctxt->loadsubset = 0;
14521 if (options & XML_PARSE_DTDATTR) {
14522 ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14523 options -= XML_PARSE_DTDATTR;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014524 ctxt->options |= XML_PARSE_DTDATTR;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014525 }
14526 if (options & XML_PARSE_NOENT) {
14527 ctxt->replaceEntities = 1;
14528 /* ctxt->loadsubset |= XML_DETECT_IDS; */
14529 options -= XML_PARSE_NOENT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014530 ctxt->options |= XML_PARSE_NOENT;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014531 } else
14532 ctxt->replaceEntities = 0;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014533 if (options & XML_PARSE_PEDANTIC) {
14534 ctxt->pedantic = 1;
14535 options -= XML_PARSE_PEDANTIC;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014536 ctxt->options |= XML_PARSE_PEDANTIC;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014537 } else
14538 ctxt->pedantic = 0;
14539 if (options & XML_PARSE_NOBLANKS) {
14540 ctxt->keepBlanks = 0;
14541 ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14542 options -= XML_PARSE_NOBLANKS;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014543 ctxt->options |= XML_PARSE_NOBLANKS;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014544 } else
14545 ctxt->keepBlanks = 1;
14546 if (options & XML_PARSE_DTDVALID) {
14547 ctxt->validate = 1;
14548 if (options & XML_PARSE_NOWARNING)
14549 ctxt->vctxt.warning = NULL;
14550 if (options & XML_PARSE_NOERROR)
14551 ctxt->vctxt.error = NULL;
14552 options -= XML_PARSE_DTDVALID;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014553 ctxt->options |= XML_PARSE_DTDVALID;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014554 } else
14555 ctxt->validate = 0;
Daniel Veillard971771e2005-07-09 17:32:57 +000014556 if (options & XML_PARSE_NOWARNING) {
14557 ctxt->sax->warning = NULL;
14558 options -= XML_PARSE_NOWARNING;
14559 }
14560 if (options & XML_PARSE_NOERROR) {
14561 ctxt->sax->error = NULL;
14562 ctxt->sax->fatalError = NULL;
14563 options -= XML_PARSE_NOERROR;
14564 }
Daniel Veillard81273902003-09-30 00:43:48 +000014565#ifdef LIBXML_SAX1_ENABLED
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014566 if (options & XML_PARSE_SAX1) {
14567 ctxt->sax->startElement = xmlSAX2StartElement;
14568 ctxt->sax->endElement = xmlSAX2EndElement;
14569 ctxt->sax->startElementNs = NULL;
14570 ctxt->sax->endElementNs = NULL;
14571 ctxt->sax->initialized = 1;
14572 options -= XML_PARSE_SAX1;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014573 ctxt->options |= XML_PARSE_SAX1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014574 }
Daniel Veillard81273902003-09-30 00:43:48 +000014575#endif /* LIBXML_SAX1_ENABLED */
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014576 if (options & XML_PARSE_NODICT) {
14577 ctxt->dictNames = 0;
14578 options -= XML_PARSE_NODICT;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014579 ctxt->options |= XML_PARSE_NODICT;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014580 } else {
14581 ctxt->dictNames = 1;
14582 }
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014583 if (options & XML_PARSE_NOCDATA) {
14584 ctxt->sax->cdataBlock = NULL;
14585 options -= XML_PARSE_NOCDATA;
Daniel Veillardae0765b2008-07-31 19:54:59 +000014586 ctxt->options |= XML_PARSE_NOCDATA;
Daniel Veillarddca8cc72003-09-26 13:53:14 +000014587 }
14588 if (options & XML_PARSE_NSCLEAN) {
14589 ctxt->options |= XML_PARSE_NSCLEAN;
14590 options -= XML_PARSE_NSCLEAN;
14591 }
Daniel Veillard61b93382003-11-03 14:28:31 +000014592 if (options & XML_PARSE_NONET) {
14593 ctxt->options |= XML_PARSE_NONET;
14594 options -= XML_PARSE_NONET;
14595 }
Daniel Veillard8874b942005-08-25 13:19:21 +000014596 if (options & XML_PARSE_COMPACT) {
14597 ctxt->options |= XML_PARSE_COMPACT;
14598 options -= XML_PARSE_COMPACT;
14599 }
Daniel Veillard7e5c3f42008-07-29 16:12:31 +000014600 if (options & XML_PARSE_OLD10) {
14601 ctxt->options |= XML_PARSE_OLD10;
14602 options -= XML_PARSE_OLD10;
14603 }
Daniel Veillard8915c152008-08-26 13:05:34 +000014604 if (options & XML_PARSE_NOBASEFIX) {
14605 ctxt->options |= XML_PARSE_NOBASEFIX;
14606 options -= XML_PARSE_NOBASEFIX;
14607 }
14608 if (options & XML_PARSE_HUGE) {
14609 ctxt->options |= XML_PARSE_HUGE;
14610 options -= XML_PARSE_HUGE;
14611 }
Rob Richardsb9ed0172009-01-05 17:28:50 +000014612 if (options & XML_PARSE_OLDSAX) {
14613 ctxt->options |= XML_PARSE_OLDSAX;
14614 options -= XML_PARSE_OLDSAX;
14615 }
Daniel Veillardc62efc82011-05-16 16:03:50 +080014616 if (options & XML_PARSE_IGNORE_ENC) {
14617 ctxt->options |= XML_PARSE_IGNORE_ENC;
14618 options -= XML_PARSE_IGNORE_ENC;
14619 }
Daniel Veillard7ec29972003-10-31 14:36:36 +000014620 ctxt->linenumbers = 1;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014621 return (options);
14622}
14623
14624/**
Daniel Veillard37334572008-07-31 08:20:02 +000014625 * xmlCtxtUseOptions:
14626 * @ctxt: an XML parser context
14627 * @options: a combination of xmlParserOption
14628 *
14629 * Applies the options to the parser context
14630 *
14631 * Returns 0 in case of success, the set of unknown or unimplemented options
14632 * in case of error.
14633 */
14634int
14635xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14636{
14637 return(xmlCtxtUseOptionsInternal(ctxt, options, NULL));
14638}
14639
14640/**
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014641 * xmlDoRead:
14642 * @ctxt: an XML parser context
Daniel Veillard60942de2003-09-25 21:05:58 +000014643 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014644 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014645 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014646 * @reuse: keep the context for reuse
14647 *
14648 * Common front-end for the xmlRead functions
Daniel Veillard37334572008-07-31 08:20:02 +000014649 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014650 * Returns the resulting document tree or NULL
14651 */
14652static xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014653xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14654 int options, int reuse)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014655{
14656 xmlDocPtr ret;
Daniel Veillard37334572008-07-31 08:20:02 +000014657
14658 xmlCtxtUseOptionsInternal(ctxt, options, encoding);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014659 if (encoding != NULL) {
14660 xmlCharEncodingHandlerPtr hdlr;
14661
14662 hdlr = xmlFindCharEncodingHandler(encoding);
14663 if (hdlr != NULL)
14664 xmlSwitchToEncoding(ctxt, hdlr);
14665 }
Daniel Veillard60942de2003-09-25 21:05:58 +000014666 if ((URL != NULL) && (ctxt->input != NULL) &&
14667 (ctxt->input->filename == NULL))
14668 ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014669 xmlParseDocument(ctxt);
14670 if ((ctxt->wellFormed) || ctxt->recovery)
14671 ret = ctxt->myDoc;
14672 else {
14673 ret = NULL;
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014674 if (ctxt->myDoc != NULL) {
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014675 xmlFreeDoc(ctxt->myDoc);
14676 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014677 }
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014678 ctxt->myDoc = NULL;
14679 if (!reuse) {
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014680 xmlFreeParserCtxt(ctxt);
Daniel Veillarde96a2a42003-09-24 21:23:56 +000014681 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014682
14683 return (ret);
14684}
14685
14686/**
14687 * xmlReadDoc:
14688 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014689 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014690 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014691 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014692 *
14693 * parse an XML in-memory document and build a tree.
14694 *
14695 * Returns the resulting document tree
14696 */
14697xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014698xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014699{
14700 xmlParserCtxtPtr ctxt;
14701
14702 if (cur == NULL)
14703 return (NULL);
14704
14705 ctxt = xmlCreateDocParserCtxt(cur);
14706 if (ctxt == NULL)
14707 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014708 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014709}
14710
14711/**
14712 * xmlReadFile:
14713 * @filename: a file or URL
14714 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014715 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014716 *
14717 * parse an XML file from the filesystem or the network.
14718 *
14719 * Returns the resulting document tree
14720 */
14721xmlDocPtr
14722xmlReadFile(const char *filename, const char *encoding, int options)
14723{
14724 xmlParserCtxtPtr ctxt;
14725
Daniel Veillard61b93382003-11-03 14:28:31 +000014726 ctxt = xmlCreateURLParserCtxt(filename, options);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014727 if (ctxt == NULL)
14728 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014729 return (xmlDoRead(ctxt, NULL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014730}
14731
14732/**
14733 * xmlReadMemory:
14734 * @buffer: a pointer to a char array
14735 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014736 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014737 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014738 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014739 *
14740 * parse an XML in-memory document and build a tree.
14741 *
14742 * Returns the resulting document tree
14743 */
14744xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014745xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014746{
14747 xmlParserCtxtPtr ctxt;
14748
14749 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14750 if (ctxt == NULL)
14751 return (NULL);
Daniel Veillard60942de2003-09-25 21:05:58 +000014752 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014753}
14754
14755/**
14756 * xmlReadFd:
14757 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014758 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014759 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014760 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014761 *
14762 * parse an XML from a file descriptor and build a tree.
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014763 * NOTE that the file descriptor will not be closed when the
14764 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014765 *
14766 * Returns the resulting document tree
14767 */
14768xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014769xmlReadFd(int fd, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014770{
14771 xmlParserCtxtPtr ctxt;
14772 xmlParserInputBufferPtr input;
14773 xmlParserInputPtr stream;
14774
14775 if (fd < 0)
14776 return (NULL);
14777
14778 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14779 if (input == NULL)
14780 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014781 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014782 ctxt = xmlNewParserCtxt();
14783 if (ctxt == NULL) {
14784 xmlFreeParserInputBuffer(input);
14785 return (NULL);
14786 }
14787 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14788 if (stream == NULL) {
14789 xmlFreeParserInputBuffer(input);
14790 xmlFreeParserCtxt(ctxt);
14791 return (NULL);
14792 }
14793 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014794 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014795}
14796
14797/**
14798 * xmlReadIO:
14799 * @ioread: an I/O read function
14800 * @ioclose: an I/O close function
14801 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000014802 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014803 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014804 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014805 *
14806 * parse an XML document from I/O functions and source and build a tree.
Lin Yi-Li24464be2012-05-10 16:14:55 +080014807 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014808 * Returns the resulting document tree
14809 */
14810xmlDocPtr
14811xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
Daniel Veillard60942de2003-09-25 21:05:58 +000014812 void *ioctx, const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014813{
14814 xmlParserCtxtPtr ctxt;
14815 xmlParserInputBufferPtr input;
14816 xmlParserInputPtr stream;
14817
14818 if (ioread == NULL)
14819 return (NULL);
14820
14821 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14822 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080014823 if (input == NULL) {
14824 if (ioclose != NULL)
14825 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014826 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080014827 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014828 ctxt = xmlNewParserCtxt();
14829 if (ctxt == NULL) {
14830 xmlFreeParserInputBuffer(input);
14831 return (NULL);
14832 }
14833 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14834 if (stream == NULL) {
14835 xmlFreeParserInputBuffer(input);
14836 xmlFreeParserCtxt(ctxt);
14837 return (NULL);
14838 }
14839 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014840 return (xmlDoRead(ctxt, URL, encoding, options, 0));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014841}
14842
14843/**
14844 * xmlCtxtReadDoc:
14845 * @ctxt: an XML parser context
14846 * @cur: a pointer to a zero terminated string
Daniel Veillard60942de2003-09-25 21:05:58 +000014847 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014848 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014849 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014850 *
14851 * parse an XML in-memory document and build a tree.
14852 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080014853 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014854 * Returns the resulting document tree
14855 */
14856xmlDocPtr
14857xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
Daniel Veillard60942de2003-09-25 21:05:58 +000014858 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014859{
14860 xmlParserInputPtr stream;
14861
14862 if (cur == NULL)
14863 return (NULL);
14864 if (ctxt == NULL)
14865 return (NULL);
14866
14867 xmlCtxtReset(ctxt);
14868
14869 stream = xmlNewStringInputStream(ctxt, cur);
14870 if (stream == NULL) {
14871 return (NULL);
14872 }
14873 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014874 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014875}
14876
14877/**
14878 * xmlCtxtReadFile:
14879 * @ctxt: an XML parser context
14880 * @filename: a file or URL
14881 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014882 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014883 *
14884 * parse an XML file from the filesystem or the network.
14885 * This reuses the existing @ctxt parser context
14886 *
14887 * Returns the resulting document tree
14888 */
14889xmlDocPtr
14890xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
14891 const char *encoding, int options)
14892{
14893 xmlParserInputPtr stream;
14894
14895 if (filename == NULL)
14896 return (NULL);
14897 if (ctxt == NULL)
14898 return (NULL);
14899
14900 xmlCtxtReset(ctxt);
14901
Daniel Veillard29614c72004-11-26 10:47:26 +000014902 stream = xmlLoadExternalEntity(filename, NULL, ctxt);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014903 if (stream == NULL) {
14904 return (NULL);
14905 }
14906 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014907 return (xmlDoRead(ctxt, NULL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014908}
14909
14910/**
14911 * xmlCtxtReadMemory:
14912 * @ctxt: an XML parser context
14913 * @buffer: a pointer to a char array
14914 * @size: the size of the array
Daniel Veillard60942de2003-09-25 21:05:58 +000014915 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014916 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014917 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014918 *
14919 * parse an XML in-memory document and build a tree.
14920 * This reuses the existing @ctxt parser context
14921 *
14922 * Returns the resulting document tree
14923 */
14924xmlDocPtr
14925xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
Daniel Veillard60942de2003-09-25 21:05:58 +000014926 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014927{
14928 xmlParserInputBufferPtr input;
14929 xmlParserInputPtr stream;
14930
14931 if (ctxt == NULL)
14932 return (NULL);
14933 if (buffer == NULL)
14934 return (NULL);
14935
14936 xmlCtxtReset(ctxt);
14937
14938 input = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14939 if (input == NULL) {
14940 return(NULL);
14941 }
14942
14943 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14944 if (stream == NULL) {
14945 xmlFreeParserInputBuffer(input);
14946 return(NULL);
14947 }
14948
14949 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014950 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014951}
14952
14953/**
14954 * xmlCtxtReadFd:
14955 * @ctxt: an XML parser context
14956 * @fd: an open file descriptor
Daniel Veillard60942de2003-09-25 21:05:58 +000014957 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014958 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000014959 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014960 *
14961 * parse an XML from a file descriptor and build a tree.
14962 * This reuses the existing @ctxt parser context
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014963 * NOTE that the file descriptor will not be closed when the
14964 * reader is closed or reset.
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014965 *
14966 * Returns the resulting document tree
14967 */
14968xmlDocPtr
Daniel Veillard60942de2003-09-25 21:05:58 +000014969xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
14970 const char *URL, const char *encoding, int options)
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014971{
14972 xmlParserInputBufferPtr input;
14973 xmlParserInputPtr stream;
14974
14975 if (fd < 0)
14976 return (NULL);
14977 if (ctxt == NULL)
14978 return (NULL);
14979
14980 xmlCtxtReset(ctxt);
14981
14982
14983 input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14984 if (input == NULL)
14985 return (NULL);
Daniel Veillard4bc5f432003-12-22 18:13:12 +000014986 input->closecallback = NULL;
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014987 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14988 if (stream == NULL) {
14989 xmlFreeParserInputBuffer(input);
14990 return (NULL);
14991 }
14992 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000014993 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000014994}
14995
14996/**
14997 * xmlCtxtReadIO:
14998 * @ctxt: an XML parser context
14999 * @ioread: an I/O read function
15000 * @ioclose: an I/O close function
15001 * @ioctx: an I/O handler
Daniel Veillard60942de2003-09-25 21:05:58 +000015002 * @URL: the base URL to use for the document
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015003 * @encoding: the document encoding, or NULL
Daniel Veillard87ab1c12003-12-21 13:01:56 +000015004 * @options: a combination of xmlParserOption
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015005 *
15006 * parse an XML document from I/O functions and source and build a tree.
15007 * This reuses the existing @ctxt parser context
Lin Yi-Li24464be2012-05-10 16:14:55 +080015008 *
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015009 * Returns the resulting document tree
15010 */
15011xmlDocPtr
15012xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15013 xmlInputCloseCallback ioclose, void *ioctx,
Daniel Veillard60942de2003-09-25 21:05:58 +000015014 const char *URL,
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015015 const char *encoding, int options)
15016{
15017 xmlParserInputBufferPtr input;
15018 xmlParserInputPtr stream;
15019
15020 if (ioread == NULL)
15021 return (NULL);
15022 if (ctxt == NULL)
15023 return (NULL);
15024
15025 xmlCtxtReset(ctxt);
15026
15027 input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15028 XML_CHAR_ENCODING_NONE);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015029 if (input == NULL) {
15030 if (ioclose != NULL)
15031 ioclose(ioctx);
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015032 return (NULL);
Lin Yi-Li24464be2012-05-10 16:14:55 +080015033 }
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015034 stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15035 if (stream == NULL) {
15036 xmlFreeParserInputBuffer(input);
15037 return (NULL);
15038 }
15039 inputPush(ctxt, stream);
Daniel Veillard60942de2003-09-25 21:05:58 +000015040 return (xmlDoRead(ctxt, URL, encoding, options, 1));
Daniel Veillard16fa96c2003-09-23 21:50:54 +000015041}
Daniel Veillard5d4644e2005-04-01 13:11:58 +000015042
15043#define bottom_parser
15044#include "elfgcchack.h"