blob: 0964881f8d015ccc5fae5d95c71d61de7ceeebfe [file] [log] [blame]
Daniel Veillard260a68f1998-08-13 03:39:55 +00001/*
2 * parser.c : an XML 1.0 non-verifying parser
3 *
Daniel Veillarde0854c32000-08-27 21:12:29 +00004 * References:
5 * The XML specification:
6 * http://www.w3.org/TR/REC-xml
7 * Original 1.0 version:
8 * http://www.w3.org/TR/1998/REC-xml-19980210
9 * XML second edition working draft
10 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
11 *
Daniel Veillard260a68f1998-08-13 03:39:55 +000012 * See Copyright for the status of this software.
13 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000014 * Daniel.Veillard@w3.org
Daniel Veillard260a68f1998-08-13 03:39:55 +000015 */
16
17#ifdef WIN32
Daniel Veillard3c558c31999-12-22 11:30:41 +000018#include "win32config.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000019#else
Daniel Veillard7f7d1111999-09-22 09:46:25 +000020#include "config.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000021#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000022
Daniel Veillard260a68f1998-08-13 03:39:55 +000023#include <stdio.h>
Daniel Veillard32bc74e2000-07-14 14:49:25 +000024#include <string.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000025#ifdef HAVE_CTYPE_H
26#include <ctype.h>
27#endif
28#ifdef HAVE_STDLIB_H
Seth Alvese7f12e61998-10-01 20:51:15 +000029#include <stdlib.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000030#endif
31#ifdef HAVE_SYS_STAT_H
Daniel Veillard260a68f1998-08-13 03:39:55 +000032#include <sys/stat.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000033#endif
Daniel Veillard260a68f1998-08-13 03:39:55 +000034#ifdef HAVE_FCNTL_H
35#include <fcntl.h>
36#endif
37#ifdef HAVE_UNISTD_H
38#include <unistd.h>
39#endif
40#ifdef HAVE_ZLIB_H
41#include <zlib.h>
42#endif
43
Daniel Veillard361d8452000-04-03 19:48:13 +000044#include <libxml/xmlmemory.h>
45#include <libxml/tree.h>
46#include <libxml/parser.h>
47#include <libxml/entities.h>
48#include <libxml/encoding.h>
49#include <libxml/valid.h>
50#include <libxml/parserInternals.h>
51#include <libxml/xmlIO.h>
Daniel Veillard496a1cf2000-05-03 14:20:55 +000052#include <libxml/uri.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000053#include "xml-error.h"
Daniel Veillard260a68f1998-08-13 03:39:55 +000054
/* Sizes, in xmlChar units, used for the parser's temporary buffers. */
#define XML_PARSER_BUFFER_SIZE      100
#define XML_PARSER_BIG_BUFFER_SIZE  1000

/*
 * Library-wide defaults, defined here with external linkage.
 * NOTE(review): presumably copied into new parser contexts - confirm
 * against the context initialisation code.
 */
int xmlParserDebugEntities = 0;
int xmlGetWarningsDefaultValue = 1;

60
Daniel Veillard14fff061999-06-22 21:49:07 +000061
/*
 * NULL-terminated table of the "xml"-prefixed processing instruction
 * targets that W3C specifications allow.
 */
const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
Daniel Veillarde2d034d1999-07-27 19:52:06 +000070
Daniel Veillardcf461992000-03-14 18:30:20 +000071void xmlParserHandleReference(xmlParserCtxtPtr ctxt);
72void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
73xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
74 const xmlChar **str);
Daniel Veillard87b95392000-08-12 21:12:04 +000075/************************************************************************
76 * *
77 * Version and Features handling *
78 * *
79 ************************************************************************/
Daniel Veillardbe803962000-06-28 23:40:59 +000080const char *xmlParserVersion = LIBXML_VERSION_STRING;
81
82/*
83 * xmlCheckVersion:
84 * @version: the include version number
85 *
86 * check the compiled lib version against the include one.
87 * This can warn or immediately kill the application
88 */
89void
90xmlCheckVersion(int version) {
Daniel Veillard3f6f7f62000-06-30 17:58:25 +000091 int myversion = (int) LIBXML_VERSION;
Daniel Veillardbe803962000-06-28 23:40:59 +000092
93 if ((myversion / 10000) != (version / 10000)) {
94 fprintf(stderr,
95 "Fatal: program compiled against libxml %d using libxml %d\n",
96 (version / 10000), (myversion / 10000));
97 exit(1);
98 }
99 if ((myversion / 100) < (version / 100)) {
100 fprintf(stderr,
101 "Warning: program compiled against libxml %d using older %d\n",
102 (version / 100), (myversion / 100));
103 }
104}
105
106
/*
 * Names of the parser features reachable through xmlGetFeaturesList(),
 * xmlGetFeature() and xmlSetFeature().  The array has no terminating
 * NULL entry: its length is obtained with sizeof.
 */
const char *xmlFeaturesList[] = {
    /* parser options and state */
    "validate",
    "keep blanks",
    "disable SAX",
    "fetch external entities",
    "substitute entities",
    "gather line info",
    "user data",
    "is html",
    "is standalone",
    "stop parser",
    "document",
    "is well formed",
    "is valid",

    /* the SAX handler block as a whole */
    "SAX block",

    /* individual SAX callbacks */
    "SAX function internalSubset",
    "SAX function isStandalone",
    "SAX function hasInternalSubset",
    "SAX function hasExternalSubset",
    "SAX function resolveEntity",
    "SAX function getEntity",
    "SAX function entityDecl",
    "SAX function notationDecl",
    "SAX function attributeDecl",
    "SAX function elementDecl",
    "SAX function unparsedEntityDecl",
    "SAX function setDocumentLocator",
    "SAX function startDocument",
    "SAX function endDocument",
    "SAX function startElement",
    "SAX function endElement",
    "SAX function reference",
    "SAX function characters",
    "SAX function ignorableWhitespace",
    "SAX function processingInstruction",
    "SAX function comment",
    "SAX function warning",
    "SAX function error",
    "SAX function fatalError",
    "SAX function getParameterEntity",
    "SAX function cdataBlock",
    "SAX function externalSubset",
};
150
151/*
152 * xmlGetFeaturesList:
153 * @len: the length of the features name array (input/output)
154 * @result: an array of string to be filled with the features name.
155 *
156 * Copy at most *@len feature names into the @result array
157 *
158 * Returns -1 in case or error, or the total number of features,
159 * len is updated with the number of strings copied,
160 * strings must not be deallocated
161 */
162int
163xmlGetFeaturesList(int *len, const char **result) {
164 int ret, i;
165
166 ret = sizeof(xmlFeaturesList)/sizeof(xmlFeaturesList[0]);
167 if ((len == NULL) || (result == NULL))
168 return(ret);
169 if ((*len < 0) || (*len >= 1000))
170 return(-1);
171 if (*len > ret)
172 *len = ret;
173 for (i = 0;i < *len;i++)
174 result[i] = xmlFeaturesList[i];
175 return(ret);
176}
177
178/*
179 * xmlGetFeature:
180 * @ctxt: an XML/HTML parser context
181 * @name: the feature name
182 * @result: location to store the result
183 *
184 * Read the current value of one feature of this parser instance
185 *
186 * Returns -1 in case or error, 0 otherwise
187 */
188int
189xmlGetFeature(xmlParserCtxtPtr ctxt, const char *name, void *result) {
190 if ((ctxt == NULL) || (name == NULL) || (result == NULL))
191 return(-1);
192
193 if (!strcmp(name, "validate")) {
194 *((int *) result) = ctxt->validate;
195 } else if (!strcmp(name, "keep blanks")) {
196 *((int *) result) = ctxt->keepBlanks;
197 } else if (!strcmp(name, "disable SAX")) {
198 *((int *) result) = ctxt->disableSAX;
199 } else if (!strcmp(name, "fetch external entities")) {
200 *((int *) result) = ctxt->validate;
201 } else if (!strcmp(name, "substitute entities")) {
202 *((int *) result) = ctxt->replaceEntities;
203 } else if (!strcmp(name, "gather line info")) {
204 *((int *) result) = ctxt->record_info;
205 } else if (!strcmp(name, "user data")) {
206 *((void **)result) = ctxt->userData;
207 } else if (!strcmp(name, "is html")) {
208 *((int *) result) = ctxt->html;
209 } else if (!strcmp(name, "is standalone")) {
210 *((int *) result) = ctxt->standalone;
211 } else if (!strcmp(name, "document")) {
212 *((xmlDocPtr *) result) = ctxt->myDoc;
213 } else if (!strcmp(name, "is well formed")) {
214 *((int *) result) = ctxt->wellFormed;
215 } else if (!strcmp(name, "is valid")) {
216 *((int *) result) = ctxt->valid;
217 } else if (!strcmp(name, "SAX block")) {
218 *((xmlSAXHandlerPtr *) result) = ctxt->sax;
219 } else if (!strcmp(name, "SAX function internalSubset")) {
220 *((internalSubsetSAXFunc *) result) = ctxt->sax->internalSubset;
221 } else if (!strcmp(name, "SAX function isStandalone")) {
222 *((isStandaloneSAXFunc *) result) = ctxt->sax->isStandalone;
223 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
224 *((hasInternalSubsetSAXFunc *) result) = ctxt->sax->hasInternalSubset;
225 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
226 *((hasExternalSubsetSAXFunc *) result) = ctxt->sax->hasExternalSubset;
227 } else if (!strcmp(name, "SAX function resolveEntity")) {
228 *((resolveEntitySAXFunc *) result) = ctxt->sax->resolveEntity;
229 } else if (!strcmp(name, "SAX function getEntity")) {
230 *((getEntitySAXFunc *) result) = ctxt->sax->getEntity;
231 } else if (!strcmp(name, "SAX function entityDecl")) {
232 *((entityDeclSAXFunc *) result) = ctxt->sax->entityDecl;
233 } else if (!strcmp(name, "SAX function notationDecl")) {
234 *((notationDeclSAXFunc *) result) = ctxt->sax->notationDecl;
235 } else if (!strcmp(name, "SAX function attributeDecl")) {
236 *((attributeDeclSAXFunc *) result) = ctxt->sax->attributeDecl;
237 } else if (!strcmp(name, "SAX function elementDecl")) {
238 *((elementDeclSAXFunc *) result) = ctxt->sax->elementDecl;
239 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
240 *((unparsedEntityDeclSAXFunc *) result) = ctxt->sax->unparsedEntityDecl;
241 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
242 *((setDocumentLocatorSAXFunc *) result) = ctxt->sax->setDocumentLocator;
243 } else if (!strcmp(name, "SAX function startDocument")) {
244 *((startDocumentSAXFunc *) result) = ctxt->sax->startDocument;
245 } else if (!strcmp(name, "SAX function endDocument")) {
246 *((endDocumentSAXFunc *) result) = ctxt->sax->endDocument;
247 } else if (!strcmp(name, "SAX function startElement")) {
248 *((startElementSAXFunc *) result) = ctxt->sax->startElement;
249 } else if (!strcmp(name, "SAX function endElement")) {
250 *((endElementSAXFunc *) result) = ctxt->sax->endElement;
251 } else if (!strcmp(name, "SAX function reference")) {
252 *((referenceSAXFunc *) result) = ctxt->sax->reference;
253 } else if (!strcmp(name, "SAX function characters")) {
254 *((charactersSAXFunc *) result) = ctxt->sax->characters;
255 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
256 *((ignorableWhitespaceSAXFunc *) result) = ctxt->sax->ignorableWhitespace;
257 } else if (!strcmp(name, "SAX function processingInstruction")) {
258 *((processingInstructionSAXFunc *) result) = ctxt->sax->processingInstruction;
259 } else if (!strcmp(name, "SAX function comment")) {
260 *((commentSAXFunc *) result) = ctxt->sax->comment;
261 } else if (!strcmp(name, "SAX function warning")) {
262 *((warningSAXFunc *) result) = ctxt->sax->warning;
263 } else if (!strcmp(name, "SAX function error")) {
264 *((errorSAXFunc *) result) = ctxt->sax->error;
265 } else if (!strcmp(name, "SAX function fatalError")) {
266 *((fatalErrorSAXFunc *) result) = ctxt->sax->fatalError;
267 } else if (!strcmp(name, "SAX function getParameterEntity")) {
268 *((getParameterEntitySAXFunc *) result) = ctxt->sax->getParameterEntity;
269 } else if (!strcmp(name, "SAX function cdataBlock")) {
270 *((cdataBlockSAXFunc *) result) = ctxt->sax->cdataBlock;
271 } else if (!strcmp(name, "SAX function externalSubset")) {
272 *((externalSubsetSAXFunc *) result) = ctxt->sax->externalSubset;
273 } else {
274 return(-1);
275 }
276 return(0);
277}
278
279/*
280 * xmlSetFeature:
281 * @ctxt: an XML/HTML parser context
282 * @name: the feature name
283 * @value: pointer to the location of the new value
284 *
285 * Change the current value of one feature of this parser instance
286 *
287 * Returns -1 in case or error, 0 otherwise
288 */
289int
290xmlSetFeature(xmlParserCtxtPtr ctxt, const char *name, void *value) {
291 if ((ctxt == NULL) || (name == NULL) || (value == NULL))
292 return(-1);
293
294 if (!strcmp(name, "validate")) {
295 ctxt->validate = *((int *) value);
296 } else if (!strcmp(name, "keep blanks")) {
297 ctxt->keepBlanks = *((int *) value);
298 } else if (!strcmp(name, "disable SAX")) {
299 ctxt->disableSAX = *((int *) value);
300 } else if (!strcmp(name, "fetch external entities")) {
301 int newvalid = *((int *) value);
302 if ((!ctxt->validate) && (newvalid != 0)) {
303 if (ctxt->vctxt.warning == NULL)
304 ctxt->vctxt.warning = xmlParserValidityWarning;
305 if (ctxt->vctxt.error == NULL)
306 ctxt->vctxt.error = xmlParserValidityError;
307 /* Allocate the Node stack */
308 ctxt->vctxt.nodeTab = (xmlNodePtr *)
309 xmlMalloc(4 * sizeof(xmlNodePtr));
310 ctxt->vctxt.nodeNr = 0;
311 ctxt->vctxt.nodeMax = 4;
312 ctxt->vctxt.node = NULL;
313 }
314 ctxt->validate = newvalid;
315 } else if (!strcmp(name, "substitute entities")) {
316 ctxt->replaceEntities = *((int *) value);
317 } else if (!strcmp(name, "gather line info")) {
318 ctxt->record_info = *((int *) value);
319 } else if (!strcmp(name, "user data")) {
320 ctxt->userData = *((void **)value);
321 } else if (!strcmp(name, "is html")) {
322 ctxt->html = *((int *) value);
323 } else if (!strcmp(name, "is standalone")) {
324 ctxt->standalone = *((int *) value);
325 } else if (!strcmp(name, "document")) {
326 ctxt->myDoc = *((xmlDocPtr *) value);
327 } else if (!strcmp(name, "is well formed")) {
328 ctxt->wellFormed = *((int *) value);
329 } else if (!strcmp(name, "is valid")) {
330 ctxt->valid = *((int *) value);
331 } else if (!strcmp(name, "SAX block")) {
332 ctxt->sax = *((xmlSAXHandlerPtr *) value);
333 } else if (!strcmp(name, "SAX function internalSubset")) {
334 ctxt->sax->internalSubset = *((internalSubsetSAXFunc *) value);
335 } else if (!strcmp(name, "SAX function isStandalone")) {
336 ctxt->sax->isStandalone = *((isStandaloneSAXFunc *) value);
337 } else if (!strcmp(name, "SAX function hasInternalSubset")) {
338 ctxt->sax->hasInternalSubset = *((hasInternalSubsetSAXFunc *) value);
339 } else if (!strcmp(name, "SAX function hasExternalSubset")) {
340 ctxt->sax->hasExternalSubset = *((hasExternalSubsetSAXFunc *) value);
341 } else if (!strcmp(name, "SAX function resolveEntity")) {
342 ctxt->sax->resolveEntity = *((resolveEntitySAXFunc *) value);
343 } else if (!strcmp(name, "SAX function getEntity")) {
344 ctxt->sax->getEntity = *((getEntitySAXFunc *) value);
345 } else if (!strcmp(name, "SAX function entityDecl")) {
346 ctxt->sax->entityDecl = *((entityDeclSAXFunc *) value);
347 } else if (!strcmp(name, "SAX function notationDecl")) {
348 ctxt->sax->notationDecl = *((notationDeclSAXFunc *) value);
349 } else if (!strcmp(name, "SAX function attributeDecl")) {
350 ctxt->sax->attributeDecl = *((attributeDeclSAXFunc *) value);
351 } else if (!strcmp(name, "SAX function elementDecl")) {
352 ctxt->sax->elementDecl = *((elementDeclSAXFunc *) value);
353 } else if (!strcmp(name, "SAX function unparsedEntityDecl")) {
354 ctxt->sax->unparsedEntityDecl = *((unparsedEntityDeclSAXFunc *) value);
355 } else if (!strcmp(name, "SAX function setDocumentLocator")) {
356 ctxt->sax->setDocumentLocator = *((setDocumentLocatorSAXFunc *) value);
357 } else if (!strcmp(name, "SAX function startDocument")) {
358 ctxt->sax->startDocument = *((startDocumentSAXFunc *) value);
359 } else if (!strcmp(name, "SAX function endDocument")) {
360 ctxt->sax->endDocument = *((endDocumentSAXFunc *) value);
361 } else if (!strcmp(name, "SAX function startElement")) {
362 ctxt->sax->startElement = *((startElementSAXFunc *) value);
363 } else if (!strcmp(name, "SAX function endElement")) {
364 ctxt->sax->endElement = *((endElementSAXFunc *) value);
365 } else if (!strcmp(name, "SAX function reference")) {
366 ctxt->sax->reference = *((referenceSAXFunc *) value);
367 } else if (!strcmp(name, "SAX function characters")) {
368 ctxt->sax->characters = *((charactersSAXFunc *) value);
369 } else if (!strcmp(name, "SAX function ignorableWhitespace")) {
370 ctxt->sax->ignorableWhitespace = *((ignorableWhitespaceSAXFunc *) value);
371 } else if (!strcmp(name, "SAX function processingInstruction")) {
372 ctxt->sax->processingInstruction = *((processingInstructionSAXFunc *) value);
373 } else if (!strcmp(name, "SAX function comment")) {
374 ctxt->sax->comment = *((commentSAXFunc *) value);
375 } else if (!strcmp(name, "SAX function warning")) {
376 ctxt->sax->warning = *((warningSAXFunc *) value);
377 } else if (!strcmp(name, "SAX function error")) {
378 ctxt->sax->error = *((errorSAXFunc *) value);
379 } else if (!strcmp(name, "SAX function fatalError")) {
380 ctxt->sax->fatalError = *((fatalErrorSAXFunc *) value);
381 } else if (!strcmp(name, "SAX function getParameterEntity")) {
382 ctxt->sax->getParameterEntity = *((getParameterEntitySAXFunc *) value);
383 } else if (!strcmp(name, "SAX function cdataBlock")) {
384 ctxt->sax->cdataBlock = *((cdataBlockSAXFunc *) value);
385 } else if (!strcmp(name, "SAX function externalSubset")) {
386 ctxt->sax->externalSubset = *((externalSubsetSAXFunc *) value);
387 } else {
388 return(-1);
389 }
390 return(0);
391}
392
393
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000394/************************************************************************
395 * *
396 * Input handling functions for progressive parsing *
397 * *
398 ************************************************************************/
399
400/* #define DEBUG_INPUT */
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000401/* #define DEBUG_STACK */
402/* #define DEBUG_PUSH */
403
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000404
/* amount of input requested or kept available at a time by the input helpers */
#define INPUT_CHUNK	250

/* we need to keep enough input to show errors in context */
#define LINE_LEN        80
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000408
#ifdef DEBUG_INPUT
#define CHECK_BUFFER(in) check_buffer(in)

/*
 * check_buffer:
 * @in:  an XML parser input
 *
 * Debugging helper, only compiled when DEBUG_INPUT is defined.
 * Reports on stderr when in->base does not match the buffer content or
 * when in->cur lies outside [base, base + use], then dumps the buffer
 * state.  Pointers are printed with %p and the offset/counters with
 * matching conversions so the output is correct on platforms where
 * pointers are wider than int.
 */
void check_buffer(xmlParserInputPtr in) {
    if (in->base != in->buf->buffer->content) {
        fprintf(stderr, "xmlParserInput: base mismatch problem\n");
    }
    if (in->cur < in->base) {
        fprintf(stderr, "xmlParserInput: cur < base problem\n");
    }
    if (in->cur > in->base + in->buf->buffer->use) {
        fprintf(stderr, "xmlParserInput: cur > base + use problem\n");
    }
    fprintf(stderr,"buffer %p : content %p, cur %ld, use %u, size %u\n",
            (void *) in, (void *) in->buf->buffer->content,
            (long) (in->cur - in->base),
            (unsigned int) in->buf->buffer->use,
            (unsigned int) in->buf->buffer->size);
}

#else
/* without DEBUG_INPUT the check expands to nothing */
#define CHECK_BUFFER(in)
#endif
430
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000431
432/**
433 * xmlParserInputRead:
434 * @in: an XML parser input
435 * @len: an indicative size for the lookahead
436 *
437 * This function refresh the input for the parser. It doesn't try to
438 * preserve pointers to the input buffer, and discard already read data
439 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000440 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000441 * end of this entity
442 */
443int
444xmlParserInputRead(xmlParserInputPtr in, int len) {
445 int ret;
446 int used;
447 int index;
448
449#ifdef DEBUG_INPUT
450 fprintf(stderr, "Read\n");
451#endif
452 if (in->buf == NULL) return(-1);
453 if (in->base == NULL) return(-1);
454 if (in->cur == NULL) return(-1);
455 if (in->buf->buffer == NULL) return(-1);
Daniel Veillard52402ce2000-08-22 23:36:12 +0000456 if (in->buf->readcallback == NULL) return(-1);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000457
458 CHECK_BUFFER(in);
459
460 used = in->cur - in->buf->buffer->content;
461 ret = xmlBufferShrink(in->buf->buffer, used);
462 if (ret > 0) {
463 in->cur -= ret;
464 in->consumed += ret;
465 }
466 ret = xmlParserInputBufferRead(in->buf, len);
467 if (in->base != in->buf->buffer->content) {
468 /*
469 * the buffer has been realloced
470 */
471 index = in->cur - in->base;
472 in->base = in->buf->buffer->content;
473 in->cur = &in->buf->buffer->content[index];
474 }
475
476 CHECK_BUFFER(in);
477
478 return(ret);
479}
480
481/**
482 * xmlParserInputGrow:
483 * @in: an XML parser input
484 * @len: an indicative size for the lookahead
485 *
486 * This function increase the input for the parser. It tries to
487 * preserve pointers to the input buffer, and keep already read data
488 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000489 * Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000490 * end of this entity
491 */
492int
493xmlParserInputGrow(xmlParserInputPtr in, int len) {
494 int ret;
495 int index;
496
497#ifdef DEBUG_INPUT
498 fprintf(stderr, "Grow\n");
499#endif
500 if (in->buf == NULL) return(-1);
501 if (in->base == NULL) return(-1);
502 if (in->cur == NULL) return(-1);
503 if (in->buf->buffer == NULL) return(-1);
504
505 CHECK_BUFFER(in);
506
507 index = in->cur - in->base;
Daniel Veillard3f6f7f62000-06-30 17:58:25 +0000508 if (in->buf->buffer->use > (unsigned int) index + INPUT_CHUNK) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000509
510 CHECK_BUFFER(in);
511
512 return(0);
513 }
Daniel Veillard5d211f42000-04-07 17:00:24 +0000514 if (in->buf->readcallback != NULL)
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000515 ret = xmlParserInputBufferGrow(in->buf, len);
516 else
517 return(0);
Daniel Veillard10a2c651999-12-12 13:03:50 +0000518
519 /*
520 * NOTE : in->base may be a "dandling" i.e. freed pointer in this
521 * block, but we use it really as an integer to do some
522 * pointer arithmetic. Insure will raise it as a bug but in
523 * that specific case, that's not !
524 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000525 if (in->base != in->buf->buffer->content) {
526 /*
527 * the buffer has been realloced
528 */
529 index = in->cur - in->base;
530 in->base = in->buf->buffer->content;
531 in->cur = &in->buf->buffer->content[index];
532 }
533
534 CHECK_BUFFER(in);
535
536 return(ret);
537}
538
539/**
540 * xmlParserInputShrink:
541 * @in: an XML parser input
542 *
543 * This function removes used input for the parser.
544 */
545void
546xmlParserInputShrink(xmlParserInputPtr in) {
547 int used;
548 int ret;
549 int index;
550
551#ifdef DEBUG_INPUT
552 fprintf(stderr, "Shrink\n");
553#endif
554 if (in->buf == NULL) return;
555 if (in->base == NULL) return;
556 if (in->cur == NULL) return;
557 if (in->buf->buffer == NULL) return;
558
559 CHECK_BUFFER(in);
560
561 used = in->cur - in->buf->buffer->content;
562 if (used > INPUT_CHUNK) {
Daniel Veillardb05deb71999-08-10 19:04:08 +0000563 ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000564 if (ret > 0) {
565 in->cur -= ret;
566 in->consumed += ret;
567 }
568 }
569
570 CHECK_BUFFER(in);
571
572 if (in->buf->buffer->use > INPUT_CHUNK) {
573 return;
574 }
575 xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
576 if (in->base != in->buf->buffer->content) {
577 /*
578 * the buffer has been realloced
579 */
580 index = in->cur - in->base;
581 in->base = in->buf->buffer->content;
582 in->cur = &in->buf->buffer->content[index];
583 }
584
585 CHECK_BUFFER(in);
586}
587
Daniel Veillard260a68f1998-08-13 03:39:55 +0000588/************************************************************************
589 * *
590 * Parser stacks related functions and macros *
591 * *
592 ************************************************************************/
Daniel Veillard011b63c1999-06-02 17:44:04 +0000593
/*
 * Library-wide default settings, defined here with external linkage.
 * NOTE(review): presumably copied into each new parser context -
 * confirm against the context initialisation code.
 */
int xmlKeepBlanksDefaultValue = 1;
int xmlPedanticParserDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
int xmlSubstituteEntitiesDefaultValue = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +0000598xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
599 const xmlChar ** str);
Daniel Veillard011b63c1999-06-02 17:44:04 +0000600
Daniel Veillard260a68f1998-08-13 03:39:55 +0000601/*
602 * Generic function for accessing stacks in the Parser Context
603 */
604
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000605#define PUSH_AND_POP(scope, type, name) \
606scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000607 if (ctxt->name##Nr >= ctxt->name##Max) { \
608 ctxt->name##Max *= 2; \
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000609 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000610 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
611 if (ctxt->name##Tab == NULL) { \
612 fprintf(stderr, "realloc failed !\n"); \
Daniel Veillard0142b842000-01-14 14:45:24 +0000613 return(0); \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000614 } \
615 } \
616 ctxt->name##Tab[ctxt->name##Nr] = value; \
617 ctxt->name = value; \
618 return(ctxt->name##Nr++); \
619} \
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000620scope type name##Pop(xmlParserCtxtPtr ctxt) { \
Daniel Veillardd692aa41999-02-28 21:54:31 +0000621 type ret; \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000622 if (ctxt->name##Nr <= 0) return(0); \
623 ctxt->name##Nr--; \
Daniel Veillardccb09631998-10-27 06:21:04 +0000624 if (ctxt->name##Nr > 0) \
625 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
626 else \
627 ctxt->name = NULL; \
Daniel Veillardd692aa41999-02-28 21:54:31 +0000628 ret = ctxt->name##Tab[ctxt->name##Nr]; \
629 ctxt->name##Tab[ctxt->name##Nr] = 0; \
630 return(ret); \
Daniel Veillard260a68f1998-08-13 03:39:55 +0000631} \
632
Daniel Veillarddbfd6411999-12-28 16:35:14 +0000633PUSH_AND_POP(extern, xmlParserInputPtr, input)
634PUSH_AND_POP(extern, xmlNodePtr, node)
635PUSH_AND_POP(extern, xmlChar*, name)
Daniel Veillard260a68f1998-08-13 03:39:55 +0000636
Daniel Veillardcf461992000-03-14 18:30:20 +0000637int spacePush(xmlParserCtxtPtr ctxt, int val) {
638 if (ctxt->spaceNr >= ctxt->spaceMax) {
639 ctxt->spaceMax *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000640 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
Daniel Veillardcf461992000-03-14 18:30:20 +0000641 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
642 if (ctxt->spaceTab == NULL) {
643 fprintf(stderr, "realloc failed !\n");
644 return(0);
645 }
646 }
647 ctxt->spaceTab[ctxt->spaceNr] = val;
648 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
649 return(ctxt->spaceNr++);
650}
651
652int spacePop(xmlParserCtxtPtr ctxt) {
653 int ret;
654 if (ctxt->spaceNr <= 0) return(0);
655 ctxt->spaceNr--;
656 if (ctxt->spaceNr > 0)
657 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
658 else
659 ctxt->space = NULL;
660 ret = ctxt->spaceTab[ctxt->spaceNr];
661 ctxt->spaceTab[ctxt->spaceNr] = -1;
662 return(ret);
663}
664
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one needs to make assumptions on the context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value if compiled
 *           in ISO-Latin or UTF-8.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops unfinished entities on the fly.
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 *   CUR_CHAR Return the current char as an int as well as its length.
 */
Daniel Veillard260a68f1998-08-13 03:39:55 +0000691
/*
 * All the macros below expect a variable named "ctxt" (the parser
 * context) to be in scope at the point of use.
 *
 * NOTE(review): SKIP, SHRINK, GROW, NEXTL and COPY_BUF expand to several
 * statements, and SKIP_BLANKS, NEXT, CUR_CHAR and CUR_SCHAR carry their
 * own trailing ';'.  None of them is safe as the sole body of an
 * unbraced if/else or loop.
 */

/* -1 while a token is pending in ctxt->token, else the xmlChar at cur */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* the pending ctxt->token if non-zero, else the xmlChar at cur */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* the xmlChar located val positions after cur (no bounds check) */
#define NXT(val) ctxt->input->cur[(val)]
/* the current input pointer itself */
#define CUR_PTR ctxt->input->cur

/*
 * Advance cur and ctxt->nbChars by val, then hand over to the
 * PE-reference / reference handlers if the new current char is '%' or
 * '&', and pop the input when it is exhausted and cannot be grown.
 */
#define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val);	\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* Shrink the current input, popping it if it is exhausted afterwards */
#define SHRINK xmlParserInputShrink(ctxt->input);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* Grow the current input, popping it if it is still exhausted afterwards */
#define GROW xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt)

/* shorthand for xmlSkipBlankChars(ctxt) */
#define SKIP_BLANKS xmlSkipBlankChars(ctxt);

/* shorthand for xmlNextChar(ctxt) */
#define NEXT xmlNextChar(ctxt);

/*
 * Advance cur by l xmlChars, updating line/col from the char being
 * left, clearing ctxt->token, and handing over to the PE-reference /
 * reference handlers if the new current char is '%' or '&'.
 */
#define NEXTL(l)							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);

/* shorthands for xmlCurrentChar / xmlStringCurrentChar; l receives a length */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l);
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l);

/*
 * Append char v, of encoded length l, to buffer b at index i and advance
 * i: a direct store when l is 1, xmlCopyChar() otherwise.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyChar(l,&b[i],v);

733/**
734 * xmlNextChar:
735 * @ctxt: the XML parser context
736 *
737 * Skip to the next char input char.
738 */
739
740void
741xmlNextChar(xmlParserCtxtPtr ctxt) {
Daniel Veillard3f6f7f62000-06-30 17:58:25 +0000742 if (ctxt->instate == XML_PARSER_EOF)
743 return;
744
Daniel Veillardcf461992000-03-14 18:30:20 +0000745 /*
746 * TODO: 2.11 End-of-Line Handling
747 * the literal two-character sequence "#xD#xA" or a standalone
748 * literal #xD, an XML processor must pass to the application
749 * the single character #xA.
750 */
751 if (ctxt->token != 0) ctxt->token = 0;
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000752 else if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillardcf461992000-03-14 18:30:20 +0000753 if ((*ctxt->input->cur == 0) &&
754 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
755 (ctxt->instate != XML_PARSER_COMMENT)) {
756 /*
757 * If we are at the end of the current entity and
758 * the context allows it, we pop consumed entities
759 * automatically.
760 * TODO: the auto closing should be blocked in other cases
761 */
762 xmlPopInput(ctxt);
763 } else {
764 if (*(ctxt->input->cur) == '\n') {
765 ctxt->input->line++; ctxt->input->col = 1;
766 } else ctxt->input->col++;
Daniel Veillardbe803962000-06-28 23:40:59 +0000767 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillardcf461992000-03-14 18:30:20 +0000768 /*
769 * We are supposed to handle UTF8, check it's valid
770 * From rfc2044: encoding of the Unicode values on UTF-8:
771 *
772 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
773 * 0000 0000-0000 007F 0xxxxxxx
774 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
775 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
776 *
777 * Check for the 0x110000 limit too
778 */
779 const unsigned char *cur = ctxt->input->cur;
780 unsigned char c;
781
782 c = *cur;
783 if (c & 0x80) {
784 if (cur[1] == 0)
785 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
786 if ((cur[1] & 0xc0) != 0x80)
787 goto encoding_error;
788 if ((c & 0xe0) == 0xe0) {
789 unsigned int val;
790
791 if (cur[2] == 0)
792 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
793 if ((cur[2] & 0xc0) != 0x80)
794 goto encoding_error;
795 if ((c & 0xf0) == 0xf0) {
796 if (cur[3] == 0)
797 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
798 if (((c & 0xf8) != 0xf0) ||
799 ((cur[3] & 0xc0) != 0x80))
800 goto encoding_error;
801 /* 4-byte code */
802 ctxt->input->cur += 4;
803 val = (cur[0] & 0x7) << 18;
804 val |= (cur[1] & 0x3f) << 12;
805 val |= (cur[2] & 0x3f) << 6;
806 val |= cur[3] & 0x3f;
807 } else {
808 /* 3-byte code */
809 ctxt->input->cur += 3;
810 val = (cur[0] & 0xf) << 12;
811 val |= (cur[1] & 0x3f) << 6;
812 val |= cur[2] & 0x3f;
813 }
814 if (((val > 0xd7ff) && (val < 0xe000)) ||
815 ((val > 0xfffd) && (val < 0x10000)) ||
816 (val >= 0x110000)) {
817 if ((ctxt->sax != NULL) &&
818 (ctxt->sax->error != NULL))
819 ctxt->sax->error(ctxt->userData,
Daniel Veillard496a1cf2000-05-03 14:20:55 +0000820 "Char 0x%X out of allowed range\n", val);
Daniel Veillardcf461992000-03-14 18:30:20 +0000821 ctxt->errNo = XML_ERR_INVALID_ENCODING;
822 ctxt->wellFormed = 0;
823 ctxt->disableSAX = 1;
824 }
825 } else
826 /* 2-byte code */
827 ctxt->input->cur += 2;
828 } else
829 /* 1-byte code */
830 ctxt->input->cur++;
831 } else {
832 /*
833 * Assume it's a fixed lenght encoding (1) with
834 * a compatibke encoding for the ASCII set, since
835 * XML constructs only use < 128 chars
836 */
837 ctxt->input->cur++;
838 }
839 ctxt->nbChars++;
840 if (*ctxt->input->cur == 0)
841 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
842 }
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000843 } else {
844 ctxt->input->cur++;
845 ctxt->nbChars++;
846 if (*ctxt->input->cur == 0)
847 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
Daniel Veillardcf461992000-03-14 18:30:20 +0000848 }
Daniel Veillard32bc74e2000-07-14 14:49:25 +0000849 if ((*ctxt->input->cur == '%') && (!ctxt->html))
850 xmlParserHandlePEReference(ctxt);
851 if ((*ctxt->input->cur == '&')&& (!ctxt->html))
852 xmlParserHandleReference(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +0000853 if ((*ctxt->input->cur == 0) &&
854 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
855 xmlPopInput(ctxt);
856 return;
857encoding_error:
858 /*
859 * If we detect an UTF8 error that probably mean that the
860 * input encoding didn't get properly advertized in the
861 * declaration header. Report the error and switch the encoding
862 * to ISO-Latin-1 (if you don't like this policy, just declare the
863 * encoding !)
864 */
Daniel Veillardbe803962000-06-28 23:40:59 +0000865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +0000866 ctxt->sax->error(ctxt->userData,
867 "Input is not proper UTF-8, indicate encoding !\n");
Daniel Veillardbe803962000-06-28 23:40:59 +0000868 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
869 ctxt->input->cur[0], ctxt->input->cur[1],
870 ctxt->input->cur[2], ctxt->input->cur[3]);
871 }
Daniel Veillardcf461992000-03-14 18:30:20 +0000872 ctxt->errNo = XML_ERR_INVALID_ENCODING;
873
Daniel Veillardbe803962000-06-28 23:40:59 +0000874 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillardcf461992000-03-14 18:30:20 +0000875 ctxt->input->cur++;
876 return;
877}
878
879/**
880 * xmlCurrentChar:
881 * @ctxt: the XML parser context
882 * @len: pointer to the length of the char read
883 *
884 * The current char value, if using UTF-8 this may actaully span multiple
885 * bytes in the input buffer. Implement the end of line normalization:
886 * 2.11 End-of-Line Handling
887 * Wherever an external parsed entity or the literal entity value
888 * of an internal parsed entity contains either the literal two-character
889 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
890 * must pass to the application the single character #xA.
891 * This behavior can conveniently be produced by normalizing all
892 * line breaks to #xA on input, before parsing.)
893 *
894 * Returns the current char value and its lenght
895 */
896
897int
898xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
Daniel Veillard3f6f7f62000-06-30 17:58:25 +0000899 if (ctxt->instate == XML_PARSER_EOF)
900 return(0);
901
Daniel Veillardcf461992000-03-14 18:30:20 +0000902 if (ctxt->token != 0) {
903 *len = 0;
904 return(ctxt->token);
905 }
Daniel Veillardbe803962000-06-28 23:40:59 +0000906 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillardcf461992000-03-14 18:30:20 +0000907 /*
908 * We are supposed to handle UTF8, check it's valid
909 * From rfc2044: encoding of the Unicode values on UTF-8:
910 *
911 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
912 * 0000 0000-0000 007F 0xxxxxxx
913 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
914 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
915 *
916 * Check for the 0x110000 limit too
917 */
918 const unsigned char *cur = ctxt->input->cur;
919 unsigned char c;
920 unsigned int val;
921
922 c = *cur;
923 if (c & 0x80) {
924 if (cur[1] == 0)
925 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
926 if ((cur[1] & 0xc0) != 0x80)
927 goto encoding_error;
928 if ((c & 0xe0) == 0xe0) {
929
930 if (cur[2] == 0)
931 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
932 if ((cur[2] & 0xc0) != 0x80)
933 goto encoding_error;
934 if ((c & 0xf0) == 0xf0) {
935 if (cur[3] == 0)
936 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
937 if (((c & 0xf8) != 0xf0) ||
938 ((cur[3] & 0xc0) != 0x80))
939 goto encoding_error;
940 /* 4-byte code */
941 *len = 4;
942 val = (cur[0] & 0x7) << 18;
943 val |= (cur[1] & 0x3f) << 12;
944 val |= (cur[2] & 0x3f) << 6;
945 val |= cur[3] & 0x3f;
946 } else {
947 /* 3-byte code */
948 *len = 3;
949 val = (cur[0] & 0xf) << 12;
950 val |= (cur[1] & 0x3f) << 6;
951 val |= cur[2] & 0x3f;
952 }
953 } else {
954 /* 2-byte code */
955 *len = 2;
956 val = (cur[0] & 0x1f) << 6;
957 val |= cur[1] & 0x3f;
958 }
959 if (!IS_CHAR(val)) {
960 if ((ctxt->sax != NULL) &&
961 (ctxt->sax->error != NULL))
962 ctxt->sax->error(ctxt->userData,
Daniel Veillard496a1cf2000-05-03 14:20:55 +0000963 "Char 0x%X out of allowed range\n", val);
Daniel Veillardcf461992000-03-14 18:30:20 +0000964 ctxt->errNo = XML_ERR_INVALID_ENCODING;
965 ctxt->wellFormed = 0;
966 ctxt->disableSAX = 1;
967 }
968 return(val);
969 } else {
970 /* 1-byte code */
971 *len = 1;
972 if (*ctxt->input->cur == 0xD) {
973 if (ctxt->input->cur[1] == 0xA) {
974 ctxt->nbChars++;
975 ctxt->input->cur++;
976 }
977 return(0xA);
978 }
979 return((int) *ctxt->input->cur);
980 }
981 }
982 /*
983 * Assume it's a fixed lenght encoding (1) with
984 * a compatibke encoding for the ASCII set, since
985 * XML constructs only use < 128 chars
986 */
987 *len = 1;
988 if (*ctxt->input->cur == 0xD) {
989 if (ctxt->input->cur[1] == 0xA) {
990 ctxt->nbChars++;
991 ctxt->input->cur++;
992 }
993 return(0xA);
994 }
995 return((int) *ctxt->input->cur);
996encoding_error:
997 /*
998 * If we detect an UTF8 error that probably mean that the
999 * input encoding didn't get properly advertized in the
1000 * declaration header. Report the error and switch the encoding
1001 * to ISO-Latin-1 (if you don't like this policy, just declare the
1002 * encoding !)
1003 */
Daniel Veillardbe803962000-06-28 23:40:59 +00001004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00001005 ctxt->sax->error(ctxt->userData,
1006 "Input is not proper UTF-8, indicate encoding !\n");
Daniel Veillardbe803962000-06-28 23:40:59 +00001007 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1008 ctxt->input->cur[0], ctxt->input->cur[1],
1009 ctxt->input->cur[2], ctxt->input->cur[3]);
1010 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001011 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1012
Daniel Veillardbe803962000-06-28 23:40:59 +00001013 ctxt->charset = XML_CHAR_ENCODING_8859_1;
Daniel Veillardcf461992000-03-14 18:30:20 +00001014 *len = 1;
1015 return((int) *ctxt->input->cur);
1016}
1017
1018/**
1019 * xmlStringCurrentChar:
1020 * @ctxt: the XML parser context
1021 * @cur: pointer to the beginning of the char
1022 * @len: pointer to the length of the char read
1023 *
1024 * The current char value, if using UTF-8 this may actaully span multiple
1025 * bytes in the input buffer.
1026 *
1027 * Returns the current char value and its lenght
1028 */
1029
1030int
1031xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
Daniel Veillardbe803962000-06-28 23:40:59 +00001032 if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
Daniel Veillardcf461992000-03-14 18:30:20 +00001033 /*
1034 * We are supposed to handle UTF8, check it's valid
1035 * From rfc2044: encoding of the Unicode values on UTF-8:
1036 *
1037 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1038 * 0000 0000-0000 007F 0xxxxxxx
1039 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1040 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1041 *
1042 * Check for the 0x110000 limit too
1043 */
1044 unsigned char c;
1045 unsigned int val;
1046
1047 c = *cur;
1048 if (c & 0x80) {
1049 if ((cur[1] & 0xc0) != 0x80)
1050 goto encoding_error;
1051 if ((c & 0xe0) == 0xe0) {
1052
1053 if ((cur[2] & 0xc0) != 0x80)
1054 goto encoding_error;
1055 if ((c & 0xf0) == 0xf0) {
1056 if (((c & 0xf8) != 0xf0) ||
1057 ((cur[3] & 0xc0) != 0x80))
1058 goto encoding_error;
1059 /* 4-byte code */
1060 *len = 4;
1061 val = (cur[0] & 0x7) << 18;
1062 val |= (cur[1] & 0x3f) << 12;
1063 val |= (cur[2] & 0x3f) << 6;
1064 val |= cur[3] & 0x3f;
1065 } else {
1066 /* 3-byte code */
1067 *len = 3;
1068 val = (cur[0] & 0xf) << 12;
1069 val |= (cur[1] & 0x3f) << 6;
1070 val |= cur[2] & 0x3f;
1071 }
1072 } else {
1073 /* 2-byte code */
1074 *len = 2;
1075 val = (cur[0] & 0x1f) << 6;
1076 val |= cur[2] & 0x3f;
1077 }
1078 if (!IS_CHAR(val)) {
1079 if ((ctxt->sax != NULL) &&
1080 (ctxt->sax->error != NULL))
1081 ctxt->sax->error(ctxt->userData,
Daniel Veillard496a1cf2000-05-03 14:20:55 +00001082 "Char 0x%X out of allowed range\n", val);
Daniel Veillardcf461992000-03-14 18:30:20 +00001083 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1084 ctxt->wellFormed = 0;
1085 ctxt->disableSAX = 1;
1086 }
1087 return(val);
1088 } else {
1089 /* 1-byte code */
1090 *len = 1;
1091 return((int) *cur);
1092 }
1093 }
1094 /*
1095 * Assume it's a fixed lenght encoding (1) with
1096 * a compatibke encoding for the ASCII set, since
1097 * XML constructs only use < 128 chars
1098 */
1099 *len = 1;
1100 return((int) *cur);
1101encoding_error:
1102 /*
1103 * If we detect an UTF8 error that probably mean that the
1104 * input encoding didn't get properly advertized in the
1105 * declaration header. Report the error and switch the encoding
1106 * to ISO-Latin-1 (if you don't like this policy, just declare the
1107 * encoding !)
1108 */
Daniel Veillardbe803962000-06-28 23:40:59 +00001109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00001110 ctxt->sax->error(ctxt->userData,
1111 "Input is not proper UTF-8, indicate encoding !\n");
Daniel Veillardbe803962000-06-28 23:40:59 +00001112 ctxt->sax->error(ctxt->userData, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
1113 ctxt->input->cur[0], ctxt->input->cur[1],
1114 ctxt->input->cur[2], ctxt->input->cur[3]);
1115 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001116 ctxt->errNo = XML_ERR_INVALID_ENCODING;
1117
1118 *len = 1;
1119 return((int) *cur);
1120}
1121
1122/**
1123 * xmlCopyChar:
1124 * @len: pointer to the length of the char read (or zero)
1125 * @array: pointer to an arry of xmlChar
1126 * @val: the char value
1127 *
1128 * append the char value in the array
1129 *
1130 * Returns the number of xmlChar written
1131 */
1132
1133int
1134xmlCopyChar(int len, xmlChar *out, int val) {
1135 /*
1136 * We are supposed to handle UTF8, check it's valid
1137 * From rfc2044: encoding of the Unicode values on UTF-8:
1138 *
1139 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
1140 * 0000 0000-0000 007F 0xxxxxxx
1141 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1142 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1143 */
1144 if (len == 0) {
1145 if (val < 0) len = 0;
1146 else if (val < 0x80) len = 1;
1147 else if (val < 0x800) len = 2;
1148 else if (val < 0x10000) len = 3;
1149 else if (val < 0x110000) len = 4;
1150 if (len == 0) {
1151 fprintf(stderr, "Internal error, xmlCopyChar 0x%X out of bound\n",
1152 val);
1153 return(0);
1154 }
1155 }
1156 if (len > 1) {
1157 int bits;
1158
1159 if (val < 0x80) { *out++= val; bits= -6; }
1160 else if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1161 else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6; }
1162 else { *out++= (val >> 18) | 0xF0; bits= 12; }
1163
1164 for ( ; bits >= 0; bits-= 6)
1165 *out++= ((val >> bits) & 0x3F) | 0x80 ;
1166
1167 return(len);
1168 }
1169 *out = (xmlChar) val;
1170 return(1);
1171}
1172
1173/**
1174 * xmlSkipBlankChars:
1175 * @ctxt: the XML parser context
1176 *
1177 * skip all blanks character found at that point in the input streams.
1178 * It pops up finished entities in the process if allowable at that point.
1179 *
1180 * Returns the number of space chars skipped
1181 */
1182
1183int
1184xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
1185 int cur, res = 0;
1186
Daniel Veillarde0854c32000-08-27 21:12:29 +00001187 /*
1188 * It's Okay to use CUR/NEXT here since all the blanks are on
1189 * the ASCII range.
1190 */
Daniel Veillardcf461992000-03-14 18:30:20 +00001191 do {
1192 cur = CUR;
Daniel Veillarde0854c32000-08-27 21:12:29 +00001193 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00001194 NEXT;
1195 cur = CUR;
1196 res++;
1197 }
1198 while ((cur == 0) && (ctxt->inputNr > 1) &&
1199 (ctxt->instate != XML_PARSER_COMMENT)) {
1200 xmlPopInput(ctxt);
1201 cur = CUR;
1202 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00001203 /*
1204 * Need to handle support of entities branching here
1205 */
Daniel Veillardcf461992000-03-14 18:30:20 +00001206 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
1207 if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001208 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00001209 return(res);
1210}
Daniel Veillard260a68f1998-08-13 03:39:55 +00001211
Daniel Veillardb05deb71999-08-10 19:04:08 +00001212/************************************************************************
1213 * *
1214 * Commodity functions to handle entities processing *
1215 * *
1216 ************************************************************************/
Daniel Veillard260a68f1998-08-13 03:39:55 +00001217
Daniel Veillard11e00581998-10-24 18:27:49 +00001218/**
1219 * xmlPopInput:
1220 * @ctxt: an XML parser context
1221 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001222 * xmlPopInput: the current input pointed by ctxt->input came to an end
1223 * pop it and return the next char.
1224 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001225 * Returns the current xmlChar in the parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00001226 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001227xmlChar
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001228xmlPopInput(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001229 if (ctxt->inputNr == 1) return(0); /* End of main Input */
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00001230 if (xmlParserDebugEntities)
1231 fprintf(stderr, "Popping input %d\n", ctxt->inputNr);
Daniel Veillardbc50b591999-03-01 12:28:53 +00001232 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001233 if ((*ctxt->input->cur == 0) &&
1234 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
1235 return(xmlPopInput(ctxt));
Daniel Veillard260a68f1998-08-13 03:39:55 +00001236 return(CUR);
1237}
1238
Daniel Veillard11e00581998-10-24 18:27:49 +00001239/**
1240 * xmlPushInput:
1241 * @ctxt: an XML parser context
1242 * @input: an XML parser input fragment (entity, XML fragment ...).
1243 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00001244 * xmlPushInput: switch to a new input stream which is stacked on top
1245 * of the previous one(s).
1246 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00001247void
1248xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001249 if (input == NULL) return;
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00001250
1251 if (xmlParserDebugEntities) {
1252 if ((ctxt->input != NULL) && (ctxt->input->filename))
1253 fprintf(stderr, "%s(%d): ", ctxt->input->filename,
1254 ctxt->input->line);
1255 fprintf(stderr, "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
1256 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00001257 inputPush(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00001258 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001259}
1260
Daniel Veillard11e00581998-10-24 18:27:49 +00001261/**
Daniel Veillardd692aa41999-02-28 21:54:31 +00001262 * xmlFreeInputStream:
Daniel Veillard51e3b151999-11-12 17:02:31 +00001263 * @input: an xmlParserInputPtr
Daniel Veillardd692aa41999-02-28 21:54:31 +00001264 *
1265 * Free up an input stream.
1266 */
1267void
1268xmlFreeInputStream(xmlParserInputPtr input) {
1269 if (input == NULL) return;
1270
Daniel Veillard6454aec1999-09-02 22:04:43 +00001271 if (input->filename != NULL) xmlFree((char *) input->filename);
1272 if (input->directory != NULL) xmlFree((char *) input->directory);
Daniel Veillardcf461992000-03-14 18:30:20 +00001273 if (input->encoding != NULL) xmlFree((char *) input->encoding);
1274 if (input->version != NULL) xmlFree((char *) input->version);
Daniel Veillardd692aa41999-02-28 21:54:31 +00001275 if ((input->free != NULL) && (input->base != NULL))
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001276 input->free((xmlChar *) input->base);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001277 if (input->buf != NULL)
1278 xmlFreeParserInputBuffer(input->buf);
Daniel Veillardd692aa41999-02-28 21:54:31 +00001279 memset(input, -1, sizeof(xmlParserInput));
Daniel Veillard6454aec1999-09-02 22:04:43 +00001280 xmlFree(input);
Daniel Veillardd692aa41999-02-28 21:54:31 +00001281}
1282
1283/**
Daniel Veillardb05deb71999-08-10 19:04:08 +00001284 * xmlNewInputStream:
1285 * @ctxt: an XML parser context
1286 *
1287 * Create a new input stream structure
1288 * Returns the new input stream or NULL
1289 */
1290xmlParserInputPtr
1291xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1292 xmlParserInputPtr input;
1293
Daniel Veillard6454aec1999-09-02 22:04:43 +00001294 input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001295 if (input == NULL) {
Daniel Veillard5e873c42000-04-12 13:27:38 +00001296 if (ctxt != NULL) {
1297 ctxt->errNo = XML_ERR_NO_MEMORY;
1298 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1299 ctxt->sax->error(ctxt->userData,
1300 "malloc: couldn't allocate a new input stream\n");
1301 ctxt->errNo = XML_ERR_NO_MEMORY;
1302 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001303 return(NULL);
1304 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001305 memset(input, 0, sizeof(xmlParserInput));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001306 input->line = 1;
1307 input->col = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00001308 input->standalone = -1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001309 return(input);
1310}
1311
1312/**
Daniel Veillard5e873c42000-04-12 13:27:38 +00001313 * xmlNewIOInputStream:
1314 * @ctxt: an XML parser context
1315 * @input: an I/O Input
1316 * @enc: the charset encoding if known
1317 *
1318 * Create a new input stream structure encapsulating the @input into
1319 * a stream suitable for the parser.
1320 *
1321 * Returns the new input stream or NULL
1322 */
1323xmlParserInputPtr
1324xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1325 xmlCharEncoding enc) {
1326 xmlParserInputPtr inputStream;
1327
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00001328 if (xmlParserDebugEntities)
1329 fprintf(stderr, "new input from I/O\n");
Daniel Veillard5e873c42000-04-12 13:27:38 +00001330 inputStream = xmlNewInputStream(ctxt);
1331 if (inputStream == NULL) {
1332 return(NULL);
1333 }
1334 inputStream->filename = NULL;
1335 inputStream->buf = input;
1336 inputStream->base = inputStream->buf->buffer->content;
1337 inputStream->cur = inputStream->buf->buffer->content;
1338 if (enc != XML_CHAR_ENCODING_NONE) {
1339 xmlSwitchEncoding(ctxt, enc);
1340 }
1341
1342 return(inputStream);
1343}
1344
1345/**
Daniel Veillard11e00581998-10-24 18:27:49 +00001346 * xmlNewEntityInputStream:
1347 * @ctxt: an XML parser context
1348 * @entity: an Entity pointer
1349 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00001350 * Create a new input stream based on an xmlEntityPtr
Daniel Veillardb96e6431999-08-29 21:02:19 +00001351 *
1352 * Returns the new input stream or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00001353 */
Daniel Veillardccb09631998-10-27 06:21:04 +00001354xmlParserInputPtr
1355xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00001356 xmlParserInputPtr input;
1357
1358 if (entity == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001359 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00001360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00001361 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00001362 "internal: xmlNewEntityInputStream entity = NULL\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001363 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardccb09631998-10-27 06:21:04 +00001364 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001365 }
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00001366 if (xmlParserDebugEntities)
1367 fprintf(stderr, "new input from entity: %s\n", entity->name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001368 if (entity->content == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00001369 switch (entity->etype) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00001370 case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001371 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillardb96e6431999-08-29 21:02:19 +00001372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1373 ctxt->sax->error(ctxt->userData,
1374 "xmlNewEntityInputStream unparsed entity !\n");
1375 break;
1376 case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1377 case XML_EXTERNAL_PARAMETER_ENTITY:
1378 return(xmlLoadExternalEntity((char *) entity->SystemID,
Daniel Veillard686d6b62000-01-03 11:08:02 +00001379 (char *) entity->ExternalID, ctxt));
Daniel Veillardb96e6431999-08-29 21:02:19 +00001380 case XML_INTERNAL_GENERAL_ENTITY:
1381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1382 ctxt->sax->error(ctxt->userData,
1383 "Internal entity %s without content !\n", entity->name);
1384 break;
1385 case XML_INTERNAL_PARAMETER_ENTITY:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001386 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00001387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1388 ctxt->sax->error(ctxt->userData,
1389 "Internal parameter entity %s without content !\n", entity->name);
1390 break;
1391 case XML_INTERNAL_PREDEFINED_ENTITY:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001392 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00001393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1394 ctxt->sax->error(ctxt->userData,
1395 "Predefined entity %s without content !\n", entity->name);
1396 break;
1397 }
Daniel Veillardccb09631998-10-27 06:21:04 +00001398 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001399 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001400 input = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001401 if (input == NULL) {
Daniel Veillardccb09631998-10-27 06:21:04 +00001402 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001403 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001404 input->filename = (char *) entity->SystemID;
Daniel Veillard260a68f1998-08-13 03:39:55 +00001405 input->base = entity->content;
1406 input->cur = entity->content;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001407 input->length = entity->length;
Daniel Veillardccb09631998-10-27 06:21:04 +00001408 return(input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00001409}
1410
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001411/**
1412 * xmlNewStringInputStream:
1413 * @ctxt: an XML parser context
Daniel Veillardb05deb71999-08-10 19:04:08 +00001414 * @buffer: an memory buffer
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001415 *
1416 * Create a new input stream based on a memory buffer.
Daniel Veillard1e346af1999-02-22 10:33:01 +00001417 * Returns the new input stream
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001418 */
1419xmlParserInputPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001420xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001421 xmlParserInputPtr input;
1422
Daniel Veillardb05deb71999-08-10 19:04:08 +00001423 if (buffer == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001424 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001425 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00001426 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001427 "internal: xmlNewStringInputStream string = NULL\n");
1428 return(NULL);
1429 }
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00001430 if (xmlParserDebugEntities)
1431 fprintf(stderr, "new fixed input: %.30s\n", buffer);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001432 input = xmlNewInputStream(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001433 if (input == NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001434 return(NULL);
1435 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001436 input->base = buffer;
1437 input->cur = buffer;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001438 input->length = xmlStrlen(buffer);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00001439 return(input);
1440}
1441
Daniel Veillard011b63c1999-06-02 17:44:04 +00001442/**
1443 * xmlNewInputFromFile:
1444 * @ctxt: an XML parser context
1445 * @filename: the filename to use as entity
1446 *
1447 * Create a new input stream based on a file.
1448 *
1449 * Returns the new input stream or NULL in case of error
1450 */
1451xmlParserInputPtr
1452xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001453 xmlParserInputBufferPtr buf;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001454 xmlParserInputPtr inputStream;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001455 char *directory = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001456
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00001457 if (xmlParserDebugEntities)
1458 fprintf(stderr, "new input from file: %s\n", filename);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001459 if (ctxt == NULL) return(NULL);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001460 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001461 if (buf == NULL) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001462 char name[XML_PARSER_BIG_BUFFER_SIZE];
Daniel Veillard011b63c1999-06-02 17:44:04 +00001463
Daniel Veillardb05deb71999-08-10 19:04:08 +00001464 if ((ctxt->input != NULL) && (ctxt->input->directory != NULL)) {
1465#ifdef WIN32
1466 sprintf(name, "%s\\%s", ctxt->input->directory, filename);
1467#else
1468 sprintf(name, "%s/%s", ctxt->input->directory, filename);
1469#endif
1470 buf = xmlParserInputBufferCreateFilename(name,
1471 XML_CHAR_ENCODING_NONE);
1472 if (buf != NULL)
Daniel Veillard686d6b62000-01-03 11:08:02 +00001473 directory = xmlParserGetDirectory(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001474 }
1475 if ((buf == NULL) && (ctxt->directory != NULL)) {
1476#ifdef WIN32
1477 sprintf(name, "%s\\%s", ctxt->directory, filename);
1478#else
1479 sprintf(name, "%s/%s", ctxt->directory, filename);
1480#endif
1481 buf = xmlParserInputBufferCreateFilename(name,
1482 XML_CHAR_ENCODING_NONE);
1483 if (buf != NULL)
Daniel Veillard686d6b62000-01-03 11:08:02 +00001484 directory = xmlParserGetDirectory(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001485 }
1486 if (buf == NULL)
1487 return(NULL);
1488 }
1489 if (directory == NULL)
1490 directory = xmlParserGetDirectory(filename);
1491
1492 inputStream = xmlNewInputStream(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +00001493 if (inputStream == NULL) {
Daniel Veillard6454aec1999-09-02 22:04:43 +00001494 if (directory != NULL) xmlFree((char *) directory);
Daniel Veillard011b63c1999-06-02 17:44:04 +00001495 return(NULL);
1496 }
1497
Daniel Veillard6454aec1999-09-02 22:04:43 +00001498 inputStream->filename = xmlMemStrdup(filename);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001499 inputStream->directory = directory;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001500 inputStream->buf = buf;
Daniel Veillard011b63c1999-06-02 17:44:04 +00001501
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001502 inputStream->base = inputStream->buf->buffer->content;
1503 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001504 if ((ctxt->directory == NULL) && (directory != NULL))
Daniel Veillard294cbca1999-12-03 13:19:09 +00001505 ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
Daniel Veillard011b63c1999-06-02 17:44:04 +00001506 return(inputStream);
1507}
1508
1509/************************************************************************
1510 * *
Daniel Veillardb05deb71999-08-10 19:04:08 +00001511 * Commodity functions to handle parser contexts *
1512 * *
1513 ************************************************************************/
1514
1515/**
1516 * xmlInitParserCtxt:
1517 * @ctxt: an XML parser context
1518 *
1519 * Initialize a parser context
1520 */
1521
1522void
1523xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1524{
1525 xmlSAXHandler *sax;
1526
Daniel Veillardcf461992000-03-14 18:30:20 +00001527 xmlDefaultSAXHandlerInit();
1528
Daniel Veillard6454aec1999-09-02 22:04:43 +00001529 sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001530 if (sax == NULL) {
1531 fprintf(stderr, "xmlInitParserCtxt: out of memory\n");
1532 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001533 memset(sax, 0, sizeof(xmlSAXHandler));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001534
1535 /* Allocate the Input stack */
Daniel Veillard6454aec1999-09-02 22:04:43 +00001536 ctxt->inputTab = (xmlParserInputPtr *) xmlMalloc(5 * sizeof(xmlParserInputPtr));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001537 ctxt->inputNr = 0;
1538 ctxt->inputMax = 5;
1539 ctxt->input = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00001540
Daniel Veillardb05deb71999-08-10 19:04:08 +00001541 ctxt->version = NULL;
1542 ctxt->encoding = NULL;
1543 ctxt->standalone = -1;
1544 ctxt->hasExternalSubset = 0;
1545 ctxt->hasPErefs = 0;
1546 ctxt->html = 0;
1547 ctxt->external = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001548 ctxt->instate = XML_PARSER_START;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001549 ctxt->token = 0;
1550 ctxt->directory = NULL;
1551
1552 /* Allocate the Node stack */
Daniel Veillard6454aec1999-09-02 22:04:43 +00001553 ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001554 ctxt->nodeNr = 0;
1555 ctxt->nodeMax = 10;
1556 ctxt->node = NULL;
1557
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001558 /* Allocate the Name stack */
1559 ctxt->nameTab = (xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1560 ctxt->nameNr = 0;
1561 ctxt->nameMax = 10;
1562 ctxt->name = NULL;
1563
Daniel Veillardcf461992000-03-14 18:30:20 +00001564 /* Allocate the space stack */
1565 ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1566 ctxt->spaceNr = 1;
1567 ctxt->spaceMax = 10;
1568 ctxt->spaceTab[0] = -1;
1569 ctxt->space = &ctxt->spaceTab[0];
1570
1571 if (sax == NULL) {
1572 ctxt->sax = &xmlDefaultSAXHandler;
1573 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001574 ctxt->sax = sax;
1575 memcpy(sax, &xmlDefaultSAXHandler, sizeof(xmlSAXHandler));
1576 }
1577 ctxt->userData = ctxt;
1578 ctxt->myDoc = NULL;
1579 ctxt->wellFormed = 1;
1580 ctxt->valid = 1;
1581 ctxt->validate = xmlDoValidityCheckingDefaultValue;
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00001582 ctxt->pedantic = xmlPedanticParserDefaultValue;
Daniel Veillard83a30e72000-03-02 03:33:32 +00001583 ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001584 ctxt->vctxt.userData = ctxt;
Daniel Veillard5feb8492000-02-02 17:15:36 +00001585 if (ctxt->validate) {
1586 ctxt->vctxt.error = xmlParserValidityError;
Daniel Veillardcf461992000-03-14 18:30:20 +00001587 if (xmlGetWarningsDefaultValue == 0)
1588 ctxt->vctxt.warning = NULL;
1589 else
1590 ctxt->vctxt.warning = xmlParserValidityWarning;
1591 /* Allocate the Node stack */
1592 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
1593 ctxt->vctxt.nodeNr = 0;
1594 ctxt->vctxt.nodeMax = 4;
1595 ctxt->vctxt.node = NULL;
Daniel Veillard5feb8492000-02-02 17:15:36 +00001596 } else {
1597 ctxt->vctxt.error = NULL;
1598 ctxt->vctxt.warning = NULL;
1599 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001600 ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1601 ctxt->record_info = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001602 ctxt->nbChars = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001603 ctxt->checkIndex = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001604 ctxt->inSubset = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001605 ctxt->errNo = XML_ERR_OK;
Daniel Veillardcf461992000-03-14 18:30:20 +00001606 ctxt->depth = 0;
Daniel Veillardbe803962000-06-28 23:40:59 +00001607 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001608 xmlInitNodeInfoSeq(&ctxt->node_seq);
1609}
1610
1611/**
1612 * xmlFreeParserCtxt:
1613 * @ctxt: an XML parser context
1614 *
1615 * Free all the memory used by a parser context. However the parsed
1616 * document in ctxt->myDoc is not freed.
1617 */
1618
1619void
1620xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
1621{
1622 xmlParserInputPtr input;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001623 xmlChar *oldname;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001624
1625 if (ctxt == NULL) return;
1626
Daniel Veillarde0854c32000-08-27 21:12:29 +00001627 while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
Daniel Veillardb05deb71999-08-10 19:04:08 +00001628 xmlFreeInputStream(input);
1629 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00001630 while ((oldname = namePop(ctxt)) != NULL) { /* Non consuming */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001631 xmlFree(oldname);
1632 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001633 if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001634 if (ctxt->nameTab != NULL) xmlFree(ctxt->nameTab);
Daniel Veillard6454aec1999-09-02 22:04:43 +00001635 if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
1636 if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
1637 if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
1638 if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
Daniel Veillardcf461992000-03-14 18:30:20 +00001639 if (ctxt->intSubName != NULL) xmlFree((char *) ctxt->intSubName);
1640 if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
1641 if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
1642 if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001643 if ((ctxt->sax != NULL) && (ctxt->sax != &xmlDefaultSAXHandler))
Daniel Veillard6454aec1999-09-02 22:04:43 +00001644 xmlFree(ctxt->sax);
1645 if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
1646 xmlFree(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00001647}
1648
1649/**
1650 * xmlNewParserCtxt:
1651 *
1652 * Allocate and initialize a new parser context.
1653 *
1654 * Returns the xmlParserCtxtPtr or NULL
1655 */
1656
1657xmlParserCtxtPtr
1658xmlNewParserCtxt()
1659{
1660 xmlParserCtxtPtr ctxt;
1661
Daniel Veillard6454aec1999-09-02 22:04:43 +00001662 ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001663 if (ctxt == NULL) {
1664 fprintf(stderr, "xmlNewParserCtxt : cannot allocate context\n");
1665 perror("malloc");
1666 return(NULL);
1667 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001668 memset(ctxt, 0, sizeof(xmlParserCtxt));
Daniel Veillardb05deb71999-08-10 19:04:08 +00001669 xmlInitParserCtxt(ctxt);
1670 return(ctxt);
1671}
1672
1673/**
1674 * xmlClearParserCtxt:
1675 * @ctxt: an XML parser context
1676 *
1677 * Clear (release owned resources) and reinitialize a parser context
1678 */
1679
1680void
1681xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
1682{
1683 xmlClearNodeInfoSeq(&ctxt->node_seq);
1684 xmlInitParserCtxt(ctxt);
1685}
1686
1687/************************************************************************
1688 * *
Daniel Veillard011b63c1999-06-02 17:44:04 +00001689 * Commodity functions to handle entities *
1690 * *
1691 ************************************************************************/
1692
Daniel Veillardcf461992000-03-14 18:30:20 +00001693/**
1694 * xmlCheckEntity:
1695 * @ctxt: an XML parser context
1696 * @content: the entity content string
1697 *
1698 * Parse an entity content and checks the WF constraints
1699 *
1700 */
1701
1702void
1703xmlCheckEntity(xmlParserCtxtPtr ctxt, const xmlChar *content) {
1704}
Daniel Veillardb05deb71999-08-10 19:04:08 +00001705
1706/**
1707 * xmlParseCharRef:
1708 * @ctxt: an XML parser context
1709 *
1710 * parse Reference declarations
1711 *
1712 * [66] CharRef ::= '&#' [0-9]+ ';' |
1713 * '&#x' [0-9a-fA-F]+ ';'
1714 *
1715 * [ WFC: Legal Character ]
1716 * Characters referred to using character references must match the
1717 * production for Char.
1718 *
Daniel Veillard10a2c651999-12-12 13:03:50 +00001719 * Returns the value parsed (as an int), 0 in case of error
Daniel Veillardb05deb71999-08-10 19:04:08 +00001720 */
1721int
1722xmlParseCharRef(xmlParserCtxtPtr ctxt) {
1723 int val = 0;
Daniel Veillarde0854c32000-08-27 21:12:29 +00001724 int count = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001725
1726 if (ctxt->token != 0) {
1727 val = ctxt->token;
1728 ctxt->token = 0;
1729 return(val);
1730 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00001731 /*
1732 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
1733 */
Daniel Veillardcf461992000-03-14 18:30:20 +00001734 if ((RAW == '&') && (NXT(1) == '#') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00001735 (NXT(2) == 'x')) {
1736 SKIP(3);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001737 GROW;
1738 while (RAW != ';') { /* loop blocked by count */
1739 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
Daniel Veillardb05deb71999-08-10 19:04:08 +00001740 val = val * 16 + (CUR - '0');
Daniel Veillarde0854c32000-08-27 21:12:29 +00001741 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
Daniel Veillardb05deb71999-08-10 19:04:08 +00001742 val = val * 16 + (CUR - 'a') + 10;
Daniel Veillarde0854c32000-08-27 21:12:29 +00001743 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
Daniel Veillardb05deb71999-08-10 19:04:08 +00001744 val = val * 16 + (CUR - 'A') + 10;
1745 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001746 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1748 ctxt->sax->error(ctxt->userData,
1749 "xmlParseCharRef: invalid hexadecimal value\n");
1750 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001751 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001752 val = 0;
1753 break;
1754 }
1755 NEXT;
Daniel Veillarde0854c32000-08-27 21:12:29 +00001756 count++;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001757 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001758 if (RAW == ';') {
1759 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1760 ctxt->nbChars ++;
1761 ctxt->input->cur++;
1762 }
1763 } else if ((RAW == '&') && (NXT(1) == '#')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001764 SKIP(2);
Daniel Veillarde0854c32000-08-27 21:12:29 +00001765 GROW;
1766 while (RAW != ';') { /* loop blocked by count */
1767 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
Daniel Veillardb05deb71999-08-10 19:04:08 +00001768 val = val * 10 + (CUR - '0');
1769 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001770 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1772 ctxt->sax->error(ctxt->userData,
1773 "xmlParseCharRef: invalid decimal value\n");
1774 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001775 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001776 val = 0;
1777 break;
1778 }
1779 NEXT;
Daniel Veillarde0854c32000-08-27 21:12:29 +00001780 count++;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001781 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001782 if (RAW == ';') {
1783 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
1784 ctxt->nbChars ++;
1785 ctxt->input->cur++;
1786 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00001787 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001788 ctxt->errNo = XML_ERR_INVALID_CHARREF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1790 ctxt->sax->error(ctxt->userData,
1791 "xmlParseCharRef: invalid value\n");
1792 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001793 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001794 }
1795
1796 /*
1797 * [ WFC: Legal Character ]
1798 * Characters referred to using character references must match the
1799 * production for Char.
1800 */
1801 if (IS_CHAR(val)) {
1802 return(val);
1803 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001804 ctxt->errNo = XML_ERR_INVALID_CHAR;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001806 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
Daniel Veillardb05deb71999-08-10 19:04:08 +00001807 val);
1808 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001809 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001810 }
1811 return(0);
1812}
1813
1814/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00001815 * xmlParseStringCharRef:
1816 * @ctxt: an XML parser context
1817 * @str: a pointer to an index in the string
1818 *
1819 * parse Reference declarations, variant parsing from a string rather
1820 * than an an input flow.
1821 *
1822 * [66] CharRef ::= '&#' [0-9]+ ';' |
1823 * '&#x' [0-9a-fA-F]+ ';'
1824 *
1825 * [ WFC: Legal Character ]
1826 * Characters referred to using character references must match the
1827 * production for Char.
1828 *
1829 * Returns the value parsed (as an int), 0 in case of error, str will be
1830 * updated to the current value of the index
1831 */
1832int
1833xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
1834 const xmlChar *ptr;
1835 xmlChar cur;
1836 int val = 0;
1837
1838 if ((str == NULL) || (*str == NULL)) return(0);
1839 ptr = *str;
1840 cur = *ptr;
Daniel Veillard0caf07a1999-12-21 16:25:49 +00001841 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00001842 ptr += 3;
1843 cur = *ptr;
Daniel Veillarde0854c32000-08-27 21:12:29 +00001844 while (cur != ';') { /* Non input consuming loop */
Daniel Veillard10a2c651999-12-12 13:03:50 +00001845 if ((cur >= '0') && (cur <= '9'))
1846 val = val * 16 + (cur - '0');
1847 else if ((cur >= 'a') && (cur <= 'f'))
1848 val = val * 16 + (cur - 'a') + 10;
1849 else if ((cur >= 'A') && (cur <= 'F'))
1850 val = val * 16 + (cur - 'A') + 10;
1851 else {
1852 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
1853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1854 ctxt->sax->error(ctxt->userData,
Daniel Veillardbe803962000-06-28 23:40:59 +00001855 "xmlParseStringCharRef: invalid hexadecimal value\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00001856 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001857 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001858 val = 0;
1859 break;
1860 }
1861 ptr++;
1862 cur = *ptr;
1863 }
1864 if (cur == ';')
1865 ptr++;
Daniel Veillard0142b842000-01-14 14:45:24 +00001866 } else if ((cur == '&') && (ptr[1] == '#')){
Daniel Veillard10a2c651999-12-12 13:03:50 +00001867 ptr += 2;
1868 cur = *ptr;
Daniel Veillarde0854c32000-08-27 21:12:29 +00001869 while (cur != ';') { /* Non input consuming loops */
Daniel Veillard10a2c651999-12-12 13:03:50 +00001870 if ((cur >= '0') && (cur <= '9'))
1871 val = val * 10 + (cur - '0');
1872 else {
1873 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
1874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1875 ctxt->sax->error(ctxt->userData,
Daniel Veillardbe803962000-06-28 23:40:59 +00001876 "xmlParseStringCharRef: invalid decimal value\n");
Daniel Veillard10a2c651999-12-12 13:03:50 +00001877 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001878 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001879 val = 0;
1880 break;
1881 }
1882 ptr++;
1883 cur = *ptr;
1884 }
1885 if (cur == ';')
1886 ptr++;
1887 } else {
1888 ctxt->errNo = XML_ERR_INVALID_CHARREF;
1889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1890 ctxt->sax->error(ctxt->userData,
1891 "xmlParseCharRef: invalid value\n");
1892 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001893 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001894 return(0);
1895 }
1896 *str = ptr;
1897
1898 /*
1899 * [ WFC: Legal Character ]
1900 * Characters referred to using character references must match the
1901 * production for Char.
1902 */
1903 if (IS_CHAR(val)) {
1904 return(val);
1905 } else {
1906 ctxt->errNo = XML_ERR_INVALID_CHAR;
1907 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1908 ctxt->sax->error(ctxt->userData,
1909 "CharRef: invalid xmlChar value %d\n", val);
1910 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001911 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00001912 }
1913 return(0);
1914}
1915
1916/**
Daniel Veillardb05deb71999-08-10 19:04:08 +00001917 * xmlParserHandleReference:
1918 * @ctxt: the parser context
1919 *
1920 * [67] Reference ::= EntityRef | CharRef
1921 *
1922 * [68] EntityRef ::= '&' Name ';'
1923 *
1924 * [ WFC: Entity Declared ]
1925 * the Name given in the entity reference must match that in an entity
1926 * declaration, except that well-formed documents need not declare any
1927 * of the following entities: amp, lt, gt, apos, quot.
1928 *
1929 * [ WFC: Parsed Entity ]
1930 * An entity reference must not contain the name of an unparsed entity
1931 *
1932 * [66] CharRef ::= '&#' [0-9]+ ';' |
1933 * '&#x' [0-9a-fA-F]+ ';'
1934 *
1935 * A PEReference may have been detectect in the current input stream
1936 * the handling is done accordingly to
1937 * http://www.w3.org/TR/REC-xml#entproc
Daniel Veillarde0854c32000-08-27 21:12:29 +00001938 *
1939 * TODO: the default handling part seems deprecated now ... cut it off
Daniel Veillardb05deb71999-08-10 19:04:08 +00001940 */
1941void
1942xmlParserHandleReference(xmlParserCtxtPtr ctxt) {
1943 xmlParserInputPtr input;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001944 xmlChar *name;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001945 xmlEntityPtr ent = NULL;
1946
Daniel Veillard35008381999-10-25 13:15:52 +00001947 if (ctxt->token != 0) {
Daniel Veillard35008381999-10-25 13:15:52 +00001948 return;
1949 }
Daniel Veillardcf461992000-03-14 18:30:20 +00001950 if (RAW != '&') return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001951 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00001952 if ((RAW == '&') && (NXT(1) == '#')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00001953 switch(ctxt->instate) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001954 case XML_PARSER_ENTITY_DECL:
1955 case XML_PARSER_PI:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001956 case XML_PARSER_CDATA_SECTION:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001957 case XML_PARSER_COMMENT:
Daniel Veillardcf461992000-03-14 18:30:20 +00001958 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001959 /* we just ignore it there */
1960 return;
1961 case XML_PARSER_START_TAG:
1962 return;
1963 case XML_PARSER_END_TAG:
Daniel Veillardb05deb71999-08-10 19:04:08 +00001964 return;
1965 case XML_PARSER_EOF:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001966 ctxt->errNo = XML_ERR_CHARREF_AT_EOF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001967 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1968 ctxt->sax->error(ctxt->userData, "CharRef at EOF\n");
1969 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001970 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001971 return;
1972 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00001973 case XML_PARSER_START:
1974 case XML_PARSER_MISC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001975 ctxt->errNo = XML_ERR_CHARREF_IN_PROLOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1977 ctxt->sax->error(ctxt->userData, "CharRef in prolog!\n");
1978 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001979 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001980 return;
1981 case XML_PARSER_EPILOG:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001982 ctxt->errNo = XML_ERR_CHARREF_IN_EPILOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1984 ctxt->sax->error(ctxt->userData, "CharRef in epilog!\n");
1985 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001986 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001987 return;
1988 case XML_PARSER_DTD:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00001989 ctxt->errNo = XML_ERR_CHARREF_IN_DTD;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1991 ctxt->sax->error(ctxt->userData,
1992 "CharRef are forbiden in DTDs!\n");
1993 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00001994 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001995 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00001996 case XML_PARSER_ENTITY_VALUE:
1997 /*
1998 * NOTE: in the case of entity values, we don't do the
Daniel Veillard51e3b151999-11-12 17:02:31 +00001999 * substitution here since we need the literal
Daniel Veillardb05deb71999-08-10 19:04:08 +00002000 * entity value to be able to save the internal
2001 * subset of the document.
Daniel Veillarde0854c32000-08-27 21:12:29 +00002002 * This will be handled by xmlStringDecodeEntities
Daniel Veillardb05deb71999-08-10 19:04:08 +00002003 */
2004 return;
2005 case XML_PARSER_CONTENT:
2006 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillardb05deb71999-08-10 19:04:08 +00002007 ctxt->token = xmlParseCharRef(ctxt);
2008 return;
2009 }
2010 return;
2011 }
2012
2013 switch(ctxt->instate) {
2014 case XML_PARSER_CDATA_SECTION:
2015 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002016 case XML_PARSER_PI:
Daniel Veillardb05deb71999-08-10 19:04:08 +00002017 case XML_PARSER_COMMENT:
Daniel Veillardcf461992000-03-14 18:30:20 +00002018 case XML_PARSER_SYSTEM_LITERAL:
2019 case XML_PARSER_CONTENT:
Daniel Veillardb05deb71999-08-10 19:04:08 +00002020 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002021 case XML_PARSER_START_TAG:
2022 return;
2023 case XML_PARSER_END_TAG:
2024 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002025 case XML_PARSER_EOF:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002026 ctxt->errNo = XML_ERR_ENTITYREF_AT_EOF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002027 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2028 ctxt->sax->error(ctxt->userData, "Reference at EOF\n");
2029 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002030 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002031 return;
2032 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002033 case XML_PARSER_START:
2034 case XML_PARSER_MISC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002035 ctxt->errNo = XML_ERR_ENTITYREF_IN_PROLOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2037 ctxt->sax->error(ctxt->userData, "Reference in prolog!\n");
2038 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002039 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002040 return;
2041 case XML_PARSER_EPILOG:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002042 ctxt->errNo = XML_ERR_ENTITYREF_IN_EPILOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2044 ctxt->sax->error(ctxt->userData, "Reference in epilog!\n");
2045 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002046 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002047 return;
2048 case XML_PARSER_ENTITY_VALUE:
2049 /*
2050 * NOTE: in the case of entity values, we don't do the
Daniel Veillard51e3b151999-11-12 17:02:31 +00002051 * substitution here since we need the literal
Daniel Veillardb05deb71999-08-10 19:04:08 +00002052 * entity value to be able to save the internal
2053 * subset of the document.
Daniel Veillarde0854c32000-08-27 21:12:29 +00002054 * This will be handled by xmlStringDecodeEntities
Daniel Veillardb05deb71999-08-10 19:04:08 +00002055 */
2056 return;
2057 case XML_PARSER_ATTRIBUTE_VALUE:
2058 /*
2059 * NOTE: in the case of attributes values, we don't do the
2060 * substitution here unless we are in a mode where
2061 * the parser is explicitely asked to substitute
2062 * entities. The SAX callback is called with values
2063 * without entity substitution.
Daniel Veillarde0854c32000-08-27 21:12:29 +00002064 * This will then be handled by xmlStringDecodeEntities
Daniel Veillardb05deb71999-08-10 19:04:08 +00002065 */
Daniel Veillardb96e6431999-08-29 21:02:19 +00002066 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002067 case XML_PARSER_ENTITY_DECL:
2068 /*
2069 * we just ignore it there
2070 * the substitution will be done once the entity is referenced
2071 */
2072 return;
2073 case XML_PARSER_DTD:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002074 ctxt->errNo = XML_ERR_ENTITYREF_IN_DTD;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2076 ctxt->sax->error(ctxt->userData,
2077 "Entity references are forbiden in DTDs!\n");
2078 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002079 ctxt->disableSAX = 1;
Daniel Veillardb96e6431999-08-29 21:02:19 +00002080 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002081 }
2082
Daniel Veillarde0854c32000-08-27 21:12:29 +00002083/* TODO: this seems not reached anymore .... Verify ... */
2084fprintf(stderr, "Reached deprecated section in xmlParserHandleReference()\n");
2085fprintf(stderr, "Please forward the document to Daniel.Veillard@w3.org\n");
2086fprintf(stderr, "indicating the version: %s, thanks !\n", xmlParserVersion);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002087 NEXT;
2088 name = xmlScanName(ctxt);
2089 if (name == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002090 ctxt->errNo = XML_ERR_ENTITYREF_NO_NAME;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002091 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2092 ctxt->sax->error(ctxt->userData, "Entity reference: no name\n");
2093 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002094 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002095 ctxt->token = '&';
2096 return;
2097 }
2098 if (NXT(xmlStrlen(name)) != ';') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002099 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002100 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2101 ctxt->sax->error(ctxt->userData,
2102 "Entity reference: ';' expected\n");
2103 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002104 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002105 ctxt->token = '&';
Daniel Veillard6454aec1999-09-02 22:04:43 +00002106 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002107 return;
2108 }
2109 SKIP(xmlStrlen(name) + 1);
2110 if (ctxt->sax != NULL) {
2111 if (ctxt->sax->getEntity != NULL)
2112 ent = ctxt->sax->getEntity(ctxt->userData, name);
2113 }
2114
2115 /*
2116 * [ WFC: Entity Declared ]
2117 * the Name given in the entity reference must match that in an entity
2118 * declaration, except that well-formed documents need not declare any
2119 * of the following entities: amp, lt, gt, apos, quot.
2120 */
2121 if (ent == NULL)
2122 ent = xmlGetPredefinedEntity(name);
2123 if (ent == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002124 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2126 ctxt->sax->error(ctxt->userData,
2127 "Entity reference: entity %s not declared\n",
2128 name);
2129 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002130 ctxt->disableSAX = 1;
Daniel Veillard6454aec1999-09-02 22:04:43 +00002131 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002132 return;
2133 }
2134
2135 /*
2136 * [ WFC: Parsed Entity ]
2137 * An entity reference must not contain the name of an unparsed entity
2138 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002139 if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002140 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2142 ctxt->sax->error(ctxt->userData,
2143 "Entity reference to unparsed entity %s\n", name);
2144 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002145 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002146 }
2147
Daniel Veillardcf461992000-03-14 18:30:20 +00002148 if (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002149 ctxt->token = ent->content[0];
Daniel Veillard6454aec1999-09-02 22:04:43 +00002150 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002151 return;
2152 }
2153 input = xmlNewEntityInputStream(ctxt, ent);
2154 xmlPushInput(ctxt, input);
Daniel Veillard6454aec1999-09-02 22:04:43 +00002155 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002156 return;
2157}
2158
2159/**
2160 * xmlParserHandlePEReference:
2161 * @ctxt: the parser context
2162 *
2163 * [69] PEReference ::= '%' Name ';'
2164 *
2165 * [ WFC: No Recursion ]
2166 * TODO A parsed entity must not contain a recursive
2167 * reference to itself, either directly or indirectly.
2168 *
2169 * [ WFC: Entity Declared ]
2170 * In a document without any DTD, a document with only an internal DTD
2171 * subset which contains no parameter entity references, or a document
2172 * with "standalone='yes'", ... ... The declaration of a parameter
2173 * entity must precede any reference to it...
2174 *
2175 * [ VC: Entity Declared ]
2176 * In a document with an external subset or external parameter entities
2177 * with "standalone='no'", ... ... The declaration of a parameter entity
2178 * must precede any reference to it...
2179 *
2180 * [ WFC: In DTD ]
2181 * Parameter-entity references may only appear in the DTD.
2182 * NOTE: misleading but this is handled.
2183 *
2184 * A PEReference may have been detected in the current input stream
2185 * the handling is done accordingly to
2186 * http://www.w3.org/TR/REC-xml#entproc
2187 * i.e.
2188 * - Included as Parameter Entity reference within DTDs
2189 * - Included as Paraemeter Entity reference within DTDs
2190 */
2191void
2192xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002193 xmlChar *name;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002194 xmlEntityPtr entity = NULL;
2195 xmlParserInputPtr input;
2196
Daniel Veillard35008381999-10-25 13:15:52 +00002197 if (ctxt->token != 0) {
Daniel Veillard35008381999-10-25 13:15:52 +00002198 return;
2199 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002200 if (RAW != '%') return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002201 switch(ctxt->instate) {
2202 case XML_PARSER_CDATA_SECTION:
2203 return;
2204 case XML_PARSER_COMMENT:
2205 return;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002206 case XML_PARSER_START_TAG:
2207 return;
2208 case XML_PARSER_END_TAG:
2209 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002210 case XML_PARSER_EOF:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002211 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2213 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
2214 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002215 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002216 return;
2217 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002218 case XML_PARSER_START:
2219 case XML_PARSER_MISC:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002220 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002221 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2222 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
2223 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002224 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002225 return;
2226 case XML_PARSER_ENTITY_DECL:
2227 case XML_PARSER_CONTENT:
2228 case XML_PARSER_ATTRIBUTE_VALUE:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002229 case XML_PARSER_PI:
Daniel Veillardcf461992000-03-14 18:30:20 +00002230 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillardb05deb71999-08-10 19:04:08 +00002231 /* we just ignore it there */
2232 return;
2233 case XML_PARSER_EPILOG:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002234 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2236 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
2237 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002238 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002239 return;
2240 case XML_PARSER_ENTITY_VALUE:
2241 /*
2242 * NOTE: in the case of entity values, we don't do the
Daniel Veillard51e3b151999-11-12 17:02:31 +00002243 * substitution here since we need the literal
Daniel Veillardb05deb71999-08-10 19:04:08 +00002244 * entity value to be able to save the internal
2245 * subset of the document.
Daniel Veillarde0854c32000-08-27 21:12:29 +00002246 * This will be handled by xmlStringDecodeEntities
Daniel Veillardb05deb71999-08-10 19:04:08 +00002247 */
2248 return;
2249 case XML_PARSER_DTD:
2250 /*
2251 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2252 * In the internal DTD subset, parameter-entity references
2253 * can occur only where markup declarations can occur, not
2254 * within markup declarations.
2255 * In that case this is handled in xmlParseMarkupDecl
2256 */
2257 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2258 return;
2259 }
2260
2261 NEXT;
2262 name = xmlParseName(ctxt);
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00002263 if (xmlParserDebugEntities)
2264 fprintf(stderr, "PE Reference: %s\n", name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002265 if (name == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002266 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2268 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
2269 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002270 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002271 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00002272 if (RAW == ';') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002273 NEXT;
2274 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
2275 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
2276 if (entity == NULL) {
2277
2278 /*
2279 * [ WFC: Entity Declared ]
2280 * In a document without any DTD, a document with only an
2281 * internal DTD subset which contains no parameter entity
2282 * references, or a document with "standalone='yes'", ...
2283 * ... The declaration of a parameter entity must precede
2284 * any reference to it...
2285 */
2286 if ((ctxt->standalone == 1) ||
2287 ((ctxt->hasExternalSubset == 0) &&
2288 (ctxt->hasPErefs == 0))) {
2289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2290 ctxt->sax->error(ctxt->userData,
2291 "PEReference: %%%s; not found\n", name);
2292 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002293 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002294 } else {
2295 /*
2296 * [ VC: Entity Declared ]
2297 * In a document with an external subset or external
2298 * parameter entities with "standalone='no'", ...
2299 * ... The declaration of a parameter entity must precede
2300 * any reference to it...
2301 */
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00002302 if ((!ctxt->disableSAX) &&
2303 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
Daniel Veillard87b95392000-08-12 21:12:04 +00002304 ctxt->vctxt.error(ctxt->vctxt.userData,
2305 "PEReference: %%%s; not found\n", name);
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00002306 } else if ((!ctxt->disableSAX) &&
2307 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00002308 ctxt->sax->warning(ctxt->userData,
2309 "PEReference: %%%s; not found\n", name);
2310 ctxt->valid = 0;
2311 }
2312 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00002313 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
2314 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002315 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00002316 * TODO !!! handle the extra spaces added before and after
Daniel Veillardb05deb71999-08-10 19:04:08 +00002317 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardb05deb71999-08-10 19:04:08 +00002318 */
2319 input = xmlNewEntityInputStream(ctxt, entity);
2320 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00002321 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2322 (RAW == '<') && (NXT(1) == '?') &&
2323 (NXT(2) == 'x') && (NXT(3) == 'm') &&
2324 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
2325 xmlParseTextDecl(ctxt);
2326 }
2327 if (ctxt->token == 0)
2328 ctxt->token = ' ';
Daniel Veillardb05deb71999-08-10 19:04:08 +00002329 } else {
2330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2331 ctxt->sax->error(ctxt->userData,
2332 "xmlHandlePEReference: %s is not a parameter entity\n",
2333 name);
2334 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002335 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002336 }
2337 }
2338 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002339 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2341 ctxt->sax->error(ctxt->userData,
2342 "xmlHandlePEReference: expecting ';'\n");
2343 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00002344 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002345 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00002346 xmlFree(name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002347 }
2348}
2349
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates `buffer` to that size.  Through
 * token pasting it expects an xmlChar pointer and a matching
 * `<name>_size` variable to be in scope, and it must be expanded inside
 * a function returning a pointer: on allocation failure it makes the
 * enclosing function return NULL.
 *
 * The result of xmlRealloc is stored in a temporary first so that the
 * previous block can still be released when the reallocation fails
 * (assuming xmlRealloc follows realloc semantics and leaves the old
 * block allocated on failure); assigning directly to `buffer` would
 * lose the only reference to it.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
    buffer##_grown = (xmlChar *)                                        \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (buffer##_grown == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
}
2362
Daniel Veillard011b63c1999-06-02 17:44:04 +00002363/**
2364 * xmlDecodeEntities:
2365 * @ctxt: the parser context
2366 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2367 * @len: the len to decode (in bytes !), -1 for no size limit
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002368 * @end: an end marker xmlChar, 0 if none
2369 * @end2: an end marker xmlChar, 0 if none
2370 * @end3: an end marker xmlChar, 0 if none
Daniel Veillard011b63c1999-06-02 17:44:04 +00002371 *
Daniel Veillarde0854c32000-08-27 21:12:29 +00002372 * This function is deprecated, we now always process entities content
2373 * through xmlStringDecodeEntities
2374 *
2375 * TODO: remove it in next major release.
2376 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00002377 * [67] Reference ::= EntityRef | CharRef
2378 *
2379 * [69] PEReference ::= '%' Name ';'
2380 *
2381 * Returns A newly allocated string with the substitution done. The caller
2382 * must deallocate it !
2383 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002384xmlChar *
Daniel Veillard011b63c1999-06-02 17:44:04 +00002385xmlDecodeEntities(xmlParserCtxtPtr ctxt, int len, int what,
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002386 xmlChar end, xmlChar end2, xmlChar end3) {
2387 xmlChar *buffer = NULL;
Daniel Veillard3f6f7f62000-06-30 17:58:25 +00002388 unsigned int buffer_size = 0;
2389 unsigned int nbchars = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002390
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002391 xmlChar *current = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002392 xmlEntityPtr ent;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002393 unsigned int max = (unsigned int) len;
Daniel Veillardcf461992000-03-14 18:30:20 +00002394 int c,l;
2395
2396 if (ctxt->depth > 40) {
2397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2398 ctxt->sax->error(ctxt->userData,
2399 "Detected entity reference loop\n");
2400 ctxt->wellFormed = 0;
2401 ctxt->disableSAX = 1;
2402 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2403 return(NULL);
2404 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002405
2406 /*
2407 * allocate a translation buffer.
2408 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002409 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002410 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
Daniel Veillard011b63c1999-06-02 17:44:04 +00002411 if (buffer == NULL) {
2412 perror("xmlDecodeEntities: malloc failed");
2413 return(NULL);
2414 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002415
2416 /*
2417 * Ok loop until we reach one of the ending char or a size limit.
2418 */
Daniel Veillarde0854c32000-08-27 21:12:29 +00002419 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00002420 c = CUR_CHAR(l);
Daniel Veillarde0854c32000-08-27 21:12:29 +00002421 while ((nbchars < max) && (c != end) && /* NOTUSED */
Daniel Veillardcf461992000-03-14 18:30:20 +00002422 (c != end2) && (c != end3)) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00002423 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00002424 if (c == 0) break;
2425 if (((c == '&') && (ctxt->token != '&')) && (NXT(1) == '#')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002426 int val = xmlParseCharRef(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00002427 COPY_BUF(0,buffer,nbchars,val);
2428 NEXTL(l);
2429 } else if ((c == '&') && (ctxt->token != '&') &&
2430 (what & XML_SUBSTITUTE_REF)) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00002431 if (xmlParserDebugEntities)
2432 fprintf(stderr, "decoding Entity Reference\n");
Daniel Veillardb05deb71999-08-10 19:04:08 +00002433 ent = xmlParseEntityRef(ctxt);
2434 if ((ent != NULL) &&
2435 (ctxt->replaceEntities != 0)) {
2436 current = ent->content;
Daniel Veillarde0854c32000-08-27 21:12:29 +00002437 while (*current != 0) { /* non input consuming loop */
Daniel Veillardcf461992000-03-14 18:30:20 +00002438 buffer[nbchars++] = *current++;
2439 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002440 growBuffer(buffer);
Daniel Veillard011b63c1999-06-02 17:44:04 +00002441 }
2442 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00002443 } else if (ent != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00002444 const xmlChar *cur = ent->name;
Daniel Veillardb05deb71999-08-10 19:04:08 +00002445
Daniel Veillardcf461992000-03-14 18:30:20 +00002446 buffer[nbchars++] = '&';
2447 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00002448 growBuffer(buffer);
Daniel Veillardb05deb71999-08-10 19:04:08 +00002449 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00002450 while (*cur != 0) { /* non input consuming loop */
Daniel Veillardcf461992000-03-14 18:30:20 +00002451 buffer[nbchars++] = *cur++;
2452 }
2453 buffer[nbchars++] = ';';
Daniel Veillard011b63c1999-06-02 17:44:04 +00002454 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002455 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00002456 /*
2457 * a PEReference induce to switch the entity flow,
2458 * we break here to flush the current set of chars
2459 * parsed if any. We will be called back later.
2460 */
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00002461 if (xmlParserDebugEntities)
2462 fprintf(stderr, "decoding PE Reference\n");
Daniel Veillarde2d034d1999-07-27 19:52:06 +00002463 if (nbchars != 0) break;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002464
2465 xmlParsePEReference(ctxt);
2466
2467 /*
2468 * Pop-up of finished entities.
2469 */
Daniel Veillarde0854c32000-08-27 21:12:29 +00002470 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
Daniel Veillard011b63c1999-06-02 17:44:04 +00002471 xmlPopInput(ctxt);
2472
Daniel Veillardb05deb71999-08-10 19:04:08 +00002473 break;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002474 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00002475 COPY_BUF(l,buffer,nbchars,c);
2476 NEXTL(l);
2477 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Raph Levien05240da1999-06-15 21:27:11 +00002478 growBuffer(buffer);
Raph Levien05240da1999-06-15 21:27:11 +00002479 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00002480 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002481 c = CUR_CHAR(l);
Daniel Veillard011b63c1999-06-02 17:44:04 +00002482 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002483 buffer[nbchars++] = 0;
Daniel Veillard011b63c1999-06-02 17:44:04 +00002484 return(buffer);
2485}
2486
Daniel Veillard10a2c651999-12-12 13:03:50 +00002487/**
2488 * xmlStringDecodeEntities:
2489 * @ctxt: the parser context
2490 * @str: the input string
2491 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2492 * @end: an end marker xmlChar, 0 if none
2493 * @end2: an end marker xmlChar, 0 if none
2494 * @end3: an end marker xmlChar, 0 if none
2495 *
Daniel Veillarde0854c32000-08-27 21:12:29 +00002496 * Takes a entity string content and process to do the adequate subtitutions.
2497 *
Daniel Veillard10a2c651999-12-12 13:03:50 +00002498 * [67] Reference ::= EntityRef | CharRef
2499 *
2500 * [69] PEReference ::= '%' Name ';'
2501 *
2502 * Returns A newly allocated string with the substitution done. The caller
2503 * must deallocate it !
2504 */
2505xmlChar *
2506xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
2507 xmlChar end, xmlChar end2, xmlChar end3) {
2508 xmlChar *buffer = NULL;
2509 int buffer_size = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002510
2511 xmlChar *current = NULL;
2512 xmlEntityPtr ent;
Daniel Veillardcf461992000-03-14 18:30:20 +00002513 int c,l;
2514 int nbchars = 0;
2515
Daniel Veillard87b95392000-08-12 21:12:04 +00002516 if (str == NULL)
2517 return(NULL);
2518
Daniel Veillardcf461992000-03-14 18:30:20 +00002519 if (ctxt->depth > 40) {
2520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2521 ctxt->sax->error(ctxt->userData,
2522 "Detected entity reference loop\n");
2523 ctxt->wellFormed = 0;
2524 ctxt->disableSAX = 1;
2525 ctxt->errNo = XML_ERR_ENTITY_LOOP;
2526 return(NULL);
2527 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002528
2529 /*
2530 * allocate a translation buffer.
2531 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00002532 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002533 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
2534 if (buffer == NULL) {
2535 perror("xmlDecodeEntities: malloc failed");
2536 return(NULL);
2537 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002538
2539 /*
2540 * Ok loop until we reach one of the ending char or a size limit.
Daniel Veillarde0854c32000-08-27 21:12:29 +00002541 * we are operating on already parsed values.
Daniel Veillard10a2c651999-12-12 13:03:50 +00002542 */
Daniel Veillardcf461992000-03-14 18:30:20 +00002543 c = CUR_SCHAR(str, l);
Daniel Veillarde0854c32000-08-27 21:12:29 +00002544 while ((c != 0) && (c != end) && /* non input consuming loop */
2545 (c != end2) && (c != end3)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002546
Daniel Veillardcf461992000-03-14 18:30:20 +00002547 if (c == 0) break;
2548 if ((c == '&') && (str[1] == '#')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002549 int val = xmlParseStringCharRef(ctxt, &str);
Daniel Veillardcf461992000-03-14 18:30:20 +00002550 if (val != 0) {
2551 COPY_BUF(0,buffer,nbchars,val);
2552 }
2553 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00002554 if (xmlParserDebugEntities)
2555 fprintf(stderr, "String decoding Entity Reference: %.30s\n",
2556 str);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002557 ent = xmlParseStringEntityRef(ctxt, &str);
Daniel Veillarde0854c32000-08-27 21:12:29 +00002558 if ((ent != NULL) &&
2559 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
Daniel Veillard52402ce2000-08-22 23:36:12 +00002560 if (ent->content != NULL) {
2561 COPY_BUF(0,buffer,nbchars,ent->content[0]);
2562 } else {
2563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2564 ctxt->sax->error(ctxt->userData,
2565 "internal error entity has no content\n");
2566 }
2567 } else if ((ent != NULL) && (ent->content != NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002568 xmlChar *rep;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002569
Daniel Veillardcf461992000-03-14 18:30:20 +00002570 ctxt->depth++;
2571 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2572 0, 0, 0);
2573 ctxt->depth--;
2574 if (rep != NULL) {
2575 current = rep;
Daniel Veillarde0854c32000-08-27 21:12:29 +00002576 while (*current != 0) { /* non input consuming loop */
Daniel Veillardcf461992000-03-14 18:30:20 +00002577 buffer[nbchars++] = *current++;
2578 if (nbchars >
2579 buffer_size - XML_PARSER_BUFFER_SIZE) {
2580 growBuffer(buffer);
2581 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002582 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002583 xmlFree(rep);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002584 }
2585 } else if (ent != NULL) {
2586 int i = xmlStrlen(ent->name);
2587 const xmlChar *cur = ent->name;
2588
Daniel Veillardcf461992000-03-14 18:30:20 +00002589 buffer[nbchars++] = '&';
2590 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002591 growBuffer(buffer);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002592 }
2593 for (;i > 0;i--)
Daniel Veillardcf461992000-03-14 18:30:20 +00002594 buffer[nbchars++] = *cur++;
2595 buffer[nbchars++] = ';';
Daniel Veillard10a2c651999-12-12 13:03:50 +00002596 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002597 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00002598 if (xmlParserDebugEntities)
2599 fprintf(stderr, "String decoding PE Reference: %.30s\n", str);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002600 ent = xmlParseStringPEReference(ctxt, &str);
2601 if (ent != NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002602 xmlChar *rep;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002603
Daniel Veillardcf461992000-03-14 18:30:20 +00002604 ctxt->depth++;
2605 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
2606 0, 0, 0);
2607 ctxt->depth--;
2608 if (rep != NULL) {
2609 current = rep;
Daniel Veillarde0854c32000-08-27 21:12:29 +00002610 while (*current != 0) { /* non input consuming loop */
Daniel Veillardcf461992000-03-14 18:30:20 +00002611 buffer[nbchars++] = *current++;
2612 if (nbchars >
2613 buffer_size - XML_PARSER_BUFFER_SIZE) {
2614 growBuffer(buffer);
2615 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002616 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002617 xmlFree(rep);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002618 }
2619 }
2620 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00002621 COPY_BUF(l,buffer,nbchars,c);
2622 str += l;
2623 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00002624 growBuffer(buffer);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002625 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00002626 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002627 c = CUR_SCHAR(str, l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00002628 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002629 buffer[nbchars++] = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00002630 return(buffer);
2631}
2632
Daniel Veillard260a68f1998-08-13 03:39:55 +00002633
2634/************************************************************************
2635 * *
Daniel Veillard27d88741999-05-29 11:51:49 +00002636 * Commodity functions to handle encodings *
2637 * *
2638 ************************************************************************/
2639
Daniel Veillardcf461992000-03-14 18:30:20 +00002640/*
2641 * xmlCheckLanguageID
2642 * @lang: pointer to the string value
2643 *
2644 * Checks that the value conforms to the LanguageID production:
2645 *
Daniel Veillarde0854c32000-08-27 21:12:29 +00002646 * NOTE: this is somewhat deprecated, those productions were removed from
2647 * the XML Second edition.
2648 *
Daniel Veillardcf461992000-03-14 18:30:20 +00002649 * [33] LanguageID ::= Langcode ('-' Subcode)*
2650 * [34] Langcode ::= ISO639Code | IanaCode | UserCode
2651 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
2652 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
2653 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
2654 * [38] Subcode ::= ([a-z] | [A-Z])+
2655 *
2656 * Returns 1 if correct 0 otherwise
2657 **/
2658int
2659xmlCheckLanguageID(const xmlChar *lang) {
2660 const xmlChar *cur = lang;
2661
2662 if (cur == NULL)
2663 return(0);
2664 if (((cur[0] == 'i') && (cur[1] == '-')) ||
2665 ((cur[0] == 'I') && (cur[1] == '-'))) {
2666 /*
2667 * IANA code
2668 */
2669 cur += 2;
Daniel Veillarde0854c32000-08-27 21:12:29 +00002670 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
Daniel Veillardcf461992000-03-14 18:30:20 +00002671 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2672 cur++;
2673 } else if (((cur[0] == 'x') && (cur[1] == '-')) ||
2674 ((cur[0] == 'X') && (cur[1] == '-'))) {
2675 /*
2676 * User code
2677 */
2678 cur += 2;
Daniel Veillarde0854c32000-08-27 21:12:29 +00002679 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
Daniel Veillardcf461992000-03-14 18:30:20 +00002680 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2681 cur++;
2682 } else if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2683 ((cur[0] >= 'a') && (cur[0] <= 'z'))) {
2684 /*
2685 * ISO639
2686 */
2687 cur++;
2688 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2689 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2690 cur++;
2691 else
2692 return(0);
2693 } else
2694 return(0);
Daniel Veillarde0854c32000-08-27 21:12:29 +00002695 while (cur[0] != 0) { /* non input consuming */
Daniel Veillardcf461992000-03-14 18:30:20 +00002696 if (cur[0] != '-')
2697 return(0);
2698 cur++;
2699 if (((cur[0] >= 'A') && (cur[0] <= 'Z')) ||
2700 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2701 cur++;
2702 else
2703 return(0);
Daniel Veillarde0854c32000-08-27 21:12:29 +00002704 while (((cur[0] >= 'A') && (cur[0] <= 'Z')) || /* non input consuming */
Daniel Veillardcf461992000-03-14 18:30:20 +00002705 ((cur[0] >= 'a') && (cur[0] <= 'z')))
2706 cur++;
2707 }
2708 return(1);
2709}
2710
Daniel Veillard27d88741999-05-29 11:51:49 +00002711/**
2712 * xmlSwitchEncoding:
2713 * @ctxt: the parser context
Daniel Veillard00fdf371999-10-08 09:40:39 +00002714 * @enc: the encoding value (number)
Daniel Veillard27d88741999-05-29 11:51:49 +00002715 *
2716 * change the input functions when discovering the character encoding
2717 * of a given entity.
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002718 *
2719 * Returns 0 in case of success, -1 otherwise
Daniel Veillard27d88741999-05-29 11:51:49 +00002720 */
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002721int
Daniel Veillard27d88741999-05-29 11:51:49 +00002722xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
2723{
Daniel Veillardcf461992000-03-14 18:30:20 +00002724 xmlCharEncodingHandlerPtr handler;
2725
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002726 switch (enc) {
2727 case XML_CHAR_ENCODING_ERROR:
2728 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2730 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2731 ctxt->wellFormed = 0;
2732 ctxt->disableSAX = 1;
2733 break;
2734 case XML_CHAR_ENCODING_NONE:
2735 /* let's assume it's UTF-8 without the XML decl */
Daniel Veillardbe803962000-06-28 23:40:59 +00002736 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002737 return(0);
2738 case XML_CHAR_ENCODING_UTF8:
2739 /* default encoding, no conversion should be needed */
Daniel Veillardbe803962000-06-28 23:40:59 +00002740 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002741 return(0);
2742 default:
2743 break;
2744 }
Daniel Veillardcf461992000-03-14 18:30:20 +00002745 handler = xmlGetCharEncodingHandler(enc);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002746 if (handler == NULL) {
2747 /*
2748 * Default handlers.
2749 */
2750 switch (enc) {
2751 case XML_CHAR_ENCODING_ERROR:
2752 ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
2753 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2754 ctxt->sax->error(ctxt->userData, "encoding unknown\n");
2755 ctxt->wellFormed = 0;
2756 ctxt->disableSAX = 1;
Daniel Veillardbe803962000-06-28 23:40:59 +00002757 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002758 break;
2759 case XML_CHAR_ENCODING_NONE:
2760 /* let's assume it's UTF-8 without the XML decl */
Daniel Veillardbe803962000-06-28 23:40:59 +00002761 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002762 return(0);
2763 case XML_CHAR_ENCODING_UTF8:
Daniel Veillard87b95392000-08-12 21:12:04 +00002764 case XML_CHAR_ENCODING_ASCII:
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002765 /* default encoding, no conversion should be needed */
Daniel Veillardbe803962000-06-28 23:40:59 +00002766 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002767 return(0);
2768 case XML_CHAR_ENCODING_UTF16LE:
2769 break;
2770 case XML_CHAR_ENCODING_UTF16BE:
2771 break;
2772 case XML_CHAR_ENCODING_UCS4LE:
2773 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2775 ctxt->sax->error(ctxt->userData,
2776 "char encoding USC4 little endian not supported\n");
2777 break;
2778 case XML_CHAR_ENCODING_UCS4BE:
2779 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2781 ctxt->sax->error(ctxt->userData,
2782 "char encoding USC4 big endian not supported\n");
2783 break;
2784 case XML_CHAR_ENCODING_EBCDIC:
2785 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2787 ctxt->sax->error(ctxt->userData,
2788 "char encoding EBCDIC not supported\n");
2789 break;
2790 case XML_CHAR_ENCODING_UCS4_2143:
2791 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2793 ctxt->sax->error(ctxt->userData,
2794 "char encoding UCS4 2143 not supported\n");
2795 break;
2796 case XML_CHAR_ENCODING_UCS4_3412:
2797 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2799 ctxt->sax->error(ctxt->userData,
2800 "char encoding UCS4 3412 not supported\n");
2801 break;
2802 case XML_CHAR_ENCODING_UCS2:
2803 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2804 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2805 ctxt->sax->error(ctxt->userData,
2806 "char encoding UCS2 not supported\n");
2807 break;
2808 case XML_CHAR_ENCODING_8859_1:
2809 case XML_CHAR_ENCODING_8859_2:
2810 case XML_CHAR_ENCODING_8859_3:
2811 case XML_CHAR_ENCODING_8859_4:
2812 case XML_CHAR_ENCODING_8859_5:
2813 case XML_CHAR_ENCODING_8859_6:
2814 case XML_CHAR_ENCODING_8859_7:
2815 case XML_CHAR_ENCODING_8859_8:
2816 case XML_CHAR_ENCODING_8859_9:
2817 /*
Daniel Veillard32bc74e2000-07-14 14:49:25 +00002818 * We used to keep the internal content in the
2819 * document encoding however this turns being unmaintainable
2820 * So xmlGetCharEncodingHandler() will return non-null
2821 * values for this now.
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002822 */
2823 if ((ctxt->inputNr == 1) &&
2824 (ctxt->encoding == NULL) &&
2825 (ctxt->input->encoding != NULL)) {
2826 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
2827 }
Daniel Veillardbe803962000-06-28 23:40:59 +00002828 ctxt->charset = enc;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002829 return(0);
2830 case XML_CHAR_ENCODING_2022_JP:
2831 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2833 ctxt->sax->error(ctxt->userData,
2834 "char encoding ISO-2022-JPnot supported\n");
2835 break;
2836 case XML_CHAR_ENCODING_SHIFT_JIS:
2837 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2839 ctxt->sax->error(ctxt->userData,
2840 "char encoding Shift_JIS not supported\n");
2841 break;
2842 case XML_CHAR_ENCODING_EUC_JP:
2843 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
2844 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2845 ctxt->sax->error(ctxt->userData,
2846 "char encoding EUC-JPnot supported\n");
2847 break;
2848 }
2849 }
2850 if (handler == NULL)
2851 return(-1);
Daniel Veillardbe803962000-06-28 23:40:59 +00002852 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002853 return(xmlSwitchToEncoding(ctxt, handler));
2854}
2855
2856/**
2857 * xmlSwitchToEncoding:
2858 * @ctxt: the parser context
2859 * @handler: the encoding handler
2860 *
2861 * change the input functions when discovering the character encoding
2862 * of a given entity.
2863 *
2864 * Returns 0 in case of success, -1 otherwise
2865 */
2866int
2867xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
2868{
2869 int nbchars;
2870
Daniel Veillardcf461992000-03-14 18:30:20 +00002871 if (handler != NULL) {
2872 if (ctxt->input != NULL) {
2873 if (ctxt->input->buf != NULL) {
2874 if (ctxt->input->buf->encoder != NULL) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002875 if (ctxt->input->buf->encoder == handler)
2876 return(0);
Daniel Veillardbe803962000-06-28 23:40:59 +00002877 /*
2878 * Note: this is a bit dangerous, but that's what it
2879 * takes to use nearly compatible signature for different
2880 * encodings.
2881 */
2882 xmlCharEncCloseFunc(ctxt->input->buf->encoder);
2883 ctxt->input->buf->encoder = handler;
2884 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00002885 }
2886 ctxt->input->buf->encoder = handler;
2887
2888 /*
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002889 * Is there already some content down the pipe to convert ?
Daniel Veillardcf461992000-03-14 18:30:20 +00002890 */
2891 if ((ctxt->input->buf->buffer != NULL) &&
2892 (ctxt->input->buf->buffer->use > 0)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002893 int processed;
2894
2895 /*
2896 * Specific handling of the Byte Order Mark for
2897 * UTF-16
2898 */
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002899 if ((handler->name != NULL) &&
2900 (!strcmp(handler->name, "UTF-16LE")) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00002901 (ctxt->input->cur[0] == 0xFF) &&
2902 (ctxt->input->cur[1] == 0xFE)) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002903 ctxt->input->cur += 2;
Daniel Veillardcf461992000-03-14 18:30:20 +00002904 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002905 if ((handler->name != NULL) &&
2906 (!strcmp(handler->name, "UTF-16BE")) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00002907 (ctxt->input->cur[0] == 0xFE) &&
2908 (ctxt->input->cur[1] == 0xFF)) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002909 ctxt->input->cur += 2;
Daniel Veillardcf461992000-03-14 18:30:20 +00002910 }
2911
2912 /*
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002913 * Shring the current input buffer.
2914 * Move it as the raw buffer and create a new input buffer
Daniel Veillardcf461992000-03-14 18:30:20 +00002915 */
2916 processed = ctxt->input->cur - ctxt->input->base;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002917 xmlBufferShrink(ctxt->input->buf->buffer, processed);
2918 ctxt->input->buf->raw = ctxt->input->buf->buffer;
2919 ctxt->input->buf->buffer = xmlBufferCreate();
Daniel Veillardcf461992000-03-14 18:30:20 +00002920
Daniel Veillard52402ce2000-08-22 23:36:12 +00002921 if (ctxt->html) {
2922 /*
2923 * converst as much as possbile of the buffer
2924 */
2925 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
2926 ctxt->input->buf->buffer,
2927 ctxt->input->buf->raw);
2928 } else {
2929 /*
2930 * convert just enough to get
2931 * '<?xml version="1.0" encoding="xxx"?>'
2932 * parsed with the autodetected encoding
2933 * into the parser reading buffer.
2934 */
2935 nbchars = xmlCharEncFirstLine(ctxt->input->buf->encoder,
2936 ctxt->input->buf->buffer,
2937 ctxt->input->buf->raw);
2938 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002939 if (nbchars < 0) {
2940 fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2941 return(-1);
2942 }
2943 ctxt->input->base =
2944 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillard87b95392000-08-12 21:12:04 +00002945
Daniel Veillardcf461992000-03-14 18:30:20 +00002946 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002947 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00002948 } else {
Daniel Veillard46e370e2000-07-21 20:32:03 +00002949 if ((ctxt->input->length == 0) || (ctxt->input->buf == NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00002950 /*
2951 * When parsing a static memory array one must know the
2952 * size to be able to convert the buffer.
2953 */
2954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2955 ctxt->sax->error(ctxt->userData,
2956 "xmlSwitchEncoding : no input\n");
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002957 return(-1);
Daniel Veillardcf461992000-03-14 18:30:20 +00002958 } else {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002959 int processed;
Daniel Veillardcf461992000-03-14 18:30:20 +00002960
2961 /*
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002962 * Shring the current input buffer.
2963 * Move it as the raw buffer and create a new input buffer
Daniel Veillardcf461992000-03-14 18:30:20 +00002964 */
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002965 processed = ctxt->input->cur - ctxt->input->base;
Daniel Veillard46e370e2000-07-21 20:32:03 +00002966
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002967 ctxt->input->buf->raw = xmlBufferCreate();
2968 xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
Daniel Veillard46e370e2000-07-21 20:32:03 +00002969 ctxt->input->length - processed);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002970 ctxt->input->buf->buffer = xmlBufferCreate();
2971
2972 /*
2973 * convert as much as possible of the raw input
2974 * to the parser reading buffer.
2975 */
2976 nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
2977 ctxt->input->buf->buffer,
2978 ctxt->input->buf->raw);
2979 if (nbchars < 0) {
2980 fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
2981 return(-1);
Daniel Veillardcf461992000-03-14 18:30:20 +00002982 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002983
Daniel Veillardcf461992000-03-14 18:30:20 +00002984 /*
2985 * Conversion succeeded, get rid of the old buffer
2986 */
2987 if ((ctxt->input->free != NULL) &&
2988 (ctxt->input->base != NULL))
2989 ctxt->input->free((xmlChar *) ctxt->input->base);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002990 ctxt->input->base =
2991 ctxt->input->cur = ctxt->input->buf->buffer->content;
Daniel Veillardcf461992000-03-14 18:30:20 +00002992 }
2993 }
2994 } else {
2995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2996 ctxt->sax->error(ctxt->userData,
2997 "xmlSwitchEncoding : no input\n");
Daniel Veillard496a1cf2000-05-03 14:20:55 +00002998 return(-1);
Daniel Veillardcf461992000-03-14 18:30:20 +00002999 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003000 /*
3001 * The parsing is now done in UTF8 natively
3002 */
Daniel Veillardbe803962000-06-28 23:40:59 +00003003 ctxt->charset = XML_CHAR_ENCODING_UTF8;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00003004 } else
3005 return(-1);
3006 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00003007
Daniel Veillard27d88741999-05-29 11:51:49 +00003008}
3009
3010/************************************************************************
3011 * *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003012 * Commodity functions to handle xmlChars *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003013 * *
3014 ************************************************************************/
3015
Daniel Veillard11e00581998-10-24 18:27:49 +00003016/**
3017 * xmlStrndup:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003018 * @cur: the input xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00003019 * @len: the len of @cur
3020 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003021 * a strndup for array of xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00003022 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003023 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003024 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003025xmlChar *
3026xmlStrndup(const xmlChar *cur, int len) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003027 xmlChar *ret;
3028
3029 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003030 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003031 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00003032 fprintf(stderr, "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003033 (len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003034 return(NULL);
3035 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003036 memcpy(ret, cur, len * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003037 ret[len] = 0;
3038 return(ret);
3039}
3040
Daniel Veillard11e00581998-10-24 18:27:49 +00003041/**
3042 * xmlStrdup:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003043 * @cur: the input xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00003044 *
Daniel Veillardcf461992000-03-14 18:30:20 +00003045 * a strdup for array of xmlChar's. Since they are supposed to be
3046 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
3047 * a termination mark of '0'.
Daniel Veillard1e346af1999-02-22 10:33:01 +00003048 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003049 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003050 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003051xmlChar *
3052xmlStrdup(const xmlChar *cur) {
3053 const xmlChar *p = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003054
Daniel Veillard10a2c651999-12-12 13:03:50 +00003055 if (cur == NULL) return(NULL);
Daniel Veillarde0854c32000-08-27 21:12:29 +00003056 while (*p != 0) p++; /* non input consuming */
Daniel Veillard260a68f1998-08-13 03:39:55 +00003057 return(xmlStrndup(cur, p - cur));
3058}
3059
Daniel Veillard11e00581998-10-24 18:27:49 +00003060/**
3061 * xmlCharStrndup:
3062 * @cur: the input char *
3063 * @len: the len of @cur
3064 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003065 * a strndup for char's to xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00003066 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003067 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003068 */
3069
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003070xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003071xmlCharStrndup(const char *cur, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003072 int i;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003073 xmlChar *ret;
3074
3075 if ((cur == NULL) || (len < 0)) return(NULL);
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003076 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003077 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00003078 fprintf(stderr, "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003079 (len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003080 return(NULL);
3081 }
3082 for (i = 0;i < len;i++)
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003083 ret[i] = (xmlChar) cur[i];
Daniel Veillard260a68f1998-08-13 03:39:55 +00003084 ret[len] = 0;
3085 return(ret);
3086}
3087
Daniel Veillard11e00581998-10-24 18:27:49 +00003088/**
3089 * xmlCharStrdup:
3090 * @cur: the input char *
3091 * @len: the len of @cur
3092 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003093 * a strdup for char's to xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00003094 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003095 * Returns a new xmlChar * or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003096 */
3097
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003098xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003099xmlCharStrdup(const char *cur) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003100 const char *p = cur;
3101
Daniel Veillard10a2c651999-12-12 13:03:50 +00003102 if (cur == NULL) return(NULL);
Daniel Veillarde0854c32000-08-27 21:12:29 +00003103 while (*p != '\0') p++; /* non input consuming */
Daniel Veillard260a68f1998-08-13 03:39:55 +00003104 return(xmlCharStrndup(cur, p - cur));
3105}
3106
Daniel Veillard11e00581998-10-24 18:27:49 +00003107/**
3108 * xmlStrcmp:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003109 * @str1: the first xmlChar *
3110 * @str2: the second xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00003111 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003112 * a strcmp for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00003113 *
3114 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00003115 */
3116
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003117int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003118xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003119 register int tmp;
3120
Daniel Veillard10a2c651999-12-12 13:03:50 +00003121 if ((str1 == NULL) && (str2 == NULL)) return(0);
3122 if (str1 == NULL) return(-1);
3123 if (str2 == NULL) return(1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003124 do {
3125 tmp = *str1++ - *str2++;
3126 if (tmp != 0) return(tmp);
Daniel Veillarde0854c32000-08-27 21:12:29 +00003127 } while ((*str1 != 0) && (*str2 != 0)); /* non input consuming */
Daniel Veillard260a68f1998-08-13 03:39:55 +00003128 return (*str1 - *str2);
3129}
3130
Daniel Veillard11e00581998-10-24 18:27:49 +00003131/**
3132 * xmlStrncmp:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003133 * @str1: the first xmlChar *
3134 * @str2: the second xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00003135 * @len: the max comparison length
3136 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003137 * a strncmp for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00003138 *
3139 * Returns the integer result of the comparison
Daniel Veillard260a68f1998-08-13 03:39:55 +00003140 */
3141
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003142int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003143xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003144 register int tmp;
3145
3146 if (len <= 0) return(0);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003147 if ((str1 == NULL) && (str2 == NULL)) return(0);
3148 if (str1 == NULL) return(-1);
3149 if (str2 == NULL) return(1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003150 do {
3151 tmp = *str1++ - *str2++;
3152 if (tmp != 0) return(tmp);
3153 len--;
3154 if (len <= 0) return(0);
Daniel Veillarde0854c32000-08-27 21:12:29 +00003155 } while ((*str1 != 0) && (*str2 != 0)); /* non input consuming */
Daniel Veillard260a68f1998-08-13 03:39:55 +00003156 return (*str1 - *str2);
3157}
3158
Daniel Veillard11e00581998-10-24 18:27:49 +00003159/**
3160 * xmlStrchr:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003161 * @str: the xmlChar * array
3162 * @val: the xmlChar to search
Daniel Veillard11e00581998-10-24 18:27:49 +00003163 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003164 * a strchr for xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00003165 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003166 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003167 */
3168
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003169const xmlChar *
3170xmlStrchr(const xmlChar *str, xmlChar val) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003171 if (str == NULL) return(NULL);
Daniel Veillarde0854c32000-08-27 21:12:29 +00003172 while (*str != 0) { /* non input consuming */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003173 if (*str == val) return((xmlChar *) str);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003174 str++;
3175 }
3176 return(NULL);
3177}
3178
Daniel Veillard11e00581998-10-24 18:27:49 +00003179/**
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003180 * xmlStrstr:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003181 * @str: the xmlChar * array (haystack)
3182 * @val: the xmlChar to search (needle)
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003183 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003184 * a strstr for xmlChar's
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003185 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003186 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003187 */
3188
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003189const xmlChar *
3190xmlStrstr(const xmlChar *str, xmlChar *val) {
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003191 int n;
3192
3193 if (str == NULL) return(NULL);
3194 if (val == NULL) return(NULL);
3195 n = xmlStrlen(val);
3196
3197 if (n == 0) return(str);
Daniel Veillarde0854c32000-08-27 21:12:29 +00003198 while (*str != 0) { /* non input consuming */
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003199 if (*str == *val) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003200 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003201 }
3202 str++;
3203 }
3204 return(NULL);
3205}
3206
3207/**
3208 * xmlStrsub:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003209 * @str: the xmlChar * array (haystack)
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003210 * @start: the index of the first char (zero based)
3211 * @len: the length of the substring
3212 *
3213 * Extract a substring of a given string
3214 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003215 * Returns the xmlChar * for the first occurence or NULL.
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003216 */
3217
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003218xmlChar *
3219xmlStrsub(const xmlChar *str, int start, int len) {
Daniel Veillard1566d3a1999-07-15 14:24:29 +00003220 int i;
3221
3222 if (str == NULL) return(NULL);
3223 if (start < 0) return(NULL);
3224 if (len < 0) return(NULL);
3225
3226 for (i = 0;i < start;i++) {
3227 if (*str == 0) return(NULL);
3228 str++;
3229 }
3230 if (*str == 0) return(NULL);
3231 return(xmlStrndup(str, len));
3232}
3233
3234/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003235 * xmlStrlen:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003236 * @str: the xmlChar * array
Daniel Veillard11e00581998-10-24 18:27:49 +00003237 *
Daniel Veillard51e3b151999-11-12 17:02:31 +00003238 * length of a xmlChar's string
Daniel Veillard1e346af1999-02-22 10:33:01 +00003239 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003240 * Returns the number of xmlChar contained in the ARRAY.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003241 */
3242
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003243int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003244xmlStrlen(const xmlChar *str) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003245 int len = 0;
3246
3247 if (str == NULL) return(0);
Daniel Veillarde0854c32000-08-27 21:12:29 +00003248 while (*str != 0) { /* non input consuming */
Daniel Veillard260a68f1998-08-13 03:39:55 +00003249 str++;
3250 len++;
3251 }
3252 return(len);
3253}
3254
Daniel Veillard11e00581998-10-24 18:27:49 +00003255/**
3256 * xmlStrncat:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003257 * @cur: the original xmlChar * array
3258 * @add: the xmlChar * array added
Daniel Veillard11e00581998-10-24 18:27:49 +00003259 * @len: the length of @add
3260 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003261 * a strncat for array of xmlChar's
Daniel Veillard1e346af1999-02-22 10:33:01 +00003262 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003263 * Returns a new xmlChar * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003264 */
3265
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003266xmlChar *
3267xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003268 int size;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003269 xmlChar *ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003270
3271 if ((add == NULL) || (len == 0))
3272 return(cur);
3273 if (cur == NULL)
3274 return(xmlStrndup(add, len));
3275
3276 size = xmlStrlen(cur);
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003277 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003278 if (ret == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00003279 fprintf(stderr, "xmlStrncat: realloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003280 (size + len + 1) * (long)sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003281 return(cur);
3282 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003283 memcpy(&ret[size], add, len * sizeof(xmlChar));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003284 ret[size + len] = 0;
3285 return(ret);
3286}
3287
Daniel Veillard11e00581998-10-24 18:27:49 +00003288/**
3289 * xmlStrcat:
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003290 * @cur: the original xmlChar * array
3291 * @add: the xmlChar * array added
Daniel Veillard11e00581998-10-24 18:27:49 +00003292 *
Daniel Veillardcf461992000-03-14 18:30:20 +00003293 * a strcat for array of xmlChar's. Since they are supposed to be
3294 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
3295 * a termination mark of '0'.
Daniel Veillard1e346af1999-02-22 10:33:01 +00003296 *
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003297 * Returns a new xmlChar * containing the concatenated string.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003298 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003299xmlChar *
3300xmlStrcat(xmlChar *cur, const xmlChar *add) {
3301 const xmlChar *p = add;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003302
3303 if (add == NULL) return(cur);
3304 if (cur == NULL)
3305 return(xmlStrdup(add));
3306
Daniel Veillarde0854c32000-08-27 21:12:29 +00003307 while (*p != 0) p++; /* non input consuming */
Daniel Veillard260a68f1998-08-13 03:39:55 +00003308 return(xmlStrncat(cur, add, p - add));
3309}
3310
3311/************************************************************************
3312 * *
3313 * Commodity functions, cleanup needed ? *
3314 * *
3315 ************************************************************************/
3316
Daniel Veillard11e00581998-10-24 18:27:49 +00003317/**
3318 * areBlanks:
3319 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003320 * @str: a xmlChar *
Daniel Veillard11e00581998-10-24 18:27:49 +00003321 * @len: the size of @str
3322 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003323 * Is this a sequence of blank chars that one can ignore ?
Daniel Veillard11e00581998-10-24 18:27:49 +00003324 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00003325 * Returns 1 if ignorable 0 otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003326 */
3327
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003328static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003329 int i, ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003330 xmlNodePtr lastChild;
3331
Daniel Veillard83a30e72000-03-02 03:33:32 +00003332 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00003333 * Check for xml:space value.
3334 */
3335 if (*(ctxt->space) == 1)
3336 return(0);
3337
3338 /*
Daniel Veillard83a30e72000-03-02 03:33:32 +00003339 * Check that the string is made of blanks
3340 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00003341 for (i = 0;i < len;i++)
3342 if (!(IS_BLANK(str[i]))) return(0);
3343
Daniel Veillard83a30e72000-03-02 03:33:32 +00003344 /*
3345 * Look if the element is mixed content in the Dtd if available
3346 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00003347 if (ctxt->myDoc != NULL) {
3348 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3349 if (ret == 0) return(1);
3350 if (ret == 1) return(0);
3351 }
Daniel Veillard83a30e72000-03-02 03:33:32 +00003352
Daniel Veillardb05deb71999-08-10 19:04:08 +00003353 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00003354 * Otherwise, heuristic :-\
Daniel Veillardb05deb71999-08-10 19:04:08 +00003355 */
Daniel Veillard83a30e72000-03-02 03:33:32 +00003356 if (ctxt->keepBlanks)
3357 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00003358 if (RAW != '<') return(0);
Daniel Veillard83a30e72000-03-02 03:33:32 +00003359 if (ctxt->node == NULL) return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00003360 if ((ctxt->node->children == NULL) &&
3361 (RAW == '<') && (NXT(1) == '/')) return(0);
Daniel Veillard83a30e72000-03-02 03:33:32 +00003362
Daniel Veillard260a68f1998-08-13 03:39:55 +00003363 lastChild = xmlGetLastChild(ctxt->node);
3364 if (lastChild == NULL) {
Daniel Veillard90fb02c2000-03-02 03:46:43 +00003365 if (ctxt->node->content != NULL) return(0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003366 } else if (xmlNodeIsText(lastChild))
Daniel Veillard90fb02c2000-03-02 03:46:43 +00003367 return(0);
Daniel Veillardcf461992000-03-14 18:30:20 +00003368 else if ((ctxt->node->children != NULL) &&
3369 (xmlNodeIsText(ctxt->node->children)))
Daniel Veillard90fb02c2000-03-02 03:46:43 +00003370 return(0);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003371 return(1);
3372}
3373
Daniel Veillard11e00581998-10-24 18:27:49 +00003374/**
3375 * xmlHandleEntity:
3376 * @ctxt: an XML parser context
3377 * @entity: an XML entity pointer.
3378 *
3379 * Default handling of defined entities, when should we define a new input
Daniel Veillard260a68f1998-08-13 03:39:55 +00003380 * stream ? When do we just handle that as a set of chars ?
Daniel Veillardb05deb71999-08-10 19:04:08 +00003381 *
3382 * OBSOLETE: to be removed at some point.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003383 */
3384
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003385void
3386xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003387 int len;
Daniel Veillardccb09631998-10-27 06:21:04 +00003388 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003389
3390 if (entity->content == NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003391 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003393 ctxt->sax->error(ctxt->userData, "xmlHandleEntity %s: content == NULL\n",
Daniel Veillard260a68f1998-08-13 03:39:55 +00003394 entity->name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003395 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003396 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003397 return;
3398 }
3399 len = xmlStrlen(entity->content);
3400 if (len <= 2) goto handle_as_char;
3401
3402 /*
3403 * Redefine its content as an input stream.
3404 */
Daniel Veillardccb09631998-10-27 06:21:04 +00003405 input = xmlNewEntityInputStream(ctxt, entity);
3406 xmlPushInput(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003407 return;
3408
3409handle_as_char:
3410 /*
3411 * Just handle the content as a set of chars.
3412 */
Daniel Veillardcf461992000-03-14 18:30:20 +00003413 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3414 (ctxt->sax->characters != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003415 ctxt->sax->characters(ctxt->userData, entity->content, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003416
3417}
3418
3419/*
3420 * Forward definition for recursive behaviour.
3421 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00003422void xmlParsePEReference(xmlParserCtxtPtr ctxt);
3423void xmlParseReference(xmlParserCtxtPtr ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003424
3425/************************************************************************
3426 * *
3427 * Extra stuff for namespace support *
3428 * Relates to http://www.w3.org/TR/WD-xml-names *
3429 * *
3430 ************************************************************************/
3431
Daniel Veillard11e00581998-10-24 18:27:49 +00003432/**
3433 * xmlNamespaceParseNCName:
3434 * @ctxt: an XML parser context
3435 *
3436 * parse an XML namespace name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003437 *
Daniel Veillarde0854c32000-08-27 21:12:29 +00003438 * TODO: this seems not in use anymore, the namespace handling is done on
3439 * top of the SAX interfaces, i.e. not on raw input.
3440 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003441 * [NS 3] NCName ::= (Letter | '_') (NCNameChar)*
3442 *
3443 * [NS 4] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3444 * CombiningChar | Extender
Daniel Veillard1e346af1999-02-22 10:33:01 +00003445 *
3446 * Returns the namespace name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003447 */
3448
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003449xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003450xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003451 xmlChar buf[XML_MAX_NAMELEN + 5];
3452 int len = 0, l;
3453 int cur = CUR_CHAR(l);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003454
Daniel Veillardcf461992000-03-14 18:30:20 +00003455 /* load first the value of the char !!! */
Daniel Veillarde0854c32000-08-27 21:12:29 +00003456 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003457 if (!IS_LETTER(cur) && (cur != '_')) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003458
Daniel Veillarde0854c32000-08-27 21:12:29 +00003459fprintf(stderr, "xmlNamespaceParseNCName: reached loop 3\n");
3460 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) || /* NOT REACHED */
Daniel Veillardcf461992000-03-14 18:30:20 +00003461 (cur == '.') || (cur == '-') ||
3462 (cur == '_') ||
3463 (IS_COMBINING(cur)) ||
3464 (IS_EXTENDER(cur))) {
3465 COPY_BUF(l,buf,len,cur);
3466 NEXTL(l);
3467 cur = CUR_CHAR(l);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003468 if (len >= XML_MAX_NAMELEN) {
3469 fprintf(stderr,
3470 "xmlNamespaceParseNCName: reached XML_MAX_NAMELEN limit\n");
Daniel Veillarde0854c32000-08-27 21:12:29 +00003471 while ((IS_LETTER(cur)) || (IS_DIGIT(cur)) ||/* NOT REACHED */
Daniel Veillardcf461992000-03-14 18:30:20 +00003472 (cur == '.') || (cur == '-') ||
3473 (cur == '_') ||
3474 (IS_COMBINING(cur)) ||
3475 (IS_EXTENDER(cur))) {
3476 NEXTL(l);
3477 cur = CUR_CHAR(l);
3478 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003479 break;
3480 }
3481 }
3482 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00003483}
3484
Daniel Veillard11e00581998-10-24 18:27:49 +00003485/**
3486 * xmlNamespaceParseQName:
3487 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003488 * @prefix: a xmlChar **
Daniel Veillard11e00581998-10-24 18:27:49 +00003489 *
Daniel Veillarde0854c32000-08-27 21:12:29 +00003490 * TODO: this seems not in use anymore, the namespace handling is done on
3491 * top of the SAX interfaces, i.e. not on raw input.
3492 *
Daniel Veillard11e00581998-10-24 18:27:49 +00003493 * parse an XML qualified name
Daniel Veillard260a68f1998-08-13 03:39:55 +00003494 *
3495 * [NS 5] QName ::= (Prefix ':')? LocalPart
3496 *
3497 * [NS 6] Prefix ::= NCName
3498 *
3499 * [NS 7] LocalPart ::= NCName
Daniel Veillard1e346af1999-02-22 10:33:01 +00003500 *
Daniel Veillard51e3b151999-11-12 17:02:31 +00003501 * Returns the local part, and prefix is updated
Daniel Veillard11e00581998-10-24 18:27:49 +00003502 * to get the Prefix if any.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003503 */
3504
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003505xmlChar *
3506xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, xmlChar **prefix) {
3507 xmlChar *ret = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003508
3509 *prefix = NULL;
3510 ret = xmlNamespaceParseNCName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00003511 if (RAW == ':') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003512 *prefix = ret;
3513 NEXT;
3514 ret = xmlNamespaceParseNCName(ctxt);
3515 }
3516
3517 return(ret);
3518}
3519
Daniel Veillard11e00581998-10-24 18:27:49 +00003520/**
3521 * xmlNamespaceParseNSDef:
3522 * @ctxt: an XML parser context
3523 *
3524 * parse a namespace prefix declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00003525 *
Daniel Veillarde0854c32000-08-27 21:12:29 +00003526 * TODO: this seems not in use anymore, the namespace handling is done on
3527 * top of the SAX interfaces, i.e. not on raw input.
3528 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00003529 * [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
3530 *
3531 * [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
Daniel Veillard1e346af1999-02-22 10:33:01 +00003532 *
3533 * Returns the namespace name
Daniel Veillard260a68f1998-08-13 03:39:55 +00003534 */
3535
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003536xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003537xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003538 xmlChar *name = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003539
Daniel Veillardcf461992000-03-14 18:30:20 +00003540 if ((RAW == 'x') && (NXT(1) == 'm') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003541 (NXT(2) == 'l') && (NXT(3) == 'n') &&
3542 (NXT(4) == 's')) {
3543 SKIP(5);
Daniel Veillardcf461992000-03-14 18:30:20 +00003544 if (RAW == ':') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003545 NEXT;
3546 name = xmlNamespaceParseNCName(ctxt);
3547 }
3548 }
3549 return(name);
3550}
3551
Daniel Veillard11e00581998-10-24 18:27:49 +00003552/**
Daniel Veillarde0854c32000-08-27 21:12:29 +00003553 * xmlSplitQName:
3554 * @ctxt: an XML parser context
3555 * @name: an XML parser context
3556 * @prefix: a xmlChar **
3557 *
3558 * parse an UTF8 encoded XML qualified name string
3559 *
3560 * [NS 5] QName ::= (Prefix ':')? LocalPart
3561 *
3562 * [NS 6] Prefix ::= NCName
3563 *
3564 * [NS 7] LocalPart ::= NCName
3565 *
3566 * Returns the local part, and prefix is updated
3567 * to get the Prefix if any.
3568 */
3569
3570xmlChar *
3571xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3572 xmlChar buf[XML_MAX_NAMELEN + 5];
3573 xmlChar *buffer = NULL;
3574 int len = 0;
3575 int max = XML_MAX_NAMELEN;
3576 xmlChar *ret = NULL;
3577 const xmlChar *cur = name;
3578 int c;
3579
3580 *prefix = NULL;
3581
3582 /* xml: prefix is not really a namespace */
3583 if ((cur[0] == 'x') && (cur[1] == 'm') &&
3584 (cur[2] == 'l') && (cur[3] == ':'))
3585 return(xmlStrdup(name));
3586
3587 /* nasty but valid */
3588 if (cur[0] == ':')
3589 return(xmlStrdup(name));
3590
3591 c = *cur++;
3592 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3593 buf[len++] = c;
3594 c = *cur++;
3595 }
3596 if (len >= max) {
3597 /*
3598 * Okay someone managed to make a huge name, so he's ready to pay
3599 * for the processing speed.
3600 */
3601 max = len * 2;
3602
3603 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
3604 if (buffer == NULL) {
3605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3606 ctxt->sax->error(ctxt->userData,
3607 "xmlSplitQName: out of memory\n");
3608 return(NULL);
3609 }
3610 memcpy(buffer, buf, len);
3611 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3612 if (len + 10 > max) {
3613 max *= 2;
3614 buffer = (xmlChar *) xmlRealloc(buffer,
3615 max * sizeof(xmlChar));
3616 if (buffer == NULL) {
3617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3618 ctxt->sax->error(ctxt->userData,
3619 "xmlSplitQName: out of memory\n");
3620 return(NULL);
3621 }
3622 }
3623 buffer[len++] = c;
3624 c = *cur++;
3625 }
3626 buffer[len] = 0;
3627 }
3628
3629 if (buffer == NULL)
3630 ret = xmlStrndup(buf, len);
3631 else {
3632 ret = buffer;
3633 buffer = NULL;
3634 max = XML_MAX_NAMELEN;
3635 }
3636
3637
3638 if (c == ':') {
3639 c = *cur++;
3640 if (c == 0) return(ret);
3641 *prefix = ret;
3642 len = 0;
3643
3644 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3645 buf[len++] = c;
3646 c = *cur++;
3647 }
3648 if (len >= max) {
3649 /*
3650 * Okay someone managed to make a huge name, so he's ready to pay
3651 * for the processing speed.
3652 */
3653 max = len * 2;
3654
3655 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
3656 if (buffer == NULL) {
3657 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3658 ctxt->sax->error(ctxt->userData,
3659 "xmlSplitQName: out of memory\n");
3660 return(NULL);
3661 }
3662 memcpy(buffer, buf, len);
3663 while (c != 0) { /* tested bigname2.xml */
3664 if (len + 10 > max) {
3665 max *= 2;
3666 buffer = (xmlChar *) xmlRealloc(buffer,
3667 max * sizeof(xmlChar));
3668 if (buffer == NULL) {
3669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3670 ctxt->sax->error(ctxt->userData,
3671 "xmlSplitQName: out of memory\n");
3672 return(NULL);
3673 }
3674 }
3675 buffer[len++] = c;
3676 c = *cur++;
3677 }
3678 buffer[len] = 0;
3679 }
3680
3681 if (buffer == NULL)
3682 ret = xmlStrndup(buf, len);
3683 else {
3684 ret = buffer;
3685 }
3686 }
3687
3688 return(ret);
3689}
3690
3691/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003692 * xmlParseQuotedString:
3693 * @ctxt: an XML parser context
3694 *
Daniel Veillarde0854c32000-08-27 21:12:29 +00003695 * Parse and return a string between quotes or doublequotes
3696 *
3697 * TODO: Deprecated, to be removed at next drop of binary compatibility
Daniel Veillard1e346af1999-02-22 10:33:01 +00003698 *
3699 * Returns the string parser or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003700 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003701xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003702xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003703 xmlChar *buf = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00003704 int len = 0,l;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00003705 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00003706 int c;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003707
Daniel Veillard10a2c651999-12-12 13:03:50 +00003708 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3709 if (buf == NULL) {
3710 fprintf(stderr, "malloc of %d byte failed\n", size);
3711 return(NULL);
3712 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00003713fprintf(stderr, "xmlParseQuotedString: reached loop 4\n");
Daniel Veillardcf461992000-03-14 18:30:20 +00003714 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003715 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00003716 c = CUR_CHAR(l);
Daniel Veillarde0854c32000-08-27 21:12:29 +00003717 while (IS_CHAR(c) && (c != '"')) { /* NOTUSED */
Daniel Veillardcf461992000-03-14 18:30:20 +00003718 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00003719 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003720 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00003721 if (buf == NULL) {
3722 fprintf(stderr, "realloc of %d byte failed\n", size);
3723 return(NULL);
3724 }
3725 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003726 COPY_BUF(l,buf,len,c);
3727 NEXTL(l);
3728 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00003729 }
3730 if (c != '"') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003731 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard10a2c651999-12-12 13:03:50 +00003733 ctxt->sax->error(ctxt->userData,
3734 "String not closed \"%.50s\"\n", buf);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003735 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003736 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003737 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003738 NEXT;
3739 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003740 } else if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00003741 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00003742 c = CUR;
Daniel Veillarde0854c32000-08-27 21:12:29 +00003743 while (IS_CHAR(c) && (c != '\'')) { /* NOTUSED */
Daniel Veillard10a2c651999-12-12 13:03:50 +00003744 if (len + 1 >= size) {
3745 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00003746 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00003747 if (buf == NULL) {
3748 fprintf(stderr, "realloc of %d byte failed\n", size);
3749 return(NULL);
3750 }
3751 }
3752 buf[len++] = c;
3753 NEXT;
3754 c = CUR;
3755 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003756 if (RAW != '\'') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003757 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard10a2c651999-12-12 13:03:50 +00003759 ctxt->sax->error(ctxt->userData,
3760 "String not closed \"%.50s\"\n", buf);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003761 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003762 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003763 } else {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003764 NEXT;
3765 }
3766 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00003767 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003768}
3769
Daniel Veillard11e00581998-10-24 18:27:49 +00003770/**
3771 * xmlParseNamespace:
3772 * @ctxt: an XML parser context
3773 *
Daniel Veillarde0854c32000-08-27 21:12:29 +00003774 * xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003775 *
3776 * This is what the older xml-name Working Draft specified, a bunch of
3777 * other stuff may still rely on it, so support is still here as
Daniel Veillard51e3b151999-11-12 17:02:31 +00003778 * if it was declared on the root of the Tree:-(
Daniel Veillardb05deb71999-08-10 19:04:08 +00003779 *
Daniel Veillarde0854c32000-08-27 21:12:29 +00003780 * TODO: remove from library
3781 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003782 * To be removed at next drop of binary compatibility
Daniel Veillard260a68f1998-08-13 03:39:55 +00003783 */
3784
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003785void
3786xmlParseNamespace(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003787 xmlChar *href = NULL;
3788 xmlChar *prefix = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003789 int garbage = 0;
3790
3791 /*
3792 * We just skipped "namespace" or "xml:namespace"
3793 */
3794 SKIP_BLANKS;
3795
Daniel Veillarde0854c32000-08-27 21:12:29 +00003796fprintf(stderr, "xmlParseNamespace: reached loop 5\n");
3797 while (IS_CHAR(RAW) && (RAW != '>')) { /* NOT REACHED */
Daniel Veillard260a68f1998-08-13 03:39:55 +00003798 /*
3799 * We can have "ns" or "prefix" attributes
3800 * Old encoding as 'href' or 'AS' attributes is still supported
3801 */
Daniel Veillardcf461992000-03-14 18:30:20 +00003802 if ((RAW == 'n') && (NXT(1) == 's')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003803 garbage = 0;
3804 SKIP(2);
3805 SKIP_BLANKS;
3806
Daniel Veillardcf461992000-03-14 18:30:20 +00003807 if (RAW != '=') continue;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003808 NEXT;
3809 SKIP_BLANKS;
3810
3811 href = xmlParseQuotedString(ctxt);
3812 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003813 } else if ((RAW == 'h') && (NXT(1) == 'r') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003814 (NXT(2) == 'e') && (NXT(3) == 'f')) {
3815 garbage = 0;
3816 SKIP(4);
3817 SKIP_BLANKS;
3818
Daniel Veillardcf461992000-03-14 18:30:20 +00003819 if (RAW != '=') continue;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003820 NEXT;
3821 SKIP_BLANKS;
3822
3823 href = xmlParseQuotedString(ctxt);
3824 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003825 } else if ((RAW == 'p') && (NXT(1) == 'r') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00003826 (NXT(2) == 'e') && (NXT(3) == 'f') &&
3827 (NXT(4) == 'i') && (NXT(5) == 'x')) {
3828 garbage = 0;
3829 SKIP(6);
3830 SKIP_BLANKS;
3831
Daniel Veillardcf461992000-03-14 18:30:20 +00003832 if (RAW != '=') continue;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003833 NEXT;
3834 SKIP_BLANKS;
3835
3836 prefix = xmlParseQuotedString(ctxt);
3837 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003838 } else if ((RAW == 'A') && (NXT(1) == 'S')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003839 garbage = 0;
3840 SKIP(2);
3841 SKIP_BLANKS;
3842
Daniel Veillardcf461992000-03-14 18:30:20 +00003843 if (RAW != '=') continue;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003844 NEXT;
3845 SKIP_BLANKS;
3846
3847 prefix = xmlParseQuotedString(ctxt);
3848 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00003849 } else if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00003850 garbage = 0;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003851 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003852 } else {
3853 /*
3854 * Found garbage when parsing the namespace
3855 */
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003856 if (!garbage) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00003857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00003858 ctxt->sax->error(ctxt->userData,
3859 "xmlParseNamespace found garbage\n");
3860 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003861 ctxt->errNo = XML_ERR_NS_DECL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00003862 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00003863 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003864 NEXT;
3865 }
3866 }
3867
3868 MOVETO_ENDTAG(CUR_PTR);
3869 NEXT;
3870
3871 /*
3872 * Register the DTD.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003873 if (href != NULL)
Daniel Veillard517752b1999-04-05 12:20:10 +00003874 if ((ctxt->sax != NULL) && (ctxt->sax->globalNamespace != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00003875 ctxt->sax->globalNamespace(ctxt->userData, href, prefix);
Daniel Veillard517752b1999-04-05 12:20:10 +00003876 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00003877
Daniel Veillard6454aec1999-09-02 22:04:43 +00003878 if (prefix != NULL) xmlFree(prefix);
3879 if (href != NULL) xmlFree(href);
Daniel Veillard260a68f1998-08-13 03:39:55 +00003880}
3881
3882/************************************************************************
3883 * *
3884 * The parser itself *
3885 * Relates to http://www.w3.org/TR/REC-xml *
3886 * *
3887 ************************************************************************/
3888
Daniel Veillard11e00581998-10-24 18:27:49 +00003889/**
Daniel Veillardb05deb71999-08-10 19:04:08 +00003890 * xmlScanName:
3891 * @ctxt: an XML parser context
3892 *
3893 * Trickery: parse an XML name but without consuming the input flow
Daniel Veillarde0854c32000-08-27 21:12:29 +00003894 * Needed for rollback cases. Used only when parsing entities references.
3895 *
3896 * TODO: seems deprecated now, only used in the default part of
3897 * xmlParserHandleReference
Daniel Veillardb05deb71999-08-10 19:04:08 +00003898 *
3899 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3900 * CombiningChar | Extender
3901 *
3902 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3903 *
3904 * [6] Names ::= Name (S Name)*
3905 *
3906 * Returns the Name parsed or NULL
3907 */
3908
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003909xmlChar *
Daniel Veillardb05deb71999-08-10 19:04:08 +00003910xmlScanName(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003911 xmlChar buf[XML_MAX_NAMELEN];
Daniel Veillardb05deb71999-08-10 19:04:08 +00003912 int len = 0;
3913
3914 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003915 if (!IS_LETTER(RAW) && (RAW != '_') &&
3916 (RAW != ':')) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00003917 return(NULL);
3918 }
3919
Daniel Veillarde0854c32000-08-27 21:12:29 +00003920
3921 while ((IS_LETTER(NXT(len))) || (IS_DIGIT(NXT(len))) || /* NOT REACHED */
Daniel Veillardb05deb71999-08-10 19:04:08 +00003922 (NXT(len) == '.') || (NXT(len) == '-') ||
3923 (NXT(len) == '_') || (NXT(len) == ':') ||
3924 (IS_COMBINING(NXT(len))) ||
3925 (IS_EXTENDER(NXT(len)))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00003926 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00003927 buf[len] = NXT(len);
3928 len++;
3929 if (len >= XML_MAX_NAMELEN) {
3930 fprintf(stderr,
3931 "xmlScanName: reached XML_MAX_NAMELEN limit\n");
Daniel Veillarde0854c32000-08-27 21:12:29 +00003932 while ((IS_LETTER(NXT(len))) || /* NOT REACHED */
3933 (IS_DIGIT(NXT(len))) ||
Daniel Veillardb05deb71999-08-10 19:04:08 +00003934 (NXT(len) == '.') || (NXT(len) == '-') ||
3935 (NXT(len) == '_') || (NXT(len) == ':') ||
3936 (IS_COMBINING(NXT(len))) ||
3937 (IS_EXTENDER(NXT(len))))
3938 len++;
3939 break;
3940 }
3941 }
3942 return(xmlStrndup(buf, len));
3943}
3944
3945/**
Daniel Veillard11e00581998-10-24 18:27:49 +00003946 * xmlParseName:
3947 * @ctxt: an XML parser context
3948 *
3949 * parse an XML name.
Daniel Veillard260a68f1998-08-13 03:39:55 +00003950 *
3951 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3952 * CombiningChar | Extender
3953 *
3954 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
3955 *
3956 * [6] Names ::= Name (S Name)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00003957 *
3958 * Returns the Name parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00003959 */
3960
Daniel Veillarddd6b3671999-09-23 22:19:22 +00003961xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00003962xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00003963 xmlChar buf[XML_MAX_NAMELEN + 5];
3964 int len = 0, l;
3965 int c;
Daniel Veillarde0854c32000-08-27 21:12:29 +00003966 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00003967
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003968 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00003969 c = CUR_CHAR(l);
Daniel Veillard5e873c42000-04-12 13:27:38 +00003970 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3971 (!IS_LETTER(c) && (c != '_') &&
3972 (c != ':'))) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003973 return(NULL);
3974 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00003975
Daniel Veillarde0854c32000-08-27 21:12:29 +00003976 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
Daniel Veillard5e873c42000-04-12 13:27:38 +00003977 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3978 (c == '.') || (c == '-') ||
3979 (c == '_') || (c == ':') ||
3980 (IS_COMBINING(c)) ||
3981 (IS_EXTENDER(c)))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00003982 if (count++ > 100) {
3983 count = 0;
3984 GROW;
3985 }
Daniel Veillardcf461992000-03-14 18:30:20 +00003986 COPY_BUF(l,buf,len,c);
3987 NEXTL(l);
3988 c = CUR_CHAR(l);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00003989 if (len >= XML_MAX_NAMELEN) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00003990 /*
3991 * Okay someone managed to make a huge name, so he's ready to pay
3992 * for the processing speed.
3993 */
3994 xmlChar *buffer;
3995 int max = len * 2;
3996
3997 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
3998 if (buffer == NULL) {
3999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4000 ctxt->sax->error(ctxt->userData,
4001 "xmlParseName: out of memory\n");
4002 return(NULL);
4003 }
4004 memcpy(buffer, buf, len);
4005 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00004006 (c == '.') || (c == '-') ||
4007 (c == '_') || (c == ':') ||
4008 (IS_COMBINING(c)) ||
4009 (IS_EXTENDER(c))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00004010 if (count++ > 100) {
4011 count = 0;
4012 GROW;
4013 }
4014 if (len + 10 > max) {
4015 max *= 2;
4016 buffer = (xmlChar *) xmlRealloc(buffer,
4017 max * sizeof(xmlChar));
4018 if (buffer == NULL) {
4019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4020 ctxt->sax->error(ctxt->userData,
4021 "xmlParseName: out of memory\n");
4022 return(NULL);
4023 }
4024 }
4025 COPY_BUF(l,buffer,len,c);
Daniel Veillardcf461992000-03-14 18:30:20 +00004026 NEXTL(l);
4027 c = CUR_CHAR(l);
Daniel Veillardb05deb71999-08-10 19:04:08 +00004028 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00004029 buffer[len] = 0;
4030 return(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004031 }
4032 }
4033 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00004034}
4035
Daniel Veillard11e00581998-10-24 18:27:49 +00004036/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00004037 * xmlParseStringName:
4038 * @ctxt: an XML parser context
4039 * @str: a pointer to an index in the string
4040 *
4041 * parse an XML name.
4042 *
4043 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
4044 * CombiningChar | Extender
4045 *
4046 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
4047 *
4048 * [6] Names ::= Name (S Name)*
4049 *
4050 * Returns the Name parsed or NULL. The str pointer
4051 * is updated to the current location in the string.
4052 */
4053
4054xmlChar *
4055xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004056 xmlChar buf[XML_MAX_NAMELEN + 5];
4057 const xmlChar *cur = *str;
4058 int len = 0, l;
4059 int c;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004060
Daniel Veillardcf461992000-03-14 18:30:20 +00004061 c = CUR_SCHAR(cur, l);
4062 if (!IS_LETTER(c) && (c != '_') &&
4063 (c != ':')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004064 return(NULL);
4065 }
4066
Daniel Veillarde0854c32000-08-27 21:12:29 +00004067 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00004068 (c == '.') || (c == '-') ||
4069 (c == '_') || (c == ':') ||
4070 (IS_COMBINING(c)) ||
4071 (IS_EXTENDER(c))) {
4072 COPY_BUF(l,buf,len,c);
4073 cur += l;
4074 c = CUR_SCHAR(cur, l);
Daniel Veillarde0854c32000-08-27 21:12:29 +00004075 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
4076 /*
4077 * Okay someone managed to make a huge name, so he's ready to pay
4078 * for the processing speed.
4079 */
4080 xmlChar *buffer;
4081 int max = len * 2;
4082
4083 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
4084 if (buffer == NULL) {
4085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4086 ctxt->sax->error(ctxt->userData,
4087 "xmlParseStringName: out of memory\n");
4088 return(NULL);
4089 }
4090 memcpy(buffer, buf, len);
4091 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00004092 (c == '.') || (c == '-') ||
4093 (c == '_') || (c == ':') ||
4094 (IS_COMBINING(c)) ||
4095 (IS_EXTENDER(c))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00004096 if (len + 10 > max) {
4097 max *= 2;
4098 buffer = (xmlChar *) xmlRealloc(buffer,
4099 max * sizeof(xmlChar));
4100 if (buffer == NULL) {
4101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4102 ctxt->sax->error(ctxt->userData,
4103 "xmlParseStringName: out of memory\n");
4104 return(NULL);
4105 }
4106 }
4107 COPY_BUF(l,buffer,len,c);
Daniel Veillardcf461992000-03-14 18:30:20 +00004108 cur += l;
4109 c = CUR_SCHAR(cur, l);
4110 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00004111 buffer[len] = 0;
4112 *str = cur;
4113 return(buffer);
Daniel Veillardcf461992000-03-14 18:30:20 +00004114 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00004115 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004116 *str = cur;
4117 return(xmlStrndup(buf, len));
Daniel Veillard10a2c651999-12-12 13:03:50 +00004118}
4119
4120/**
Daniel Veillard11e00581998-10-24 18:27:49 +00004121 * xmlParseNmtoken:
4122 * @ctxt: an XML parser context
4123 *
4124 * parse an XML Nmtoken.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004125 *
4126 * [7] Nmtoken ::= (NameChar)+
4127 *
4128 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
Daniel Veillard1e346af1999-02-22 10:33:01 +00004129 *
4130 * Returns the Nmtoken parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00004131 */
4132
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004133xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004134xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00004135 xmlChar buf[XML_MAX_NAMELEN + 5];
4136 int len = 0, l;
4137 int c;
4138 int count = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004139
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004140 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004141 c = CUR_CHAR(l);
Daniel Veillarde0854c32000-08-27 21:12:29 +00004142
4143 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00004144 (c == '.') || (c == '-') ||
4145 (c == '_') || (c == ':') ||
4146 (IS_COMBINING(c)) ||
4147 (IS_EXTENDER(c))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00004148 if (count++ > 100) {
4149 count = 0;
4150 GROW;
4151 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004152 COPY_BUF(l,buf,len,c);
4153 NEXTL(l);
4154 c = CUR_CHAR(l);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004155 if (len >= XML_MAX_NAMELEN) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00004156 /*
4157 * Okay someone managed to make a huge token, so he's ready to pay
4158 * for the processing speed.
4159 */
4160 xmlChar *buffer;
4161 int max = len * 2;
4162
4163 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
4164 if (buffer == NULL) {
4165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4166 ctxt->sax->error(ctxt->userData,
4167 "xmlParseNmtoken: out of memory\n");
4168 return(NULL);
4169 }
4170 memcpy(buffer, buf, len);
4171 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
Daniel Veillardcf461992000-03-14 18:30:20 +00004172 (c == '.') || (c == '-') ||
4173 (c == '_') || (c == ':') ||
4174 (IS_COMBINING(c)) ||
4175 (IS_EXTENDER(c))) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00004176 if (count++ > 100) {
4177 count = 0;
4178 GROW;
4179 }
4180 if (len + 10 > max) {
4181 max *= 2;
4182 buffer = (xmlChar *) xmlRealloc(buffer,
4183 max * sizeof(xmlChar));
4184 if (buffer == NULL) {
4185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4186 ctxt->sax->error(ctxt->userData,
4187 "xmlParseName: out of memory\n");
4188 return(NULL);
4189 }
4190 }
4191 COPY_BUF(l,buffer,len,c);
Daniel Veillardcf461992000-03-14 18:30:20 +00004192 NEXTL(l);
4193 c = CUR_CHAR(l);
4194 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00004195 buffer[len] = 0;
4196 return(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004197 }
4198 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004199 if (len == 0)
4200 return(NULL);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004201 return(xmlStrndup(buf, len));
Daniel Veillard260a68f1998-08-13 03:39:55 +00004202}
4203
Daniel Veillard11e00581998-10-24 18:27:49 +00004204/**
4205 * xmlParseEntityValue:
4206 * @ctxt: an XML parser context
Daniel Veillard011b63c1999-06-02 17:44:04 +00004207 * @orig: if non-NULL store a copy of the original entity value
Daniel Veillard11e00581998-10-24 18:27:49 +00004208 *
4209 * parse a value for ENTITY decl.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004210 *
4211 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
4212 * "'" ([^%&'] | PEReference | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00004213 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00004214 * Returns the EntityValue parsed with reference substitued or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00004215 */
4216
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004217xmlChar *
4218xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004219 xmlChar *buf = NULL;
4220 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004221 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00004222 int c, l;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004223 xmlChar stop;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004224 xmlChar *ret = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00004225 const xmlChar *cur = NULL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004226 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004227
Daniel Veillardcf461992000-03-14 18:30:20 +00004228 if (RAW == '"') stop = '"';
4229 else if (RAW == '\'') stop = '\'';
Daniel Veillard10a2c651999-12-12 13:03:50 +00004230 else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004231 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00004233 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004234 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004235 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004236 return(NULL);
4237 }
4238 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4239 if (buf == NULL) {
4240 fprintf(stderr, "malloc of %d byte failed\n", size);
4241 return(NULL);
4242 }
4243
4244 /*
4245 * The content of the entity definition is copied in a buffer.
4246 */
4247
4248 ctxt->instate = XML_PARSER_ENTITY_VALUE;
4249 input = ctxt->input;
4250 GROW;
4251 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004252 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004253 /*
4254 * NOTE: 4.4.5 Included in Literal
4255 * When a parameter entity reference appears in a literal entity
4256 * value, ... a single or double quote character in the replacement
4257 * text is always treated as a normal data character and will not
4258 * terminate the literal.
4259 * In practice it means we stop the loop only when back at parsing
4260 * the initial entity and the quote is found
4261 */
Daniel Veillarde0854c32000-08-27 21:12:29 +00004262 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
4263 (ctxt->input != input))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004264 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004265 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00004266 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00004267 if (buf == NULL) {
4268 fprintf(stderr, "realloc of %d byte failed\n", size);
4269 return(NULL);
4270 }
4271 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004272 COPY_BUF(l,buf,len,c);
4273 NEXTL(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004274 /*
4275 * Pop-up of finished entities.
4276 */
Daniel Veillarde0854c32000-08-27 21:12:29 +00004277 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
Daniel Veillard10a2c651999-12-12 13:03:50 +00004278 xmlPopInput(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00004279
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00004280 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004281 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004282 if (c == 0) {
4283 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004284 c = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004285 }
4286 }
4287 buf[len] = 0;
4288
4289 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00004290 * Raise problem w.r.t. '&' and '%' being used in non-entities
4291 * reference constructs. Note Charref will be handled in
4292 * xmlStringDecodeEntities()
4293 */
4294 cur = buf;
4295 while (*cur != 0) {
4296 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
4297 xmlChar *name;
4298 xmlChar tmp = *cur;
4299
4300 cur++;
4301 name = xmlParseStringName(ctxt, &cur);
4302 if ((name == NULL) || (*cur != ';')) {
4303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4304 ctxt->sax->error(ctxt->userData,
4305 "EntityValue: '%c' forbidden except for entities references\n",
4306 tmp);
4307 ctxt->wellFormed = 0;
4308 ctxt->disableSAX = 1;
4309 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
4310 }
4311 if ((ctxt->inSubset == 1) && (tmp == '%')) {
4312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4313 ctxt->sax->error(ctxt->userData,
4314 "EntityValue: PEReferences forbidden in internal subset\n",
4315 tmp);
4316 ctxt->wellFormed = 0;
4317 ctxt->disableSAX = 1;
4318 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
4319 }
4320 if (name != NULL)
4321 xmlFree(name);
4322 }
4323 cur++;
4324 }
4325
4326 /*
Daniel Veillard10a2c651999-12-12 13:03:50 +00004327 * Then PEReference entities are substituted.
4328 */
4329 if (c != stop) {
4330 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
4331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4332 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
4333 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004334 ctxt->disableSAX = 1;
4335 xmlFree(buf);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004336 } else {
4337 NEXT;
4338 /*
4339 * NOTE: 4.4.7 Bypassed
4340 * When a general entity reference appears in the EntityValue in
4341 * an entity declaration, it is bypassed and left as is.
Daniel Veillardcf461992000-03-14 18:30:20 +00004342 * so XML_SUBSTITUTE_REF is not set here.
Daniel Veillard10a2c651999-12-12 13:03:50 +00004343 */
4344 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
4345 0, 0, 0);
4346 if (orig != NULL)
4347 *orig = buf;
4348 else
4349 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004350 }
4351
4352 return(ret);
4353}
4354
Daniel Veillard11e00581998-10-24 18:27:49 +00004355/**
4356 * xmlParseAttValue:
4357 * @ctxt: an XML parser context
4358 *
4359 * parse a value for an attribute
Daniel Veillard011b63c1999-06-02 17:44:04 +00004360 * Note: the parser won't do substitution of entities here, this
Daniel Veillardb96e6431999-08-29 21:02:19 +00004361 * will be handled later in xmlStringGetNodeList
Daniel Veillard260a68f1998-08-13 03:39:55 +00004362 *
4363 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4364 * "'" ([^<&'] | Reference)* "'"
Daniel Veillard1e346af1999-02-22 10:33:01 +00004365 *
Daniel Veillard7f858501999-11-17 17:32:38 +00004366 * 3.3.3 Attribute-Value Normalization:
4367 * Before the value of an attribute is passed to the application or
4368 * checked for validity, the XML processor must normalize it as follows:
4369 * - a character reference is processed by appending the referenced
4370 * character to the attribute value
4371 * - an entity reference is processed by recursively processing the
4372 * replacement text of the entity
4373 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4374 * appending #x20 to the normalized value, except that only a single
4375 * #x20 is appended for a "#xD#xA" sequence that is part of an external
4376 * parsed entity or the literal entity value of an internal parsed entity
4377 * - other characters are processed by appending them to the normalized value
Daniel Veillard07136651999-11-18 14:02:05 +00004378 * If the declared value is not CDATA, then the XML processor must further
4379 * process the normalized attribute value by discarding any leading and
4380 * trailing space (#x20) characters, and by replacing sequences of space
4381 * (#x20) characters by a single space (#x20) character.
4382 * All attributes for which no declaration has been read should be treated
4383 * by a non-validating parser as if declared CDATA.
Daniel Veillard7f858501999-11-17 17:32:38 +00004384 *
4385 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004386 */
4387
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004388xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004389xmlParseAttValue(xmlParserCtxtPtr ctxt) {
Daniel Veillard7f858501999-11-17 17:32:38 +00004390 xmlChar limit = 0;
Daniel Veillardbe803962000-06-28 23:40:59 +00004391 xmlChar *buf = NULL;
4392 int len = 0;
4393 int buf_size = 0;
4394 int c, l;
Daniel Veillard7f858501999-11-17 17:32:38 +00004395 xmlChar *current = NULL;
4396 xmlEntityPtr ent;
Daniel Veillard7f858501999-11-17 17:32:38 +00004397
Daniel Veillard260a68f1998-08-13 03:39:55 +00004398
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004399 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00004400 if (NXT(0) == '"') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004401 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard7f858501999-11-17 17:32:38 +00004402 limit = '"';
Daniel Veillard260a68f1998-08-13 03:39:55 +00004403 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004404 } else if (NXT(0) == '\'') {
Daniel Veillard7f858501999-11-17 17:32:38 +00004405 limit = '\'';
Daniel Veillardb05deb71999-08-10 19:04:08 +00004406 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004407 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004408 } else {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004409 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004411 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004412 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004413 ctxt->disableSAX = 1;
Daniel Veillard7f858501999-11-17 17:32:38 +00004414 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004415 }
4416
Daniel Veillard7f858501999-11-17 17:32:38 +00004417 /*
4418 * allocate a translation buffer.
4419 */
Daniel Veillardbe803962000-06-28 23:40:59 +00004420 buf_size = XML_PARSER_BUFFER_SIZE;
4421 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
4422 if (buf == NULL) {
Daniel Veillard7f858501999-11-17 17:32:38 +00004423 perror("xmlParseAttValue: malloc failed");
4424 return(NULL);
4425 }
Daniel Veillard7f858501999-11-17 17:32:38 +00004426
4427 /*
4428 * Ok loop until we reach one of the ending char or a size limit.
4429 */
Daniel Veillardbe803962000-06-28 23:40:59 +00004430 c = CUR_CHAR(l);
4431 while (((NXT(0) != limit) && (c != '<')) || (ctxt->token != 0)) {
4432 if (c == 0) break;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00004433 if (ctxt->token == '&') {
4434 static xmlChar buffer[6] = "&#38;";
4435
4436 if (len > buf_size - 10) {
4437 growBuffer(buf);
4438 }
4439 current = &buffer[0];
4440 while (*current != 0) {
4441 buf[len++] = *current++;
4442 }
4443 ctxt->token = 0;
4444 } else if ((c == '&') && (NXT(1) == '#')) {
Daniel Veillard7f858501999-11-17 17:32:38 +00004445 int val = xmlParseCharRef(ctxt);
Daniel Veillardbe803962000-06-28 23:40:59 +00004446 COPY_BUF(l,buf,len,val);
4447 NEXTL(l);
4448 } else if (c == '&') {
Daniel Veillard7f858501999-11-17 17:32:38 +00004449 ent = xmlParseEntityRef(ctxt);
4450 if ((ent != NULL) &&
4451 (ctxt->replaceEntities != 0)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004452 xmlChar *rep;
Daniel Veillard7f858501999-11-17 17:32:38 +00004453
Daniel Veillardcf461992000-03-14 18:30:20 +00004454 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4455 rep = xmlStringDecodeEntities(ctxt, ent->content,
4456 XML_SUBSTITUTE_REF, 0, 0, 0);
4457 if (rep != NULL) {
4458 current = rep;
4459 while (*current != 0) {
Daniel Veillardbe803962000-06-28 23:40:59 +00004460 buf[len++] = *current++;
4461 if (len > buf_size - 10) {
4462 growBuffer(buf);
Daniel Veillardcf461992000-03-14 18:30:20 +00004463 }
4464 }
4465 xmlFree(rep);
Daniel Veillard7f858501999-11-17 17:32:38 +00004466 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004467 } else {
4468 if (ent->content != NULL)
Daniel Veillardbe803962000-06-28 23:40:59 +00004469 buf[len++] = ent->content[0];
Daniel Veillard7f858501999-11-17 17:32:38 +00004470 }
4471 } else if (ent != NULL) {
4472 int i = xmlStrlen(ent->name);
4473 const xmlChar *cur = ent->name;
4474
Daniel Veillardcf461992000-03-14 18:30:20 +00004475 /*
4476 * This may look absurd but is needed to detect
4477 * entities problems
4478 */
Daniel Veillard87b95392000-08-12 21:12:04 +00004479 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4480 (ent->content != NULL)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004481 xmlChar *rep;
4482 rep = xmlStringDecodeEntities(ctxt, ent->content,
4483 XML_SUBSTITUTE_REF, 0, 0, 0);
4484 if (rep != NULL)
4485 xmlFree(rep);
4486 }
4487
4488 /*
4489 * Just output the reference
4490 */
Daniel Veillardbe803962000-06-28 23:40:59 +00004491 buf[len++] = '&';
4492 if (len > buf_size - i - 10) {
4493 growBuffer(buf);
Daniel Veillard7f858501999-11-17 17:32:38 +00004494 }
4495 for (;i > 0;i--)
Daniel Veillardbe803962000-06-28 23:40:59 +00004496 buf[len++] = *cur++;
4497 buf[len++] = ';';
Daniel Veillard7f858501999-11-17 17:32:38 +00004498 }
Daniel Veillard7f858501999-11-17 17:32:38 +00004499 } else {
Daniel Veillardbe803962000-06-28 23:40:59 +00004500 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4501 COPY_BUF(l,buf,len,0x20);
4502 if (len > buf_size - 10) {
4503 growBuffer(buf);
Daniel Veillard7f858501999-11-17 17:32:38 +00004504 }
Daniel Veillard7f858501999-11-17 17:32:38 +00004505 } else {
Daniel Veillardbe803962000-06-28 23:40:59 +00004506 COPY_BUF(l,buf,len,c);
4507 if (len > buf_size - 10) {
4508 growBuffer(buf);
Daniel Veillard7f858501999-11-17 17:32:38 +00004509 }
Daniel Veillard7f858501999-11-17 17:32:38 +00004510 }
Daniel Veillardbe803962000-06-28 23:40:59 +00004511 NEXTL(l);
Daniel Veillard7f858501999-11-17 17:32:38 +00004512 }
Daniel Veillardbe803962000-06-28 23:40:59 +00004513 GROW;
4514 c = CUR_CHAR(l);
Daniel Veillard7f858501999-11-17 17:32:38 +00004515 }
Daniel Veillardbe803962000-06-28 23:40:59 +00004516 buf[len++] = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004517 if (RAW == '<') {
Daniel Veillard7f858501999-11-17 17:32:38 +00004518 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4519 ctxt->sax->error(ctxt->userData,
4520 "Unescaped '<' not allowed in attributes values\n");
4521 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
4522 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004523 ctxt->disableSAX = 1;
4524 } else if (RAW != limit) {
Daniel Veillard7f858501999-11-17 17:32:38 +00004525 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4526 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
4527 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
4528 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004529 ctxt->disableSAX = 1;
Daniel Veillard7f858501999-11-17 17:32:38 +00004530 } else
4531 NEXT;
Daniel Veillardbe803962000-06-28 23:40:59 +00004532 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004533}
4534
Daniel Veillard11e00581998-10-24 18:27:49 +00004535/**
4536 * xmlParseSystemLiteral:
4537 * @ctxt: an XML parser context
4538 *
4539 * parse an XML Literal
Daniel Veillard260a68f1998-08-13 03:39:55 +00004540 *
4541 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
Daniel Veillard1e346af1999-02-22 10:33:01 +00004542 *
4543 * Returns the SystemLiteral parsed or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00004544 */
4545
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004546xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004547xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004548 xmlChar *buf = NULL;
4549 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004550 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00004551 int cur, l;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004552 xmlChar stop;
Daniel Veillardcf461992000-03-14 18:30:20 +00004553 int state = ctxt->instate;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004554
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004555 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00004556 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004557 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004558 stop = '"';
Daniel Veillardcf461992000-03-14 18:30:20 +00004559 } else if (RAW == '\'') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004560 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004561 stop = '\'';
Daniel Veillard260a68f1998-08-13 03:39:55 +00004562 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004564 ctxt->sax->error(ctxt->userData,
4565 "SystemLiteral \" or ' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004566 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004567 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004568 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004569 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004570 }
4571
Daniel Veillard10a2c651999-12-12 13:03:50 +00004572 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4573 if (buf == NULL) {
4574 fprintf(stderr, "malloc of %d byte failed\n", size);
4575 return(NULL);
4576 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004577 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4578 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004579 while ((IS_CHAR(cur)) && (cur != stop)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004580 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004581 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00004582 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00004583 if (buf == NULL) {
4584 fprintf(stderr, "realloc of %d byte failed\n", size);
Daniel Veillard32bc74e2000-07-14 14:49:25 +00004585 ctxt->instate = (xmlParserInputState) state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004586 return(NULL);
4587 }
4588 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004589 COPY_BUF(l,buf,len,cur);
4590 NEXTL(l);
4591 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004592 if (cur == 0) {
4593 GROW;
4594 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00004595 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004596 }
4597 }
4598 buf[len] = 0;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00004599 ctxt->instate = (xmlParserInputState) state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004600 if (!IS_CHAR(cur)) {
4601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4602 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
4603 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4604 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004605 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004606 } else {
4607 NEXT;
4608 }
4609 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004610}
4611
Daniel Veillard11e00581998-10-24 18:27:49 +00004612/**
4613 * xmlParsePubidLiteral:
4614 * @ctxt: an XML parser context
Daniel Veillard260a68f1998-08-13 03:39:55 +00004615 *
Daniel Veillard11e00581998-10-24 18:27:49 +00004616 * parse an XML public literal
Daniel Veillard1e346af1999-02-22 10:33:01 +00004617 *
4618 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4619 *
4620 * Returns the PubidLiteral parsed or NULL.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004621 */
4622
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004623xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004624xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004625 xmlChar *buf = NULL;
4626 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004627 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004628 xmlChar cur;
4629 xmlChar stop;
Daniel Veillard6077d031999-10-09 09:11:45 +00004630
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004631 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00004632 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004633 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004634 stop = '"';
Daniel Veillardcf461992000-03-14 18:30:20 +00004635 } else if (RAW == '\'') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00004636 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004637 stop = '\'';
Daniel Veillard260a68f1998-08-13 03:39:55 +00004638 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004640 ctxt->sax->error(ctxt->userData,
4641 "SystemLiteral \" or ' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004642 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004643 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004644 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004645 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004646 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00004647 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4648 if (buf == NULL) {
4649 fprintf(stderr, "malloc of %d byte failed\n", size);
4650 return(NULL);
4651 }
4652 cur = CUR;
4653 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) {
4654 if (len + 1 >= size) {
4655 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00004656 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00004657 if (buf == NULL) {
4658 fprintf(stderr, "realloc of %d byte failed\n", size);
4659 return(NULL);
4660 }
4661 }
4662 buf[len++] = cur;
4663 NEXT;
4664 cur = CUR;
4665 if (cur == 0) {
4666 GROW;
4667 SHRINK;
4668 cur = CUR;
4669 }
4670 }
4671 buf[len] = 0;
4672 if (cur != stop) {
4673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4674 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
4675 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
4676 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004677 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004678 } else {
4679 NEXT;
4680 }
4681 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004682}
4683
Daniel Veillard11e00581998-10-24 18:27:49 +00004684/**
4685 * xmlParseCharData:
4686 * @ctxt: an XML parser context
4687 * @cdata: int indicating whether we are within a CDATA section
4688 *
4689 * parse a CharData section.
4690 * if we are within a CDATA section ']]>' marks an end of section.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004691 *
Daniel Veillardcf461992000-03-14 18:30:20 +00004692 * The right angle bracket (>) may be represented using the string "&gt;",
4693 * and must, for compatibility, be escaped using "&gt;" or a character
4694 * reference when it appears in the string "]]>" in content, when that
4695 * string is not marking the end of a CDATA section.
4696 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004697 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4698 */
4699
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004700void
4701xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillardcf461992000-03-14 18:30:20 +00004702 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004703 int nbchar = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004704 int cur, l;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004705
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004706 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00004707 cur = CUR_CHAR(l);
Daniel Veillard5e873c42000-04-12 13:27:38 +00004708 while (((cur != '<') || (ctxt->token == '<')) &&
4709 ((cur != '&') || (ctxt->token == '&')) &&
4710 (IS_CHAR(cur))) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00004711 if ((cur == ']') && (NXT(1) == ']') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004712 (NXT(2) == '>')) {
4713 if (cdata) break;
4714 else {
4715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcf461992000-03-14 18:30:20 +00004716 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004717 "Sequence ']]>' not allowed in content\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004718 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
Daniel Veillardcf461992000-03-14 18:30:20 +00004719 /* Should this be relaxed ??? I see a "must here */
4720 ctxt->wellFormed = 0;
4721 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004722 }
4723 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004724 COPY_BUF(l,buf,nbchar,cur);
4725 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004726 /*
4727 * Ok the segment is to be consumed as chars.
4728 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004729 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004730 if (areBlanks(ctxt, buf, nbchar)) {
4731 if (ctxt->sax->ignorableWhitespace != NULL)
4732 ctxt->sax->ignorableWhitespace(ctxt->userData,
4733 buf, nbchar);
4734 } else {
4735 if (ctxt->sax->characters != NULL)
4736 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4737 }
4738 }
4739 nbchar = 0;
4740 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004741 NEXTL(l);
4742 cur = CUR_CHAR(l);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004743 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004744 if (nbchar != 0) {
4745 /*
4746 * Ok the segment is to be consumed as chars.
4747 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004748 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004749 if (areBlanks(ctxt, buf, nbchar)) {
4750 if (ctxt->sax->ignorableWhitespace != NULL)
4751 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4752 } else {
4753 if (ctxt->sax->characters != NULL)
4754 ctxt->sax->characters(ctxt->userData, buf, nbchar);
4755 }
4756 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004757 }
4758}
4759
Daniel Veillard11e00581998-10-24 18:27:49 +00004760/**
4761 * xmlParseExternalID:
4762 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004763 * @publicID: a xmlChar** receiving PubidLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00004764 * @strict: indicate whether we should restrict parsing to only
4765 * production [75], see NOTE below
Daniel Veillard11e00581998-10-24 18:27:49 +00004766 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00004767 * Parse an External ID or a Public ID
4768 *
4769 * NOTE: Productions [75] and [83] interract badly since [75] can generate
4770 * 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard260a68f1998-08-13 03:39:55 +00004771 *
4772 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4773 * | 'PUBLIC' S PubidLiteral S SystemLiteral
Daniel Veillard1e346af1999-02-22 10:33:01 +00004774 *
4775 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
4776 *
4777 * Returns the function returns SystemLiteral and in the second
4778 * case publicID receives PubidLiteral, is strict is off
4779 * it is possible to return NULL and have publicID set.
Daniel Veillard260a68f1998-08-13 03:39:55 +00004780 */
4781
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004782xmlChar *
4783xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4784 xmlChar *URI = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004785
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004786 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00004787 if ((RAW == 'S') && (NXT(1) == 'Y') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004788 (NXT(2) == 'S') && (NXT(3) == 'T') &&
4789 (NXT(4) == 'E') && (NXT(5) == 'M')) {
4790 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004791 if (!IS_BLANK(CUR)) {
4792 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004793 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004794 "Space required after 'SYSTEM'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004795 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004796 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004797 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004798 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004799 SKIP_BLANKS;
4800 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004801 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004803 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004804 "xmlParseExternalID: SYSTEM, no URI\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004805 ctxt->errNo = XML_ERR_URI_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004806 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004807 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004808 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004809 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004810 (NXT(2) == 'B') && (NXT(3) == 'L') &&
4811 (NXT(4) == 'I') && (NXT(5) == 'C')) {
4812 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004813 if (!IS_BLANK(CUR)) {
4814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004815 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004816 "Space required after 'PUBLIC'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004817 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004818 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004819 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004820 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004821 SKIP_BLANKS;
4822 *publicID = xmlParsePubidLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004823 if (*publicID == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004825 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004826 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004827 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004828 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004829 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004830 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00004831 if (strict) {
4832 /*
4833 * We don't handle [83] so "S SystemLiteral" is required.
4834 */
4835 if (!IS_BLANK(CUR)) {
4836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004837 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00004838 "Space required after the Public Identifier\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004839 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004840 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004841 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004842 }
4843 } else {
4844 /*
4845 * We handle [83] so we return immediately, if
4846 * "S SystemLiteral" is not detected. From a purely parsing
4847 * point of view that's a nice mess.
4848 */
Daniel Veillard10a2c651999-12-12 13:03:50 +00004849 const xmlChar *ptr;
4850 GROW;
4851
4852 ptr = CUR_PTR;
Daniel Veillard1e346af1999-02-22 10:33:01 +00004853 if (!IS_BLANK(*ptr)) return(NULL);
4854
4855 while (IS_BLANK(*ptr)) ptr++;
Daniel Veillardcf461992000-03-14 18:30:20 +00004856 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004857 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004858 SKIP_BLANKS;
4859 URI = xmlParseSystemLiteral(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004860 if (URI == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004862 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004863 "xmlParseExternalID: PUBLIC, no URI\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004864 ctxt->errNo = XML_ERR_URI_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004865 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004866 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004867 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004868 }
4869 return(URI);
4870}
4871
Daniel Veillard11e00581998-10-24 18:27:49 +00004872/**
4873 * xmlParseComment:
Daniel Veillard1e346af1999-02-22 10:33:01 +00004874 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00004875 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00004876 * Skip an XML (SGML) comment <!-- .... -->
Daniel Veillard260a68f1998-08-13 03:39:55 +00004877 * The spec says that "For compatibility, the string "--" (double-hyphen)
4878 * must not occur within comments. "
4879 *
4880 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4881 */
Daniel Veillard517752b1999-04-05 12:20:10 +00004882void
Daniel Veillardb96e6431999-08-29 21:02:19 +00004883xmlParseComment(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004884 xmlChar *buf = NULL;
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004885 int len;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004886 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00004887 int q, ql;
4888 int r, rl;
4889 int cur, l;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004890 xmlParserInputState state;
Daniel Veillardcf461992000-03-14 18:30:20 +00004891 xmlParserInputPtr input = ctxt->input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004892
4893 /*
4894 * Check that there is a comment right here.
4895 */
Daniel Veillardcf461992000-03-14 18:30:20 +00004896 if ((RAW != '<') || (NXT(1) != '!') ||
Daniel Veillard517752b1999-04-05 12:20:10 +00004897 (NXT(2) != '-') || (NXT(3) != '-')) return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004898
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004899 state = ctxt->instate;
Daniel Veillardb05deb71999-08-10 19:04:08 +00004900 ctxt->instate = XML_PARSER_COMMENT;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00004901 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004902 SKIP(4);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004903 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
4904 if (buf == NULL) {
4905 fprintf(stderr, "malloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004906 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004907 return;
4908 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004909 q = CUR_CHAR(ql);
4910 NEXTL(ql);
4911 r = CUR_CHAR(rl);
4912 NEXTL(rl);
4913 cur = CUR_CHAR(l);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004914 len = 0;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004915 while (IS_CHAR(cur) &&
4916 ((cur != '>') ||
4917 (r != '-') || (q != '-'))) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00004918 if ((r == '-') && (q == '-') && (len > 1)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00004920 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00004921 "Comment must not contain '--' (double-hyphen)`\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004922 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004923 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004924 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004925 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004926 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00004927 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00004928 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00004929 if (buf == NULL) {
4930 fprintf(stderr, "realloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004931 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004932 return;
4933 }
4934 }
Daniel Veillardcf461992000-03-14 18:30:20 +00004935 COPY_BUF(ql,buf,len,q);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004936 q = r;
Daniel Veillardcf461992000-03-14 18:30:20 +00004937 ql = rl;
Daniel Veillard10a2c651999-12-12 13:03:50 +00004938 r = cur;
Daniel Veillardcf461992000-03-14 18:30:20 +00004939 rl = l;
4940 NEXTL(l);
4941 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004942 if (cur == 0) {
4943 SHRINK;
4944 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00004945 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00004946 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004947 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00004948 buf[len] = 0;
4949 if (!IS_CHAR(cur)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00004950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00004951 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00004952 "Comment not terminated \n<!--%.50s\n", buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004953 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00004954 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00004955 ctxt->disableSAX = 1;
4956 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004957 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00004958 if (input != ctxt->input) {
4959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4960 ctxt->sax->error(ctxt->userData,
4961"Comment doesn't start and stop in the same entity\n");
4962 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4963 ctxt->wellFormed = 0;
4964 ctxt->disableSAX = 1;
4965 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00004966 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00004967 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
4968 (!ctxt->disableSAX))
Daniel Veillard10a2c651999-12-12 13:03:50 +00004969 ctxt->sax->comment(ctxt->userData, buf);
4970 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00004971 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00004972 ctxt->instate = state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004973}
4974
Daniel Veillard11e00581998-10-24 18:27:49 +00004975/**
4976 * xmlParsePITarget:
4977 * @ctxt: an XML parser context
4978 *
4979 * parse the name of a PI
Daniel Veillard260a68f1998-08-13 03:39:55 +00004980 *
4981 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00004982 *
4983 * Returns the PITarget name or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00004984 */
4985
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004986xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00004987xmlParsePITarget(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00004988 xmlChar *name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00004989
4990 name = xmlParseName(ctxt);
Daniel Veillard3c558c31999-12-22 11:30:41 +00004991 if ((name != NULL) &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00004992 ((name[0] == 'x') || (name[0] == 'X')) &&
4993 ((name[1] == 'm') || (name[1] == 'M')) &&
4994 ((name[2] == 'l') || (name[2] == 'L'))) {
Daniel Veillard3c558c31999-12-22 11:30:41 +00004995 int i;
Daniel Veillardcf461992000-03-14 18:30:20 +00004996 if ((name[0] == 'x') && (name[1] == 'm') &&
4997 (name[2] == 'l') && (name[3] == 0)) {
4998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4999 ctxt->sax->error(ctxt->userData,
5000 "XML declaration allowed only at the start of the document\n");
5001 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
5002 ctxt->wellFormed = 0;
5003 ctxt->disableSAX = 1;
5004 return(name);
5005 } else if (name[3] == 0) {
5006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5007 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
5008 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
5009 ctxt->wellFormed = 0;
5010 ctxt->disableSAX = 1;
5011 return(name);
5012 }
Daniel Veillard3c558c31999-12-22 11:30:41 +00005013 for (i = 0;;i++) {
5014 if (xmlW3CPIs[i] == NULL) break;
5015 if (!xmlStrcmp(name, (const xmlChar *)xmlW3CPIs[i]))
5016 return(name);
5017 }
5018 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
5019 ctxt->sax->warning(ctxt->userData,
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005020 "xmlParsePItarget: invalid name prefix 'xml'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005021 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005022 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005023 }
5024 return(name);
5025}
5026
Daniel Veillard11e00581998-10-24 18:27:49 +00005027/**
5028 * xmlParsePI:
5029 * @ctxt: an XML parser context
5030 *
5031 * parse an XML Processing Instruction.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005032 *
5033 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
Daniel Veillard1e346af1999-02-22 10:33:01 +00005034 *
5035 * The processing is transfered to SAX once parsed.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005036 */
5037
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005038void
5039xmlParsePI(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00005040 xmlChar *buf = NULL;
5041 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005042 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00005043 int cur, l;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005044 xmlChar *target;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005045 xmlParserInputState state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005046
Daniel Veillardcf461992000-03-14 18:30:20 +00005047 if ((RAW == '<') && (NXT(1) == '?')) {
5048 xmlParserInputPtr input = ctxt->input;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005049 state = ctxt->instate;
5050 ctxt->instate = XML_PARSER_PI;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005051 /*
5052 * this is a Processing Instruction.
5053 */
5054 SKIP(2);
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005055 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005056
5057 /*
5058 * Parse the target name and check for special support like
5059 * namespace.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005060 */
5061 target = xmlParsePITarget(ctxt);
5062 if (target != NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005063 if ((RAW == '?') && (NXT(1) == '>')) {
5064 if (input != ctxt->input) {
5065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5066 ctxt->sax->error(ctxt->userData,
5067 "PI declaration doesn't start and stop in the same entity\n");
5068 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5069 ctxt->wellFormed = 0;
5070 ctxt->disableSAX = 1;
5071 }
5072 SKIP(2);
5073
5074 /*
5075 * SAX: PI detected.
5076 */
5077 if ((ctxt->sax) && (!ctxt->disableSAX) &&
5078 (ctxt->sax->processingInstruction != NULL))
5079 ctxt->sax->processingInstruction(ctxt->userData,
5080 target, NULL);
5081 ctxt->instate = state;
5082 xmlFree(target);
5083 return;
5084 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00005085 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
5086 if (buf == NULL) {
5087 fprintf(stderr, "malloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005088 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005089 return;
5090 }
5091 cur = CUR;
5092 if (!IS_BLANK(cur)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00005093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5094 ctxt->sax->error(ctxt->userData,
5095 "xmlParsePI: PI %s space expected\n", target);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005096 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillardb96e6431999-08-29 21:02:19 +00005097 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005098 ctxt->disableSAX = 1;
Daniel Veillardb96e6431999-08-29 21:02:19 +00005099 }
5100 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005101 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005102 while (IS_CHAR(cur) &&
5103 ((cur != '?') || (NXT(1) != '>'))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005104 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00005105 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00005106 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00005107 if (buf == NULL) {
5108 fprintf(stderr, "realloc of %d byte failed\n", size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005109 ctxt->instate = state;
Daniel Veillard10a2c651999-12-12 13:03:50 +00005110 return;
5111 }
5112 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005113 COPY_BUF(l,buf,len,cur);
5114 NEXTL(l);
5115 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005116 if (cur == 0) {
5117 SHRINK;
5118 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005119 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00005120 }
5121 }
5122 buf[len] = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005123 if (cur != '?') {
Daniel Veillard517752b1999-04-05 12:20:10 +00005124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005125 ctxt->sax->error(ctxt->userData,
Daniel Veillard517752b1999-04-05 12:20:10 +00005126 "xmlParsePI: PI %s never end ...\n", target);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005127 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
Daniel Veillard517752b1999-04-05 12:20:10 +00005128 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005129 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005130 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005131 if (input != ctxt->input) {
5132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5133 ctxt->sax->error(ctxt->userData,
5134 "PI declaration doesn't start and stop in the same entity\n");
5135 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5136 ctxt->wellFormed = 0;
5137 ctxt->disableSAX = 1;
5138 }
Daniel Veillard517752b1999-04-05 12:20:10 +00005139 SKIP(2);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005140
Daniel Veillard517752b1999-04-05 12:20:10 +00005141 /*
5142 * SAX: PI detected.
5143 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005144 if ((ctxt->sax) && (!ctxt->disableSAX) &&
Daniel Veillard517752b1999-04-05 12:20:10 +00005145 (ctxt->sax->processingInstruction != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00005146 ctxt->sax->processingInstruction(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00005147 target, buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005148 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00005149 xmlFree(buf);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005150 xmlFree(target);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005151 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00005153 ctxt->sax->error(ctxt->userData,
5154 "xmlParsePI : no target name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005155 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005156 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005157 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005158 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005159 ctxt->instate = state;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005160 }
5161}
5162
Daniel Veillard11e00581998-10-24 18:27:49 +00005163/**
5164 * xmlParseNotationDecl:
5165 * @ctxt: an XML parser context
5166 *
5167 * parse a notation declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00005168 *
5169 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5170 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005171 * Hence there is actually 3 choices:
5172 * 'PUBLIC' S PubidLiteral
5173 * 'PUBLIC' S PubidLiteral S SystemLiteral
5174 * and 'SYSTEM' S SystemLiteral
Daniel Veillard11e00581998-10-24 18:27:49 +00005175 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005176 * See the NOTE on xmlParseExternalID().
Daniel Veillard260a68f1998-08-13 03:39:55 +00005177 */
5178
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005179void
5180xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005181 xmlChar *name;
5182 xmlChar *Pubid;
5183 xmlChar *Systemid;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005184
Daniel Veillardcf461992000-03-14 18:30:20 +00005185 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005186 (NXT(2) == 'N') && (NXT(3) == 'O') &&
5187 (NXT(4) == 'T') && (NXT(5) == 'A') &&
5188 (NXT(6) == 'T') && (NXT(7) == 'I') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00005189 (NXT(8) == 'O') && (NXT(9) == 'N')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005190 xmlParserInputPtr input = ctxt->input;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005191 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005192 SKIP(10);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005193 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005194 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005195 ctxt->sax->error(ctxt->userData,
5196 "Space required after '<!NOTATION'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005197 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005198 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005199 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005200 return;
5201 }
5202 SKIP_BLANKS;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005203
5204 name = xmlParseName(ctxt);
5205 if (name == NULL) {
5206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005207 ctxt->sax->error(ctxt->userData,
5208 "NOTATION: Name expected here\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005209 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005210 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005211 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005212 return;
5213 }
5214 if (!IS_BLANK(CUR)) {
5215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005216 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005217 "Space required after the NOTATION name'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005218 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005219 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005220 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005221 return;
5222 }
5223 SKIP_BLANKS;
5224
Daniel Veillard260a68f1998-08-13 03:39:55 +00005225 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00005226 * Parse the IDs.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005227 */
Daniel Veillardcf461992000-03-14 18:30:20 +00005228 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005229 SKIP_BLANKS;
5230
Daniel Veillardcf461992000-03-14 18:30:20 +00005231 if (RAW == '>') {
5232 if (input != ctxt->input) {
5233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5234 ctxt->sax->error(ctxt->userData,
5235"Notation declaration doesn't start and stop in the same entity\n");
5236 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5237 ctxt->wellFormed = 0;
5238 ctxt->disableSAX = 1;
5239 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005240 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005241 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5242 (ctxt->sax->notationDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005243 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005244 } else {
5245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005246 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005247 "'>' required to close NOTATION declaration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005248 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005249 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005250 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005251 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00005252 xmlFree(name);
5253 if (Systemid != NULL) xmlFree(Systemid);
5254 if (Pubid != NULL) xmlFree(Pubid);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005255 }
5256}
5257
Daniel Veillard11e00581998-10-24 18:27:49 +00005258/**
5259 * xmlParseEntityDecl:
5260 * @ctxt: an XML parser context
5261 *
5262 * parse <!ENTITY declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00005263 *
5264 * [70] EntityDecl ::= GEDecl | PEDecl
5265 *
5266 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5267 *
5268 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5269 *
5270 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5271 *
5272 * [74] PEDef ::= EntityValue | ExternalID
5273 *
5274 * [76] NDataDecl ::= S 'NDATA' S Name
Daniel Veillardb05deb71999-08-10 19:04:08 +00005275 *
5276 * [ VC: Notation Declared ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005277 * The Name must match the declared name of a notation.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005278 */
5279
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005280void
5281xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005282 xmlChar *name = NULL;
5283 xmlChar *value = NULL;
5284 xmlChar *URI = NULL, *literal = NULL;
5285 xmlChar *ndata = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005286 int isParameter = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005287 xmlChar *orig = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005288
Daniel Veillardb05deb71999-08-10 19:04:08 +00005289 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005290 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005291 (NXT(2) == 'E') && (NXT(3) == 'N') &&
5292 (NXT(4) == 'T') && (NXT(5) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005293 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005294 xmlParserInputPtr input = ctxt->input;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005295 ctxt->instate = XML_PARSER_ENTITY_DECL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005296 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005297 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005298 if (!IS_BLANK(CUR)) {
5299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005300 ctxt->sax->error(ctxt->userData,
5301 "Space required after '<!ENTITY'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005302 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005303 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005304 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005305 }
5306 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005307
Daniel Veillardcf461992000-03-14 18:30:20 +00005308 if (RAW == '%') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00005309 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005310 if (!IS_BLANK(CUR)) {
5311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005312 ctxt->sax->error(ctxt->userData,
5313 "Space required after '%'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005314 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005315 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005316 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005317 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005318 SKIP_BLANKS;
5319 isParameter = 1;
5320 }
5321
5322 name = xmlParseName(ctxt);
5323 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005325 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005326 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005327 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005328 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005329 return;
5330 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005331 if (!IS_BLANK(CUR)) {
5332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005333 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005334 "Space required after the entity name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005335 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005336 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005337 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005338 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005339 SKIP_BLANKS;
5340
5341 /*
Daniel Veillard1e346af1999-02-22 10:33:01 +00005342 * handle the various case of definitions...
Daniel Veillard260a68f1998-08-13 03:39:55 +00005343 */
5344 if (isParameter) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005345 if ((RAW == '"') || (RAW == '\''))
Daniel Veillard011b63c1999-06-02 17:44:04 +00005346 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005347 if (value) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005348 if ((ctxt->sax != NULL) &&
5349 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005350 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00005351 XML_INTERNAL_PARAMETER_ENTITY,
5352 NULL, NULL, value);
5353 }
5354 else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005355 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillardcf461992000-03-14 18:30:20 +00005356 if ((URI == NULL) && (literal == NULL)) {
5357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5358 ctxt->sax->error(ctxt->userData,
5359 "Entity value required\n");
5360 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
5361 ctxt->wellFormed = 0;
5362 ctxt->disableSAX = 1;
5363 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005364 if (URI) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00005365 xmlURIPtr uri;
5366
5367 uri = xmlParseURI((const char *) URI);
5368 if (uri == NULL) {
5369 if ((ctxt->sax != NULL) &&
5370 (!ctxt->disableSAX) &&
5371 (ctxt->sax->error != NULL))
5372 ctxt->sax->error(ctxt->userData,
5373 "Invalid URI: %s\n", URI);
5374 ctxt->wellFormed = 0;
5375 ctxt->errNo = XML_ERR_INVALID_URI;
5376 } else {
5377 if (uri->fragment != NULL) {
5378 if ((ctxt->sax != NULL) &&
5379 (!ctxt->disableSAX) &&
5380 (ctxt->sax->error != NULL))
5381 ctxt->sax->error(ctxt->userData,
5382 "Fragment not allowed: %s\n", URI);
5383 ctxt->wellFormed = 0;
5384 ctxt->errNo = XML_ERR_URI_FRAGMENT;
5385 } else {
5386 if ((ctxt->sax != NULL) &&
5387 (!ctxt->disableSAX) &&
5388 (ctxt->sax->entityDecl != NULL))
5389 ctxt->sax->entityDecl(ctxt->userData, name,
5390 XML_EXTERNAL_PARAMETER_ENTITY,
5391 literal, URI, NULL);
5392 }
5393 xmlFreeURI(uri);
5394 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005395 }
5396 }
5397 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005398 if ((RAW == '"') || (RAW == '\'')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00005399 value = xmlParseEntityValue(ctxt, &orig);
Daniel Veillardcf461992000-03-14 18:30:20 +00005400 if ((ctxt->sax != NULL) &&
5401 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005402 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00005403 XML_INTERNAL_GENERAL_ENTITY,
5404 NULL, NULL, value);
5405 } else {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005406 URI = xmlParseExternalID(ctxt, &literal, 1);
Daniel Veillardcf461992000-03-14 18:30:20 +00005407 if ((URI == NULL) && (literal == NULL)) {
5408 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5409 ctxt->sax->error(ctxt->userData,
5410 "Entity value required\n");
5411 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
5412 ctxt->wellFormed = 0;
5413 ctxt->disableSAX = 1;
5414 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00005415 if (URI) {
5416 xmlURIPtr uri;
5417
5418 uri = xmlParseURI((const char *)URI);
5419 if (uri == NULL) {
5420 if ((ctxt->sax != NULL) &&
5421 (!ctxt->disableSAX) &&
5422 (ctxt->sax->error != NULL))
5423 ctxt->sax->error(ctxt->userData,
5424 "Invalid URI: %s\n", URI);
5425 ctxt->wellFormed = 0;
5426 ctxt->errNo = XML_ERR_INVALID_URI;
5427 } else {
5428 if (uri->fragment != NULL) {
5429 if ((ctxt->sax != NULL) &&
5430 (!ctxt->disableSAX) &&
5431 (ctxt->sax->error != NULL))
5432 ctxt->sax->error(ctxt->userData,
5433 "Fragment not allowed: %s\n", URI);
5434 ctxt->wellFormed = 0;
5435 ctxt->errNo = XML_ERR_URI_FRAGMENT;
5436 }
5437 xmlFreeURI(uri);
5438 }
5439 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005440 if ((RAW != '>') && (!IS_BLANK(CUR))) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005441 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005442 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005443 "Space required before 'NDATA'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005444 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005445 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005446 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005447 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005448 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005449 if ((RAW == 'N') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005450 (NXT(2) == 'A') && (NXT(3) == 'T') &&
5451 (NXT(4) == 'A')) {
5452 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005453 if (!IS_BLANK(CUR)) {
5454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005455 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005456 "Space required after 'NDATA'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005457 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005458 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005459 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005460 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005461 SKIP_BLANKS;
5462 ndata = xmlParseName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00005463 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
Daniel Veillardb96e6431999-08-29 21:02:19 +00005464 (ctxt->sax->unparsedEntityDecl != NULL))
5465 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00005466 literal, URI, ndata);
5467 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00005468 if ((ctxt->sax != NULL) &&
5469 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005470 ctxt->sax->entityDecl(ctxt->userData, name,
Daniel Veillard260a68f1998-08-13 03:39:55 +00005471 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5472 literal, URI, NULL);
5473 }
5474 }
5475 }
5476 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005477 if (RAW != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005479 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00005480 "xmlParseEntityDecl: entity %s not terminated\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005481 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005482 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005483 ctxt->disableSAX = 1;
5484 } else {
5485 if (input != ctxt->input) {
5486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5487 ctxt->sax->error(ctxt->userData,
5488"Entity declaration doesn't start and stop in the same entity\n");
5489 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
5490 ctxt->wellFormed = 0;
5491 ctxt->disableSAX = 1;
5492 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005493 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00005494 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00005495 if (orig != NULL) {
5496 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00005497 * Ugly mechanism to save the raw entity value.
Daniel Veillard011b63c1999-06-02 17:44:04 +00005498 */
5499 xmlEntityPtr cur = NULL;
5500
Daniel Veillardb05deb71999-08-10 19:04:08 +00005501 if (isParameter) {
5502 if ((ctxt->sax != NULL) &&
5503 (ctxt->sax->getParameterEntity != NULL))
Daniel Veillardc08a2c61999-09-08 21:35:25 +00005504 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005505 } else {
5506 if ((ctxt->sax != NULL) &&
5507 (ctxt->sax->getEntity != NULL))
Daniel Veillardc08a2c61999-09-08 21:35:25 +00005508 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005509 }
5510 if (cur != NULL) {
5511 if (cur->orig != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00005512 xmlFree(orig);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005513 else
5514 cur->orig = orig;
5515 } else
Daniel Veillard6454aec1999-09-02 22:04:43 +00005516 xmlFree(orig);
Daniel Veillard011b63c1999-06-02 17:44:04 +00005517 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00005518 if (name != NULL) xmlFree(name);
5519 if (value != NULL) xmlFree(value);
5520 if (URI != NULL) xmlFree(URI);
5521 if (literal != NULL) xmlFree(literal);
5522 if (ndata != NULL) xmlFree(ndata);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005523 }
5524}
5525
Daniel Veillard11e00581998-10-24 18:27:49 +00005526/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005527 * xmlParseDefaultDecl:
5528 * @ctxt: an XML parser context
5529 * @value: Receive a possible fixed default value for the attribute
5530 *
5531 * Parse an attribute default declaration
5532 *
5533 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5534 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005535 * [ VC: Required Attribute ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005536 * if the default declaration is the keyword #REQUIRED, then the
5537 * attribute must be specified for all elements of the type in the
5538 * attribute-list declaration.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005539 *
5540 * [ VC: Attribute Default Legal ]
5541 * The declared default value must meet the lexical constraints of
5542 * the declared attribute type c.f. xmlValidateAttributeDecl()
5543 *
5544 * [ VC: Fixed Attribute Default ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005545 * if an attribute has a default value declared with the #FIXED
5546 * keyword, instances of that attribute must match the default value.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005547 *
5548 * [ WFC: No < in Attribute Values ]
5549 * handled in xmlParseAttValue()
5550 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005551 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5552 * or XML_ATTRIBUTE_FIXED.
5553 */
5554
5555int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005556xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005557 int val;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005558 xmlChar *ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005559
5560 *value = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00005561 if ((RAW == '#') && (NXT(1) == 'R') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005562 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
5563 (NXT(4) == 'U') && (NXT(5) == 'I') &&
5564 (NXT(6) == 'R') && (NXT(7) == 'E') &&
5565 (NXT(8) == 'D')) {
5566 SKIP(9);
5567 return(XML_ATTRIBUTE_REQUIRED);
5568 }
Daniel Veillardcf461992000-03-14 18:30:20 +00005569 if ((RAW == '#') && (NXT(1) == 'I') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005570 (NXT(2) == 'M') && (NXT(3) == 'P') &&
5571 (NXT(4) == 'L') && (NXT(5) == 'I') &&
5572 (NXT(6) == 'E') && (NXT(7) == 'D')) {
5573 SKIP(8);
5574 return(XML_ATTRIBUTE_IMPLIED);
5575 }
5576 val = XML_ATTRIBUTE_NONE;
Daniel Veillardcf461992000-03-14 18:30:20 +00005577 if ((RAW == '#') && (NXT(1) == 'F') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005578 (NXT(2) == 'I') && (NXT(3) == 'X') &&
5579 (NXT(4) == 'E') && (NXT(5) == 'D')) {
5580 SKIP(6);
5581 val = XML_ATTRIBUTE_FIXED;
5582 if (!IS_BLANK(CUR)) {
5583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005584 ctxt->sax->error(ctxt->userData,
5585 "Space required after '#FIXED'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005586 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005587 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005588 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005589 }
5590 SKIP_BLANKS;
5591 }
5592 ret = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00005593 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005594 if (ret == NULL) {
5595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005596 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005597 "Attribute default value declaration error\n");
5598 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005599 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005600 } else
5601 *value = ret;
5602 return(val);
5603}
5604
5605/**
Daniel Veillard1e346af1999-02-22 10:33:01 +00005606 * xmlParseNotationType:
5607 * @ctxt: an XML parser context
5608 *
5609 * parse an Notation attribute type.
5610 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005611 * Note: the leading 'NOTATION' S part has already being parsed...
5612 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005613 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5614 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005615 * [ VC: Notation Attributes ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005616 * Values of this type must match one of the notation names included
Daniel Veillardb05deb71999-08-10 19:04:08 +00005617 * in the declaration; all notation names in the declaration must be declared.
Daniel Veillard1e346af1999-02-22 10:33:01 +00005618 *
5619 * Returns: the notation attribute tree built while parsing
5620 */
5621
5622xmlEnumerationPtr
5623xmlParseNotationType(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005624 xmlChar *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005625 xmlEnumerationPtr ret = NULL, last = NULL, cur;
5626
Daniel Veillardcf461992000-03-14 18:30:20 +00005627 if (RAW != '(') {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005629 ctxt->sax->error(ctxt->userData,
5630 "'(' required to start 'NOTATION'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005631 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005632 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005633 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005634 return(NULL);
5635 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005636 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005637 do {
5638 NEXT;
5639 SKIP_BLANKS;
5640 name = xmlParseName(ctxt);
5641 if (name == NULL) {
5642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005643 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005644 "Name expected in NOTATION declaration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005645 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005646 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005647 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005648 return(ret);
5649 }
5650 cur = xmlCreateEnumeration(name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005651 xmlFree(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005652 if (cur == NULL) return(ret);
5653 if (last == NULL) ret = last = cur;
5654 else {
5655 last->next = cur;
5656 last = cur;
5657 }
5658 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005659 } while (RAW == '|');
5660 if (RAW != ')') {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005662 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005663 "')' required to finish NOTATION declaration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005664 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005665 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005666 ctxt->disableSAX = 1;
5667 if ((last != NULL) && (last != ret))
5668 xmlFreeEnumeration(last);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005669 return(ret);
5670 }
5671 NEXT;
5672 return(ret);
5673}
5674
5675/**
5676 * xmlParseEnumerationType:
5677 * @ctxt: an XML parser context
5678 *
5679 * parse an Enumeration attribute type.
5680 *
5681 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5682 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005683 * [ VC: Enumeration ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005684 * Values of this type must match one of the Nmtoken tokens in
Daniel Veillardb05deb71999-08-10 19:04:08 +00005685 * the declaration
5686 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005687 * Returns: the enumeration attribute tree built while parsing
5688 */
5689
5690xmlEnumerationPtr
5691xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005692 xmlChar *name;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005693 xmlEnumerationPtr ret = NULL, last = NULL, cur;
5694
Daniel Veillardcf461992000-03-14 18:30:20 +00005695 if (RAW != '(') {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005697 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005698 "'(' required to start ATTLIST enumeration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005699 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005700 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005701 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005702 return(NULL);
5703 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005704 SHRINK;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005705 do {
5706 NEXT;
5707 SKIP_BLANKS;
5708 name = xmlParseNmtoken(ctxt);
5709 if (name == NULL) {
5710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005711 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005712 "NmToken expected in ATTLIST enumeration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005713 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005714 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005715 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005716 return(ret);
5717 }
5718 cur = xmlCreateEnumeration(name);
Daniel Veillard6454aec1999-09-02 22:04:43 +00005719 xmlFree(name);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005720 if (cur == NULL) return(ret);
5721 if (last == NULL) ret = last = cur;
5722 else {
5723 last->next = cur;
5724 last = cur;
5725 }
5726 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00005727 } while (RAW == '|');
5728 if (RAW != ')') {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005730 ctxt->sax->error(ctxt->userData,
Daniel Veillard1e346af1999-02-22 10:33:01 +00005731 "')' required to finish ATTLIST enumeration\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005732 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005733 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005734 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005735 return(ret);
5736 }
5737 NEXT;
5738 return(ret);
5739}
5740
5741/**
Daniel Veillard11e00581998-10-24 18:27:49 +00005742 * xmlParseEnumeratedType:
5743 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00005744 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00005745 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005746 * parse an Enumerated attribute type.
Daniel Veillard260a68f1998-08-13 03:39:55 +00005747 *
5748 * [57] EnumeratedType ::= NotationType | Enumeration
5749 *
5750 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5751 *
Daniel Veillard11e00581998-10-24 18:27:49 +00005752 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005753 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
Daniel Veillard260a68f1998-08-13 03:39:55 +00005754 */
5755
Daniel Veillard1e346af1999-02-22 10:33:01 +00005756int
5757xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005758 if ((RAW == 'N') && (NXT(1) == 'O') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00005759 (NXT(2) == 'T') && (NXT(3) == 'A') &&
5760 (NXT(4) == 'T') && (NXT(5) == 'I') &&
5761 (NXT(6) == 'O') && (NXT(7) == 'N')) {
5762 SKIP(8);
5763 if (!IS_BLANK(CUR)) {
5764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005765 ctxt->sax->error(ctxt->userData,
5766 "Space required after 'NOTATION'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005767 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005768 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005769 ctxt->disableSAX = 1;
Daniel Veillard1e346af1999-02-22 10:33:01 +00005770 return(0);
5771 }
5772 SKIP_BLANKS;
5773 *tree = xmlParseNotationType(ctxt);
5774 if (*tree == NULL) return(0);
5775 return(XML_ATTRIBUTE_NOTATION);
5776 }
5777 *tree = xmlParseEnumerationType(ctxt);
5778 if (*tree == NULL) return(0);
5779 return(XML_ATTRIBUTE_ENUMERATION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005780}
5781
Daniel Veillard11e00581998-10-24 18:27:49 +00005782/**
5783 * xmlParseAttributeType:
5784 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +00005785 * @tree: the enumeration tree built while parsing
Daniel Veillard11e00581998-10-24 18:27:49 +00005786 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005787 * parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00005788 *
5789 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
5790 *
5791 * [55] StringType ::= 'CDATA'
5792 *
5793 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
5794 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
Daniel Veillard11e00581998-10-24 18:27:49 +00005795 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00005796 * Validity constraints for attribute values syntax are checked in
5797 * xmlValidateAttributeValue()
5798 *
5799 * [ VC: ID ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005800 * Values of type ID must match the Name production. A name must not
Daniel Veillardb05deb71999-08-10 19:04:08 +00005801 * appear more than once in an XML document as a value of this type;
5802 * i.e., ID values must uniquely identify the elements which bear them.
5803 *
5804 * [ VC: One ID per Element Type ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005805 * No element type may have more than one ID attribute specified.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005806 *
5807 * [ VC: ID Attribute Default ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00005808 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005809 *
5810 * [ VC: IDREF ]
5811 * Values of type IDREF must match the Name production, and values
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005812 * of type IDREFS must match Names; each IDREF Name must match the value
Daniel Veillardb96e6431999-08-29 21:02:19 +00005813 * of an ID attribute on some element in the XML document; i.e. IDREF
Daniel Veillardb05deb71999-08-10 19:04:08 +00005814 * values must match the value of some ID attribute.
5815 *
5816 * [ VC: Entity Name ]
5817 * Values of type ENTITY must match the Name production, values
Daniel Veillarddbfd6411999-12-28 16:35:14 +00005818 * of type ENTITIES must match Names; each Entity Name must match the
Daniel Veillardb96e6431999-08-29 21:02:19 +00005819 * name of an unparsed entity declared in the DTD.
Daniel Veillardb05deb71999-08-10 19:04:08 +00005820 *
5821 * [ VC: Name Token ]
5822 * Values of type NMTOKEN must match the Nmtoken production; values
5823 * of type NMTOKENS must match Nmtokens.
5824 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00005825 * Returns the attribute type
Daniel Veillard260a68f1998-08-13 03:39:55 +00005826 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005827int
Daniel Veillard1e346af1999-02-22 10:33:01 +00005828xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00005829 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00005830 if ((RAW == 'C') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005831 (NXT(2) == 'A') && (NXT(3) == 'T') &&
5832 (NXT(4) == 'A')) {
5833 SKIP(5);
Daniel Veillard1e346af1999-02-22 10:33:01 +00005834 return(XML_ATTRIBUTE_CDATA);
Daniel Veillardcf461992000-03-14 18:30:20 +00005835 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005836 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00005837 (NXT(4) == 'F') && (NXT(5) == 'S')) {
5838 SKIP(6);
5839 return(XML_ATTRIBUTE_IDREFS);
Daniel Veillardcf461992000-03-14 18:30:20 +00005840 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00005841 (NXT(2) == 'R') && (NXT(3) == 'E') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005842 (NXT(4) == 'F')) {
5843 SKIP(5);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005844 return(XML_ATTRIBUTE_IDREF);
Daniel Veillardcf461992000-03-14 18:30:20 +00005845 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
Daniel Veillard1e346af1999-02-22 10:33:01 +00005846 SKIP(2);
5847 return(XML_ATTRIBUTE_ID);
Daniel Veillardcf461992000-03-14 18:30:20 +00005848 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005849 (NXT(2) == 'T') && (NXT(3) == 'I') &&
5850 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
5851 SKIP(6);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005852 return(XML_ATTRIBUTE_ENTITY);
Daniel Veillardcf461992000-03-14 18:30:20 +00005853 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005854 (NXT(2) == 'T') && (NXT(3) == 'I') &&
5855 (NXT(4) == 'T') && (NXT(5) == 'I') &&
5856 (NXT(6) == 'E') && (NXT(7) == 'S')) {
5857 SKIP(8);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005858 return(XML_ATTRIBUTE_ENTITIES);
Daniel Veillardcf461992000-03-14 18:30:20 +00005859 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005860 (NXT(2) == 'T') && (NXT(3) == 'O') &&
5861 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00005862 (NXT(6) == 'N') && (NXT(7) == 'S')) {
5863 SKIP(8);
5864 return(XML_ATTRIBUTE_NMTOKENS);
Daniel Veillardcf461992000-03-14 18:30:20 +00005865 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005866 (NXT(2) == 'T') && (NXT(3) == 'O') &&
5867 (NXT(4) == 'K') && (NXT(5) == 'E') &&
Daniel Veillard1e346af1999-02-22 10:33:01 +00005868 (NXT(6) == 'N')) {
5869 SKIP(7);
5870 return(XML_ATTRIBUTE_NMTOKEN);
Daniel Veillard260a68f1998-08-13 03:39:55 +00005871 }
Daniel Veillard1e346af1999-02-22 10:33:01 +00005872 return(xmlParseEnumeratedType(ctxt, tree));
Daniel Veillard260a68f1998-08-13 03:39:55 +00005873}
5874
Daniel Veillard11e00581998-10-24 18:27:49 +00005875/**
5876 * xmlParseAttributeListDecl:
5877 * @ctxt: an XML parser context
5878 *
5879 * : parse the Attribute list def for an element
Daniel Veillard260a68f1998-08-13 03:39:55 +00005880 *
5881 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
5882 *
5883 * [53] AttDef ::= S Name S AttType S DefaultDecl
Daniel Veillard11e00581998-10-24 18:27:49 +00005884 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00005885 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00005886void
5887xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005888 xmlChar *elemName;
5889 xmlChar *attrName;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005890 xmlEnumerationPtr tree;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005891
Daniel Veillardcf461992000-03-14 18:30:20 +00005892 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00005893 (NXT(2) == 'A') && (NXT(3) == 'T') &&
5894 (NXT(4) == 'T') && (NXT(5) == 'L') &&
5895 (NXT(6) == 'I') && (NXT(7) == 'S') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005896 (NXT(8) == 'T')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00005897 xmlParserInputPtr input = ctxt->input;
5898
Daniel Veillard260a68f1998-08-13 03:39:55 +00005899 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005900 if (!IS_BLANK(CUR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00005901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005902 ctxt->sax->error(ctxt->userData,
5903 "Space required after '<!ATTLIST'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005904 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005905 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005906 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005907 }
5908 SKIP_BLANKS;
5909 elemName = xmlParseName(ctxt);
5910 if (elemName == NULL) {
5911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005912 ctxt->sax->error(ctxt->userData,
5913 "ATTLIST: no name for Element\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005914 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005915 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005916 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005917 return;
5918 }
5919 SKIP_BLANKS;
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00005920 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005921 while (RAW != '>') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005922 const xmlChar *check = CUR_PTR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005923 int type;
5924 int def;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005925 xmlChar *defaultValue = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00005926
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00005927 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00005928 tree = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005929 attrName = xmlParseName(ctxt);
5930 if (attrName == NULL) {
5931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00005932 ctxt->sax->error(ctxt->userData,
5933 "ATTLIST: no name for Attribute\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005934 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005935 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005936 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005937 break;
5938 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00005939 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005940 if (!IS_BLANK(CUR)) {
5941 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005942 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005943 "Space required after the attribute name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005944 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005945 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005946 ctxt->disableSAX = 1;
5947 if (attrName != NULL)
5948 xmlFree(attrName);
5949 if (defaultValue != NULL)
5950 xmlFree(defaultValue);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005951 break;
5952 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00005953 SKIP_BLANKS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005954
Daniel Veillard1e346af1999-02-22 10:33:01 +00005955 type = xmlParseAttributeType(ctxt, &tree);
Daniel Veillardcf461992000-03-14 18:30:20 +00005956 if (type <= 0) {
5957 if (attrName != NULL)
5958 xmlFree(attrName);
5959 if (defaultValue != NULL)
5960 xmlFree(defaultValue);
5961 break;
5962 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005963
Daniel Veillardb05deb71999-08-10 19:04:08 +00005964 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005965 if (!IS_BLANK(CUR)) {
5966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005967 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005968 "Space required after the attribute type\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005969 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005970 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00005971 ctxt->disableSAX = 1;
5972 if (attrName != NULL)
5973 xmlFree(attrName);
5974 if (defaultValue != NULL)
5975 xmlFree(defaultValue);
5976 if (tree != NULL)
5977 xmlFreeEnumeration(tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005978 break;
5979 }
5980 SKIP_BLANKS;
5981
5982 def = xmlParseDefaultDecl(ctxt, &defaultValue);
Daniel Veillardcf461992000-03-14 18:30:20 +00005983 if (def <= 0) {
5984 if (attrName != NULL)
5985 xmlFree(attrName);
5986 if (defaultValue != NULL)
5987 xmlFree(defaultValue);
5988 if (tree != NULL)
5989 xmlFreeEnumeration(tree);
5990 break;
5991 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005992
Daniel Veillardb05deb71999-08-10 19:04:08 +00005993 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00005994 if (RAW != '>') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005995 if (!IS_BLANK(CUR)) {
5996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00005997 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00005998 "Space required after the attribute default value\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00005999 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006000 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006001 ctxt->disableSAX = 1;
6002 if (attrName != NULL)
6003 xmlFree(attrName);
6004 if (defaultValue != NULL)
6005 xmlFree(defaultValue);
6006 if (tree != NULL)
6007 xmlFreeEnumeration(tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006008 break;
6009 }
6010 SKIP_BLANKS;
6011 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006012 if (check == CUR_PTR) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006014 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006015 "xmlParseAttributeListDecl: detected internal error\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006016 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardcf461992000-03-14 18:30:20 +00006017 if (attrName != NULL)
6018 xmlFree(attrName);
6019 if (defaultValue != NULL)
6020 xmlFree(defaultValue);
6021 if (tree != NULL)
6022 xmlFreeEnumeration(tree);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006023 break;
6024 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006025 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6026 (ctxt->sax->attributeDecl != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006027 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
Daniel Veillard1e346af1999-02-22 10:33:01 +00006028 type, def, defaultValue, tree);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006029 if (attrName != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00006030 xmlFree(attrName);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006031 if (defaultValue != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00006032 xmlFree(defaultValue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006033 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006034 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006035 if (RAW == '>') {
6036 if (input != ctxt->input) {
6037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6038 ctxt->sax->error(ctxt->userData,
6039"Attribute list declaration doesn't start and stop in the same entity\n");
6040 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
6041 ctxt->wellFormed = 0;
6042 ctxt->disableSAX = 1;
6043 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006044 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006045 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006046
Daniel Veillard6454aec1999-09-02 22:04:43 +00006047 xmlFree(elemName);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006048 }
6049}
6050
Daniel Veillard11e00581998-10-24 18:27:49 +00006051/**
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006052 * xmlParseElementMixedContentDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00006053 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00006054 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006055 * parse the declaration for a Mixed Element content
6056 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
Daniel Veillard260a68f1998-08-13 03:39:55 +00006057 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006058 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6059 * '(' S? '#PCDATA' S? ')'
6060 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006061 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6062 *
6063 * [ VC: No Duplicate Types ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00006064 * The same name must not appear more than once in a single
6065 * mixed-content declaration.
Daniel Veillardb05deb71999-08-10 19:04:08 +00006066 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006067 * returns: the list of the xmlElementContentPtr describing the element choices
6068 */
6069xmlElementContentPtr
6070xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard1899e851999-02-01 12:18:54 +00006071 xmlElementContentPtr ret = NULL, cur = NULL, n;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006072 xmlChar *elem = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006073
Daniel Veillardb05deb71999-08-10 19:04:08 +00006074 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00006075 if ((RAW == '#') && (NXT(1) == 'P') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006076 (NXT(2) == 'C') && (NXT(3) == 'D') &&
6077 (NXT(4) == 'A') && (NXT(5) == 'T') &&
6078 (NXT(6) == 'A')) {
6079 SKIP(7);
6080 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006081 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00006082 if (RAW == ')') {
6083 ctxt->entity = ctxt->input;
Daniel Veillard3b9def11999-01-31 22:15:06 +00006084 NEXT;
6085 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
Daniel Veillardcf461992000-03-14 18:30:20 +00006086 if (RAW == '*') {
Daniel Veillardf600e251999-12-18 15:32:46 +00006087 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6088 NEXT;
6089 }
Daniel Veillard3b9def11999-01-31 22:15:06 +00006090 return(ret);
6091 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006092 if ((RAW == '(') || (RAW == '|')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006093 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
6094 if (ret == NULL) return(NULL);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006095 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006096 while (RAW == '|') {
Daniel Veillard1899e851999-02-01 12:18:54 +00006097 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006098 if (elem == NULL) {
6099 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
6100 if (ret == NULL) return(NULL);
6101 ret->c1 = cur;
Daniel Veillard1899e851999-02-01 12:18:54 +00006102 cur = ret;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006103 } else {
Daniel Veillard1899e851999-02-01 12:18:54 +00006104 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
6105 if (n == NULL) return(NULL);
6106 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
6107 cur->c2 = n;
6108 cur = n;
Daniel Veillard6454aec1999-09-02 22:04:43 +00006109 xmlFree(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006110 }
6111 SKIP_BLANKS;
6112 elem = xmlParseName(ctxt);
6113 if (elem == NULL) {
6114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006115 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006116 "xmlParseElementMixedContentDecl : Name expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006117 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006118 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006119 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006120 xmlFreeElementContent(cur);
6121 return(NULL);
6122 }
6123 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006124 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006125 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006126 if ((RAW == ')') && (NXT(1) == '*')) {
Daniel Veillard1e346af1999-02-22 10:33:01 +00006127 if (elem != NULL) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006128 cur->c2 = xmlNewElementContent(elem,
6129 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard6454aec1999-09-02 22:04:43 +00006130 xmlFree(elem);
Daniel Veillard1e346af1999-02-22 10:33:01 +00006131 }
Daniel Veillard1899e851999-02-01 12:18:54 +00006132 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006133 ctxt->entity = ctxt->input;
Daniel Veillard1899e851999-02-01 12:18:54 +00006134 SKIP(2);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006135 } else {
Daniel Veillard6454aec1999-09-02 22:04:43 +00006136 if (elem != NULL) xmlFree(elem);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006138 ctxt->sax->error(ctxt->userData,
Daniel Veillard3b9def11999-01-31 22:15:06 +00006139 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006140 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006141 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006142 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006143 xmlFreeElementContent(ret);
6144 return(NULL);
6145 }
6146
6147 } else {
6148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006149 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006150 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006151 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006152 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006153 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006154 }
6155 return(ret);
6156}
6157
6158/**
6159 * xmlParseElementChildrenContentDecl:
6160 * @ctxt: an XML parser context
6161 *
 * parse the declaration for an element children content model
6163 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6164 *
6165 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00006166 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
6167 *
6168 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6169 *
6170 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6171 *
6172 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6173 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006174 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6175 * TODO Parameter-entity replacement text must be properly nested
 * with parenthesized groups. That is to say, if either of the
6177 * opening or closing parentheses in a choice, seq, or Mixed
6178 * construct is contained in the replacement text for a parameter
6179 * entity, both must be contained in the same replacement text. For
6180 * interoperability, if a parameter-entity reference appears in a
6181 * choice, seq, or Mixed construct, its replacement text should not
6182 * be empty, and neither the first nor last non-blank character of
6183 * the replacement text should be a connector (| or ,).
6184 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006185 * returns: the tree of xmlElementContentPtr describing the element
6186 * hierarchy.
6187 */
6188xmlElementContentPtr
6189xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt) {
6190 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006191 xmlChar *elem;
6192 xmlChar type = 0;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006193
6194 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006195 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00006196 if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006197 /* Recurse on first child */
6198 NEXT;
6199 SKIP_BLANKS;
6200 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
6201 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006202 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006203 } else {
6204 elem = xmlParseName(ctxt);
6205 if (elem == NULL) {
6206 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006207 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006208 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006209 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006210 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006211 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006212 return(NULL);
6213 }
6214 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006215 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00006216 if (RAW == '?') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006217 cur->ocur = XML_ELEMENT_CONTENT_OPT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006218 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006219 } else if (RAW == '*') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006220 cur->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006221 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006222 } else if (RAW == '+') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006223 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006224 NEXT;
6225 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006226 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006227 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00006228 xmlFree(elem);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006229 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006230 }
6231 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00006232 SHRINK;
Daniel Veillardcf461992000-03-14 18:30:20 +00006233 while (RAW != ')') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006234 /*
6235 * Each loop we parse one separator and one element.
6236 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006237 if (RAW == ',') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006238 if (type == 0) type = CUR;
6239
6240 /*
6241 * Detect "Name | Name , Name" error
6242 */
6243 else if (type != CUR) {
6244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006245 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006246 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6247 type);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006248 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006249 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006250 ctxt->disableSAX = 1;
6251 if ((op != NULL) && (op != ret))
6252 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00006253 if ((last != NULL) && (last != ret) &&
6254 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00006255 xmlFreeElementContent(last);
6256 if (ret != NULL)
6257 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006258 return(NULL);
6259 }
Daniel Veillard1899e851999-02-01 12:18:54 +00006260 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006261
6262 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
6263 if (op == NULL) {
6264 xmlFreeElementContent(ret);
6265 return(NULL);
6266 }
6267 if (last == NULL) {
6268 op->c1 = ret;
6269 ret = cur = op;
6270 } else {
6271 cur->c2 = op;
6272 op->c1 = last;
6273 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00006274 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006275 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006276 } else if (RAW == '|') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006277 if (type == 0) type = CUR;
6278
6279 /*
6280 * Detect "Name , Name | Name" error
6281 */
6282 else if (type != CUR) {
6283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006284 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006285 "xmlParseElementChildrenContentDecl : '%c' expected\n",
6286 type);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006287 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006288 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006289 ctxt->disableSAX = 1;
Daniel Veillard87b95392000-08-12 21:12:04 +00006290 if ((op != NULL) && (op != ret) && (op != last))
Daniel Veillardcf461992000-03-14 18:30:20 +00006291 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00006292 if ((last != NULL) && (last != ret) &&
6293 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00006294 xmlFreeElementContent(last);
6295 if (ret != NULL)
6296 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006297 return(NULL);
6298 }
Daniel Veillard1899e851999-02-01 12:18:54 +00006299 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006300
6301 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
6302 if (op == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006303 if ((op != NULL) && (op != ret))
6304 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00006305 if ((last != NULL) && (last != ret) &&
6306 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00006307 xmlFreeElementContent(last);
6308 if (ret != NULL)
6309 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006310 return(NULL);
6311 }
6312 if (last == NULL) {
6313 op->c1 = ret;
6314 ret = cur = op;
6315 } else {
6316 cur->c2 = op;
6317 op->c1 = last;
6318 cur =op;
Daniel Veillard1899e851999-02-01 12:18:54 +00006319 last = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006320 }
6321 } else {
6322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006323 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006324 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
6325 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006326 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006327 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
Daniel Veillardcf461992000-03-14 18:30:20 +00006328 if ((op != NULL) && (op != ret))
6329 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00006330 if ((last != NULL) && (last != ret) &&
6331 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00006332 xmlFreeElementContent(last);
6333 if (ret != NULL)
6334 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006335 return(NULL);
6336 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00006337 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006338 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006339 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00006340 if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006341 /* Recurse on second child */
6342 NEXT;
6343 SKIP_BLANKS;
Daniel Veillard1899e851999-02-01 12:18:54 +00006344 last = xmlParseElementChildrenContentDecl(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006345 SKIP_BLANKS;
6346 } else {
6347 elem = xmlParseName(ctxt);
6348 if (elem == NULL) {
6349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006350 ctxt->sax->error(ctxt->userData,
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006351 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006352 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006353 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006354 ctxt->disableSAX = 1;
6355 if ((op != NULL) && (op != ret))
6356 xmlFreeElementContent(op);
Daniel Veillard87b95392000-08-12 21:12:04 +00006357 if ((last != NULL) && (last != ret) &&
6358 (last != ret->c1) && (last != ret->c2))
Daniel Veillardcf461992000-03-14 18:30:20 +00006359 xmlFreeElementContent(last);
6360 if (ret != NULL)
6361 xmlFreeElementContent(ret);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006362 return(NULL);
6363 }
Daniel Veillard1899e851999-02-01 12:18:54 +00006364 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillard6454aec1999-09-02 22:04:43 +00006365 xmlFree(elem);
Daniel Veillardcf461992000-03-14 18:30:20 +00006366 if (RAW == '?') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006367 last->ocur = XML_ELEMENT_CONTENT_OPT;
6368 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006369 } else if (RAW == '*') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006370 last->ocur = XML_ELEMENT_CONTENT_MULT;
6371 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006372 } else if (RAW == '+') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006373 last->ocur = XML_ELEMENT_CONTENT_PLUS;
6374 NEXT;
6375 } else {
6376 last->ocur = XML_ELEMENT_CONTENT_ONCE;
6377 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006378 }
6379 SKIP_BLANKS;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006380 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006381 }
Daniel Veillard1899e851999-02-01 12:18:54 +00006382 if ((cur != NULL) && (last != NULL)) {
6383 cur->c2 = last;
6384 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006385 ctxt->entity = ctxt->input;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006386 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006387 if (RAW == '?') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006388 ret->ocur = XML_ELEMENT_CONTENT_OPT;
6389 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006390 } else if (RAW == '*') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006391 ret->ocur = XML_ELEMENT_CONTENT_MULT;
6392 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006393 } else if (RAW == '+') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006394 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6395 NEXT;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006396 }
6397 return(ret);
6398}
6399
6400/**
6401 * xmlParseElementContentDecl:
6402 * @ctxt: an XML parser context
6403 * @name: the name of the element being defined.
6404 * @result: the Element Content pointer will be stored here if any
Daniel Veillard260a68f1998-08-13 03:39:55 +00006405 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006406 * parse the declaration for an Element content either Mixed or Children,
6407 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6408 *
6409 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
Daniel Veillard11e00581998-10-24 18:27:49 +00006410 *
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006411 * returns: the type of element content XML_ELEMENT_TYPE_xxx
Daniel Veillard260a68f1998-08-13 03:39:55 +00006412 */
6413
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006414int
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006415xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006416 xmlElementContentPtr *result) {
6417
6418 xmlElementContentPtr tree = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00006419 xmlParserInputPtr input = ctxt->input;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006420 int res;
6421
6422 *result = NULL;
6423
Daniel Veillardcf461992000-03-14 18:30:20 +00006424 if (RAW != '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006425 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006426 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006427 "xmlParseElementContentDecl : '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006428 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006429 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006430 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006431 return(-1);
6432 }
6433 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006434 GROW;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006435 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006436 if ((RAW == '#') && (NXT(1) == 'P') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006437 (NXT(2) == 'C') && (NXT(3) == 'D') &&
6438 (NXT(4) == 'A') && (NXT(5) == 'T') &&
6439 (NXT(6) == 'A')) {
6440 tree = xmlParseElementMixedContentDecl(ctxt);
6441 res = XML_ELEMENT_TYPE_MIXED;
6442 } else {
6443 tree = xmlParseElementChildrenContentDecl(ctxt);
6444 res = XML_ELEMENT_TYPE_ELEMENT;
6445 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006446 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
6447 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6448 ctxt->sax->error(ctxt->userData,
6449"Element content declaration doesn't start and stop in the same entity\n");
6450 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
6451 ctxt->wellFormed = 0;
6452 ctxt->disableSAX = 1;
6453 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006454 SKIP_BLANKS;
6455 /****************************
Daniel Veillardcf461992000-03-14 18:30:20 +00006456 if (RAW != ')') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006457 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006458 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006459 "xmlParseElementContentDecl : ')' expected\n");
6460 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006461 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006462 return(-1);
6463 }
6464 ****************************/
Daniel Veillard3b9def11999-01-31 22:15:06 +00006465 *result = tree;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006466 return(res);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006467}
6468
Daniel Veillard11e00581998-10-24 18:27:49 +00006469/**
6470 * xmlParseElementDecl:
6471 * @ctxt: an XML parser context
6472 *
6473 * parse an Element declaration.
Daniel Veillard260a68f1998-08-13 03:39:55 +00006474 *
6475 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6476 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006477 * [ VC: Unique Element Type Declaration ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00006478 * No element type may be declared more than once
Daniel Veillard1e346af1999-02-22 10:33:01 +00006479 *
6480 * Returns the type of the element, or -1 in case of error
Daniel Veillard260a68f1998-08-13 03:39:55 +00006481 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006482int
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006483xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006484 xmlChar *name;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006485 int ret = -1;
6486 xmlElementContentPtr content = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006487
Daniel Veillardb05deb71999-08-10 19:04:08 +00006488 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00006489 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006490 (NXT(2) == 'E') && (NXT(3) == 'L') &&
6491 (NXT(4) == 'E') && (NXT(5) == 'M') &&
6492 (NXT(6) == 'E') && (NXT(7) == 'N') &&
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006493 (NXT(8) == 'T')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006494 xmlParserInputPtr input = ctxt->input;
6495
Daniel Veillard260a68f1998-08-13 03:39:55 +00006496 SKIP(9);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006497 if (!IS_BLANK(CUR)) {
6498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006499 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006500 "Space required after 'ELEMENT'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006501 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006502 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006503 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006504 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006505 SKIP_BLANKS;
6506 name = xmlParseName(ctxt);
6507 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006509 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006510 "xmlParseElementDecl: no name for Element\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006511 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006512 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006513 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006514 return(-1);
6515 }
6516 if (!IS_BLANK(CUR)) {
6517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006518 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006519 "Space required after the element name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006520 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006521 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006522 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006523 }
6524 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006525 if ((RAW == 'E') && (NXT(1) == 'M') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006526 (NXT(2) == 'P') && (NXT(3) == 'T') &&
6527 (NXT(4) == 'Y')) {
6528 SKIP(5);
6529 /*
6530 * Element must always be empty.
6531 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006532 ret = XML_ELEMENT_TYPE_EMPTY;
Daniel Veillardcf461992000-03-14 18:30:20 +00006533 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00006534 (NXT(2) == 'Y')) {
6535 SKIP(3);
6536 /*
6537 * Element is a generic container.
6538 */
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006539 ret = XML_ELEMENT_TYPE_ANY;
Daniel Veillardcf461992000-03-14 18:30:20 +00006540 } else if (RAW == '(') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006541 ret = xmlParseElementContentDecl(ctxt, name, &content);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006542 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006543 /*
6544 * [ WFC: PEs in Internal Subset ] error handling.
6545 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006546 if ((RAW == '%') && (ctxt->external == 0) &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00006547 (ctxt->inputNr == 1)) {
6548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6549 ctxt->sax->error(ctxt->userData,
6550 "PEReference: forbidden within markup decl in internal subset\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006551 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006552 } else {
6553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6554 ctxt->sax->error(ctxt->userData,
6555 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006556 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00006557 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006558 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006559 ctxt->disableSAX = 1;
Daniel Veillard6454aec1999-09-02 22:04:43 +00006560 if (name != NULL) xmlFree(name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006561 return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006562 }
Daniel Veillard686d6b62000-01-03 11:08:02 +00006563
Daniel Veillard260a68f1998-08-13 03:39:55 +00006564 SKIP_BLANKS;
Daniel Veillard686d6b62000-01-03 11:08:02 +00006565 /*
6566 * Pop-up of finished entities.
6567 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006568 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard686d6b62000-01-03 11:08:02 +00006569 xmlPopInput(ctxt);
6570 SKIP_BLANKS;
6571
Daniel Veillardcf461992000-03-14 18:30:20 +00006572 if (RAW != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00006573 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00006574 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00006575 "xmlParseElementDecl: expected '>' at the end\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006576 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006577 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006578 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006579 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00006580 if (input != ctxt->input) {
6581 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6582 ctxt->sax->error(ctxt->userData,
6583"Element declaration doesn't start and stop in the same entity\n");
6584 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
6585 ctxt->wellFormed = 0;
6586 ctxt->disableSAX = 1;
6587 }
6588
Daniel Veillard260a68f1998-08-13 03:39:55 +00006589 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006590 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6591 (ctxt->sax->elementDecl != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00006592 ctxt->sax->elementDecl(ctxt->userData, name, ret,
6593 content);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006594 }
Daniel Veillard14fff061999-06-22 21:49:07 +00006595 if (content != NULL) {
6596 xmlFreeElementContent(content);
6597 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006598 if (name != NULL) {
Daniel Veillard6454aec1999-09-02 22:04:43 +00006599 xmlFree(name);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006600 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00006601 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00006602 return(ret);
Daniel Veillard260a68f1998-08-13 03:39:55 +00006603}
6604
Daniel Veillard11e00581998-10-24 18:27:49 +00006605/**
6606 * xmlParseMarkupDecl:
6607 * @ctxt: an XML parser context
6608 *
6609 * parse Markup declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00006610 *
6611 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
6612 * NotationDecl | PI | Comment
6613 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00006614 * [ VC: Proper Declaration/PE Nesting ]
6615 * TODO Parameter-entity replacement text must be properly nested with
6616 * markup declarations. That is to say, if either the first character
6617 * or the last character of a markup declaration (markupdecl above) is
6618 * contained in the replacement text for a parameter-entity reference,
6619 * both must be contained in the same replacement text.
6620 *
6621 * [ WFC: PEs in Internal Subset ]
6622 * In the internal DTD subset, parameter-entity references can occur
6623 * only where markup declarations can occur, not within markup declarations.
6624 * (This does not apply to references that occur in external parameter
6625 * entities or to the external subset.)
Daniel Veillard260a68f1998-08-13 03:39:55 +00006626 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00006627void
6628xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00006629 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006630 xmlParseElementDecl(ctxt);
6631 xmlParseAttributeListDecl(ctxt);
6632 xmlParseEntityDecl(ctxt);
6633 xmlParseNotationDecl(ctxt);
6634 xmlParsePI(ctxt);
Daniel Veillardb96e6431999-08-29 21:02:19 +00006635 xmlParseComment(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00006636 /*
6637 * This is only for internal subset. On external entities,
6638 * the replacement is done before parsing stage
6639 */
6640 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
6641 xmlParsePEReference(ctxt);
6642 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00006643}
6644
Daniel Veillard11e00581998-10-24 18:27:49 +00006645/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00006646 * xmlParseTextDecl:
6647 * @ctxt: an XML parser context
6648 *
6649 * parse an XML declaration header for external entities
6650 *
6651 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
6652 *
Daniel Veillardcf461992000-03-14 18:30:20 +00006653 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
Daniel Veillard011b63c1999-06-02 17:44:04 +00006654 */
6655
Daniel Veillardcf461992000-03-14 18:30:20 +00006656void
Daniel Veillard011b63c1999-06-02 17:44:04 +00006657xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006658 xmlChar *version;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006659
6660 /*
6661 * We know that '<?xml' is here.
6662 */
Daniel Veillard496a1cf2000-05-03 14:20:55 +00006663 if ((RAW == '<') && (NXT(1) == '?') &&
6664 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6665 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
6666 SKIP(5);
6667 } else {
6668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6669 ctxt->sax->error(ctxt->userData,
6670 "Text declaration '<?xml' required\n");
6671 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
6672 ctxt->wellFormed = 0;
6673 ctxt->disableSAX = 1;
6674
6675 return;
6676 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006677
6678 if (!IS_BLANK(CUR)) {
6679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006680 ctxt->sax->error(ctxt->userData,
6681 "Space needed after '<?xml'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006682 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006683 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006684 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006685 }
6686 SKIP_BLANKS;
6687
6688 /*
6689 * We may have the VersionInfo here.
6690 */
6691 version = xmlParseVersionInfo(ctxt);
Daniel Veillard011b63c1999-06-02 17:44:04 +00006692 if (version == NULL)
6693 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00006694 ctxt->input->version = version;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006695
6696 /*
6697 * We must have the encoding declaration
6698 */
6699 if (!IS_BLANK(CUR)) {
6700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006701 ctxt->sax->error(ctxt->userData, "Space needed here\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006702 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006703 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006704 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006705 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00006706 xmlParseEncodingDecl(ctxt);
6707 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6708 /*
6709 * The XML REC instructs us to stop parsing right here
6710 */
6711 return;
6712 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006713
6714 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00006715 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006716 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00006717 } else if (RAW == '>') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006718 /* Deprecated old WD ... */
6719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006720 ctxt->sax->error(ctxt->userData,
6721 "XML declaration must end-up with '?>'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006722 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006723 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006724 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006725 NEXT;
6726 } else {
6727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00006728 ctxt->sax->error(ctxt->userData,
6729 "parsing XML declaration: '?>' expected\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006730 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006731 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006732 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006733 MOVETO_ENDTAG(CUR_PTR);
6734 NEXT;
6735 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006736}
6737
6738/*
6739 * xmlParseConditionalSections
6740 * @ctxt: an XML parser context
6741 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00006742 * [61] conditionalSect ::= includeSect | ignoreSect
6743 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6744 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6745 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6746 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6747 */
6748
6749void
6750xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006751 SKIP(3);
6752 SKIP_BLANKS;
6753 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
6754 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
6755 (NXT(6) == 'E')) {
6756 SKIP(7);
6757 SKIP_BLANKS;
6758 if (RAW != '[') {
6759 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6760 ctxt->sax->error(ctxt->userData,
6761 "XML conditional section '[' expected\n");
6762 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6763 ctxt->wellFormed = 0;
6764 ctxt->disableSAX = 1;
6765 } else {
6766 NEXT;
6767 }
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00006768 if (xmlParserDebugEntities) {
6769 if ((ctxt->input != NULL) && (ctxt->input->filename))
6770 fprintf(stderr, "%s(%d): ", ctxt->input->filename,
6771 ctxt->input->line);
6772 fprintf(stderr, "Entering INCLUDE Conditional Section\n");
6773 }
6774
Daniel Veillardcf461992000-03-14 18:30:20 +00006775 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6776 (NXT(2) != '>'))) {
6777 const xmlChar *check = CUR_PTR;
6778 int cons = ctxt->input->consumed;
6779 int tok = ctxt->token;
Daniel Veillard71b656e2000-01-05 14:46:17 +00006780
Daniel Veillardcf461992000-03-14 18:30:20 +00006781 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6782 xmlParseConditionalSections(ctxt);
6783 } else if (IS_BLANK(CUR)) {
6784 NEXT;
6785 } else if (RAW == '%') {
6786 xmlParsePEReference(ctxt);
6787 } else
6788 xmlParseMarkupDecl(ctxt);
6789
6790 /*
6791 * Pop-up of finished entities.
6792 */
6793 while ((RAW == 0) && (ctxt->inputNr > 1))
6794 xmlPopInput(ctxt);
6795
6796 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6797 (tok == ctxt->token)) {
6798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6799 ctxt->sax->error(ctxt->userData,
6800 "Content error in the external subset\n");
6801 ctxt->wellFormed = 0;
6802 ctxt->disableSAX = 1;
6803 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6804 break;
6805 }
6806 }
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00006807 if (xmlParserDebugEntities) {
6808 if ((ctxt->input != NULL) && (ctxt->input->filename))
6809 fprintf(stderr, "%s(%d): ", ctxt->input->filename,
6810 ctxt->input->line);
6811 fprintf(stderr, "Leaving INCLUDE Conditional Section\n");
6812 }
6813
Daniel Veillardcf461992000-03-14 18:30:20 +00006814 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
6815 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
6816 int state;
6817
6818 SKIP(6);
6819 SKIP_BLANKS;
6820 if (RAW != '[') {
6821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6822 ctxt->sax->error(ctxt->userData,
6823 "XML conditional section '[' expected\n");
6824 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6825 ctxt->wellFormed = 0;
6826 ctxt->disableSAX = 1;
6827 } else {
6828 NEXT;
6829 }
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00006830 if (xmlParserDebugEntities) {
6831 if ((ctxt->input != NULL) && (ctxt->input->filename))
6832 fprintf(stderr, "%s(%d): ", ctxt->input->filename,
6833 ctxt->input->line);
6834 fprintf(stderr, "Entering IGNORE Conditional Section\n");
6835 }
Daniel Veillardcf461992000-03-14 18:30:20 +00006836
6837 /*
6838 * Parse up to the end of the conditionnal section
6839 * But disable SAX event generating DTD building in the meantime
6840 */
6841 state = ctxt->disableSAX;
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00006842 ctxt->disableSAX = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00006843 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
6844 (NXT(2) != '>'))) {
6845 const xmlChar *check = CUR_PTR;
6846 int cons = ctxt->input->consumed;
6847 int tok = ctxt->token;
6848
6849 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6850 xmlParseConditionalSections(ctxt);
6851 } else if (IS_BLANK(CUR)) {
6852 NEXT;
6853 } else if (RAW == '%') {
6854 xmlParsePEReference(ctxt);
6855 } else
6856 xmlParseMarkupDecl(ctxt);
6857
6858 /*
6859 * Pop-up of finished entities.
6860 */
6861 while ((RAW == 0) && (ctxt->inputNr > 1))
6862 xmlPopInput(ctxt);
6863
6864 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6865 (tok == ctxt->token)) {
6866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6867 ctxt->sax->error(ctxt->userData,
6868 "Content error in the external subset\n");
6869 ctxt->wellFormed = 0;
6870 ctxt->disableSAX = 1;
6871 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
6872 break;
6873 }
6874 }
6875 ctxt->disableSAX = state;
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00006876 if (xmlParserDebugEntities) {
6877 if ((ctxt->input != NULL) && (ctxt->input->filename))
6878 fprintf(stderr, "%s(%d): ", ctxt->input->filename,
6879 ctxt->input->line);
6880 fprintf(stderr, "Leaving IGNORE Conditional Section\n");
6881 }
6882
Daniel Veillardcf461992000-03-14 18:30:20 +00006883 } else {
6884 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6885 ctxt->sax->error(ctxt->userData,
6886 "XML conditional section INCLUDE or IGNORE keyword expected\n");
6887 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
6888 ctxt->wellFormed = 0;
6889 ctxt->disableSAX = 1;
Daniel Veillard71b656e2000-01-05 14:46:17 +00006890 }
6891
Daniel Veillardcf461992000-03-14 18:30:20 +00006892 if (RAW == 0)
Daniel Veillard71b656e2000-01-05 14:46:17 +00006893 SHRINK;
6894
Daniel Veillardcf461992000-03-14 18:30:20 +00006895 if (RAW == 0) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006896 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6897 ctxt->sax->error(ctxt->userData,
6898 "XML conditional section not closed\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006899 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006900 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006901 ctxt->disableSAX = 1;
Daniel Veillard71b656e2000-01-05 14:46:17 +00006902 } else {
6903 SKIP(3);
Daniel Veillard011b63c1999-06-02 17:44:04 +00006904 }
6905}
6906
6907/**
Daniel Veillard00fdf371999-10-08 09:40:39 +00006908 * xmlParseExternalSubset:
Daniel Veillard011b63c1999-06-02 17:44:04 +00006909 * @ctxt: an XML parser context
Daniel Veillard00fdf371999-10-08 09:40:39 +00006910 * @ExternalID: the external identifier
6911 * @SystemID: the system identifier (or URL)
Daniel Veillard011b63c1999-06-02 17:44:04 +00006912 *
6913 * parse Markup declarations from an external subset
6914 *
6915 * [30] extSubset ::= textDecl? extSubsetDecl
6916 *
6917 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
Daniel Veillard011b63c1999-06-02 17:44:04 +00006918 */
6919void
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006920xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
6921 const xmlChar *SystemID) {
Daniel Veillarda819dac1999-11-24 18:04:22 +00006922 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00006923 if ((RAW == '<') && (NXT(1) == '?') &&
Daniel Veillard011b63c1999-06-02 17:44:04 +00006924 (NXT(2) == 'x') && (NXT(3) == 'm') &&
6925 (NXT(4) == 'l')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00006926 xmlParseTextDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00006927 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6928 /*
6929 * The XML REC instructs us to stop parsing right here
6930 */
6931 ctxt->instate = XML_PARSER_EOF;
6932 return;
6933 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006934 }
6935 if (ctxt->myDoc == NULL) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00006936 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
Daniel Veillard011b63c1999-06-02 17:44:04 +00006937 }
6938 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
6939 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
6940
Daniel Veillardb05deb71999-08-10 19:04:08 +00006941 ctxt->instate = XML_PARSER_DTD;
6942 ctxt->external = 1;
Daniel Veillardcf461992000-03-14 18:30:20 +00006943 while (((RAW == '<') && (NXT(1) == '?')) ||
6944 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard011b63c1999-06-02 17:44:04 +00006945 IS_BLANK(CUR)) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006946 const xmlChar *check = CUR_PTR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00006947 int cons = ctxt->input->consumed;
Daniel Veillardcf461992000-03-14 18:30:20 +00006948 int tok = ctxt->token;
Daniel Veillardb96e6431999-08-29 21:02:19 +00006949
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00006950 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00006951 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006952 xmlParseConditionalSections(ctxt);
6953 } else if (IS_BLANK(CUR)) {
6954 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00006955 } else if (RAW == '%') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006956 xmlParsePEReference(ctxt);
6957 } else
6958 xmlParseMarkupDecl(ctxt);
6959
6960 /*
6961 * Pop-up of finished entities.
6962 */
Daniel Veillardcf461992000-03-14 18:30:20 +00006963 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard011b63c1999-06-02 17:44:04 +00006964 xmlPopInput(ctxt);
6965
Daniel Veillardcf461992000-03-14 18:30:20 +00006966 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
6967 (tok == ctxt->token)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00006968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6969 ctxt->sax->error(ctxt->userData,
6970 "Content error in the external subset\n");
6971 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006972 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006973 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
Daniel Veillardb96e6431999-08-29 21:02:19 +00006974 break;
6975 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00006976 }
6977
Daniel Veillardcf461992000-03-14 18:30:20 +00006978 if (RAW != 0) {
Daniel Veillard011b63c1999-06-02 17:44:04 +00006979 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6980 ctxt->sax->error(ctxt->userData,
6981 "Extra content at the end of the document\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00006982 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006983 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00006984 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00006985 }
6986
6987}
6988
6989/**
Daniel Veillard011b63c1999-06-02 17:44:04 +00006990 * xmlParseReference:
6991 * @ctxt: an XML parser context
6992 *
6993 * parse and handle entity references in content, depending on the SAX
6994 * interface, this may end-up in a call to character() if this is a
6995 * CharRef, a predefined entity, if there is no reference() callback.
6996 * or if the parser was asked to switch to that mode.
6997 *
6998 * [67] Reference ::= EntityRef | CharRef
6999 */
7000void
7001xmlParseReference(xmlParserCtxtPtr ctxt) {
7002 xmlEntityPtr ent;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007003 xmlChar *val;
Daniel Veillardcf461992000-03-14 18:30:20 +00007004 if (RAW != '&') return;
Daniel Veillard011b63c1999-06-02 17:44:04 +00007005
Daniel Veillardb96e6431999-08-29 21:02:19 +00007006 if (ctxt->inputNr > 1) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007007 xmlChar cur[2] = { '&' , 0 } ;
Daniel Veillardb96e6431999-08-29 21:02:19 +00007008
Daniel Veillardcf461992000-03-14 18:30:20 +00007009 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7010 (!ctxt->disableSAX))
Daniel Veillardb96e6431999-08-29 21:02:19 +00007011 ctxt->sax->characters(ctxt->userData, cur, 1);
7012 if (ctxt->token == '&')
7013 ctxt->token = 0;
7014 else {
7015 SKIP(1);
7016 }
7017 return;
7018 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00007019 if (NXT(1) == '#') {
Daniel Veillardcf461992000-03-14 18:30:20 +00007020 int i = 0;
7021 xmlChar out[10];
7022 int hex = NXT(2);
Daniel Veillard011b63c1999-06-02 17:44:04 +00007023 int val = xmlParseCharRef(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00007024
Daniel Veillardbe803962000-06-28 23:40:59 +00007025 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
Daniel Veillardcf461992000-03-14 18:30:20 +00007026 /*
7027 * So we are using non-UTF-8 buffers
7028 * Check that the char fit on 8bits, if not
7029 * generate a CharRef.
7030 */
7031 if (val <= 0xFF) {
7032 out[0] = val;
7033 out[1] = 0;
7034 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7035 (!ctxt->disableSAX))
7036 ctxt->sax->characters(ctxt->userData, out, 1);
7037 } else {
7038 if ((hex == 'x') || (hex == 'X'))
7039 sprintf((char *)out, "#x%X", val);
7040 else
7041 sprintf((char *)out, "#%d", val);
7042 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7043 (!ctxt->disableSAX))
7044 ctxt->sax->reference(ctxt->userData, out);
7045 }
7046 } else {
7047 /*
7048 * Just encode the value in UTF-8
7049 */
7050 COPY_BUF(0 ,out, i, val);
7051 out[i] = 0;
7052 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7053 (!ctxt->disableSAX))
7054 ctxt->sax->characters(ctxt->userData, out, i);
7055 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00007056 } else {
7057 ent = xmlParseEntityRef(ctxt);
7058 if (ent == NULL) return;
7059 if ((ent->name != NULL) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007060 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7061 xmlNodePtr list = NULL;
7062 int ret;
7063
7064
7065 /*
7066 * The first reference to the entity trigger a parsing phase
7067 * where the ent->children is filled with the result from
7068 * the parsing.
7069 */
7070 if (ent->children == NULL) {
7071 xmlChar *value;
7072 value = ent->content;
7073
7074 /*
7075 * Check that this entity is well formed
7076 */
7077 if ((value != NULL) &&
7078 (value[1] == 0) && (value[0] == '<') &&
7079 (!xmlStrcmp(ent->name, BAD_CAST "lt"))) {
7080 /*
Daniel Veillarde0854c32000-08-27 21:12:29 +00007081 * DONE: get definite answer on this !!!
Daniel Veillardcf461992000-03-14 18:30:20 +00007082 * Lots of entity decls are used to declare a single
7083 * char
7084 * <!ENTITY lt "<">
7085 * Which seems to be valid since
7086 * 2.4: The ampersand character (&) and the left angle
7087 * bracket (<) may appear in their literal form only
7088 * when used ... They are also legal within the literal
7089 * entity value of an internal entity declaration;i
7090 * see "4.3.2 Well-Formed Parsed Entities".
7091 * IMHO 2.4 and 4.3.2 are directly in contradiction.
7092 * Looking at the OASIS test suite and James Clark
7093 * tests, this is broken. However the XML REC uses
7094 * it. Is the XML REC not well-formed ????
7095 * This is a hack to avoid this problem
Daniel Veillarde0854c32000-08-27 21:12:29 +00007096 *
7097 * ANSWER: since lt gt amp .. are already defined,
7098 * this is a redefinition and hence the fact that the
7099 * contentis not well balanced is not a Wf error, this
7100 * is lousy but acceptable.
Daniel Veillardcf461992000-03-14 18:30:20 +00007101 */
7102 list = xmlNewDocText(ctxt->myDoc, value);
7103 if (list != NULL) {
7104 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
7105 (ent->children == NULL)) {
7106 ent->children = list;
7107 ent->last = list;
7108 list->parent = (xmlNodePtr) ent;
7109 } else {
7110 xmlFreeNodeList(list);
7111 }
7112 } else if (list != NULL) {
7113 xmlFreeNodeList(list);
7114 }
7115 } else {
7116 /*
7117 * 4.3.2: An internal general parsed entity is well-formed
7118 * if its replacement text matches the production labeled
7119 * content.
7120 */
7121 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7122 ctxt->depth++;
7123 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
7124 ctxt->sax, NULL, ctxt->depth,
7125 value, &list);
7126 ctxt->depth--;
7127 } else if (ent->etype ==
7128 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7129 ctxt->depth++;
7130 ret = xmlParseExternalEntity(ctxt->myDoc,
7131 ctxt->sax, NULL, ctxt->depth,
7132 ent->SystemID, ent->ExternalID, &list);
7133 ctxt->depth--;
7134 } else {
7135 ret = -1;
7136 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7137 ctxt->sax->error(ctxt->userData,
7138 "Internal: invalid entity type\n");
7139 }
7140 if (ret == XML_ERR_ENTITY_LOOP) {
7141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7142 ctxt->sax->error(ctxt->userData,
7143 "Detected entity reference loop\n");
7144 ctxt->wellFormed = 0;
7145 ctxt->disableSAX = 1;
7146 ctxt->errNo = XML_ERR_ENTITY_LOOP;
7147 } else if ((ret == 0) && (list != NULL)) {
7148 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
7149 (ent->children == NULL)) {
7150 ent->children = list;
7151 while (list != NULL) {
7152 list->parent = (xmlNodePtr) ent;
7153 if (list->next == NULL)
7154 ent->last = list;
7155 list = list->next;
7156 }
7157 } else {
7158 xmlFreeNodeList(list);
7159 }
7160 } else if (ret > 0) {
7161 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7162 ctxt->sax->error(ctxt->userData,
7163 "Entity value required\n");
7164 ctxt->errNo = ret;
7165 ctxt->wellFormed = 0;
7166 ctxt->disableSAX = 1;
7167 } else if (list != NULL) {
7168 xmlFreeNodeList(list);
7169 }
7170 }
7171 }
Daniel Veillardb96e6431999-08-29 21:02:19 +00007172 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007173 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00007174 /*
7175 * Create a node.
7176 */
7177 ctxt->sax->reference(ctxt->userData, ent->name);
7178 return;
7179 } else if (ctxt->replaceEntities) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00007180 if ((ctxt->node != NULL) && (ent->children != NULL)) {
7181 /*
7182 * Seems we are generating the DOM content, do
7183 * a simple tree copy
7184 */
7185 xmlNodePtr new;
7186 new = xmlCopyNodeList(ent->children);
7187
7188 xmlAddChildList(ctxt->node, new);
7189 /*
7190 * This is to avoid a nasty side effect, see
7191 * characters() in SAX.c
7192 */
7193 ctxt->nodemem = 0;
7194 ctxt->nodelen = 0;
7195 return;
7196 } else {
7197 /*
7198 * Probably running in SAX mode
7199 */
7200 xmlParserInputPtr input;
Daniel Veillard011b63c1999-06-02 17:44:04 +00007201
Daniel Veillarde0854c32000-08-27 21:12:29 +00007202 input = xmlNewEntityInputStream(ctxt, ent);
7203 xmlPushInput(ctxt, input);
7204 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
7205 (RAW == '<') && (NXT(1) == '?') &&
7206 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7207 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7208 xmlParseTextDecl(ctxt);
7209 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7210 /*
7211 * The XML REC instructs us to stop parsing right here
7212 */
7213 ctxt->instate = XML_PARSER_EOF;
7214 return;
7215 }
7216 if (input->standalone == 1) {
7217 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7218 ctxt->sax->error(ctxt->userData,
7219 "external parsed entities cannot be standalone\n");
7220 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
7221 ctxt->wellFormed = 0;
7222 ctxt->disableSAX = 1;
7223 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00007224 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00007225 return;
Daniel Veillardcf461992000-03-14 18:30:20 +00007226 }
Daniel Veillardb96e6431999-08-29 21:02:19 +00007227 }
Daniel Veillarde0854c32000-08-27 21:12:29 +00007228 } else {
7229 val = ent->content;
7230 if (val == NULL) return;
7231 /*
7232 * inline the entity.
7233 */
7234 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7235 (!ctxt->disableSAX))
7236 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
Daniel Veillard011b63c1999-06-02 17:44:04 +00007237 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00007238 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007239}
7240
Daniel Veillard11e00581998-10-24 18:27:49 +00007241/**
7242 * xmlParseEntityRef:
7243 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00007244 *
7245 * parse ENTITY references declarations
Daniel Veillard260a68f1998-08-13 03:39:55 +00007246 *
7247 * [68] EntityRef ::= '&' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00007248 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00007249 * [ WFC: Entity Declared ]
7250 * In a document without any DTD, a document with only an internal DTD
7251 * subset which contains no parameter entity references, or a document
7252 * with "standalone='yes'", the Name given in the entity reference
7253 * must match that in an entity declaration, except that well-formed
7254 * documents need not declare any of the following entities: amp, lt,
7255 * gt, apos, quot. The declaration of a parameter entity must precede
7256 * any reference to it. Similarly, the declaration of a general entity
7257 * must precede any reference to it which appears in a default value in an
7258 * attribute-list declaration. Note that if entities are declared in the
7259 * external subset or in external parameter entities, a non-validating
7260 * processor is not obligated to read and process their declarations;
7261 * for such documents, the rule that an entity must be declared is a
7262 * well-formedness constraint only if standalone='yes'.
7263 *
7264 * [ WFC: Parsed Entity ]
7265 * An entity reference must not contain the name of an unparsed entity
7266 *
Daniel Veillard011b63c1999-06-02 17:44:04 +00007267 * Returns the xmlEntityPtr if found, or NULL otherwise.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007268 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00007269xmlEntityPtr
Daniel Veillard0ba4d531998-11-01 19:34:31 +00007270xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007271 xmlChar *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00007272 xmlEntityPtr ent = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007273
Daniel Veillarde2d034d1999-07-27 19:52:06 +00007274 GROW;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007275
Daniel Veillardcf461992000-03-14 18:30:20 +00007276 if (RAW == '&') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00007277 NEXT;
7278 name = xmlParseName(ctxt);
7279 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007280 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00007281 ctxt->sax->error(ctxt->userData,
7282 "xmlParseEntityRef: no name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007283 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007284 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007285 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007286 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007287 if (RAW == ';') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00007288 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007289 /*
Daniel Veillard011b63c1999-06-02 17:44:04 +00007290 * Ask first SAX for entity resolution, otherwise try the
7291 * predefined set.
7292 */
7293 if (ctxt->sax != NULL) {
7294 if (ctxt->sax->getEntity != NULL)
7295 ent = ctxt->sax->getEntity(ctxt->userData, name);
7296 if (ent == NULL)
7297 ent = xmlGetPredefinedEntity(name);
7298 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00007299 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00007300 * [ WFC: Entity Declared ]
7301 * In a document without any DTD, a document with only an
7302 * internal DTD subset which contains no parameter entity
7303 * references, or a document with "standalone='yes'", the
7304 * Name given in the entity reference must match that in an
7305 * entity declaration, except that well-formed documents
7306 * need not declare any of the following entities: amp, lt,
7307 * gt, apos, quot.
7308 * The declaration of a parameter entity must precede any
7309 * reference to it.
7310 * Similarly, the declaration of a general entity must
7311 * precede any reference to it which appears in a default
7312 * value in an attribute-list declaration. Note that if
7313 * entities are declared in the external subset or in
7314 * external parameter entities, a non-validating processor
7315 * is not obligated to read and process their declarations;
7316 * for such documents, the rule that an entity must be
7317 * declared is a well-formedness constraint only if
7318 * standalone='yes'.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007319 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00007320 if (ent == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007321 if ((ctxt->standalone == 1) ||
7322 ((ctxt->hasExternalSubset == 0) &&
7323 (ctxt->hasPErefs == 0))) {
7324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard011b63c1999-06-02 17:44:04 +00007325 ctxt->sax->error(ctxt->userData,
7326 "Entity '%s' not defined\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007327 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillard011b63c1999-06-02 17:44:04 +00007328 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007329 ctxt->disableSAX = 1;
Daniel Veillard011b63c1999-06-02 17:44:04 +00007330 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007331 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7332 ctxt->sax->warning(ctxt->userData,
7333 "Entity '%s' not defined\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007334 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard011b63c1999-06-02 17:44:04 +00007335 }
7336 }
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007337
7338 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00007339 * [ WFC: Parsed Entity ]
7340 * An entity reference must not contain the name of an
7341 * unparsed entity
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007342 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007343 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7345 ctxt->sax->error(ctxt->userData,
7346 "Entity reference to unparsed entity %s\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007347 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007348 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007349 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007350 }
7351
7352 /*
7353 * [ WFC: No External Entity References ]
7354 * Attribute values cannot contain direct or indirect
7355 * entity references to external entities.
7356 */
7357 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007358 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7360 ctxt->sax->error(ctxt->userData,
7361 "Attribute references external entity '%s'\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007362 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007363 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007364 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007365 }
7366 /*
7367 * [ WFC: No < in Attribute Values ]
7368 * The replacement text of any entity referred to directly or
7369 * indirectly in an attribute value (other than "&lt;") must
7370 * not contain a <.
7371 */
7372 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardb96e6431999-08-29 21:02:19 +00007373 (ent != NULL) &&
7374 (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
Daniel Veillardb05deb71999-08-10 19:04:08 +00007375 (ent->content != NULL) &&
7376 (xmlStrchr(ent->content, '<'))) {
7377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7378 ctxt->sax->error(ctxt->userData,
7379 "'<' in entity '%s' is not allowed in attributes values\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007380 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007381 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007382 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007383 }
7384
7385 /*
7386 * Internal check, no parameter entities here ...
7387 */
7388 else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007389 switch (ent->etype) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007390 case XML_INTERNAL_PARAMETER_ENTITY:
7391 case XML_EXTERNAL_PARAMETER_ENTITY:
7392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007393 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007394 "Attempt to reference the parameter entity '%s'\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007395 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007396 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007397 ctxt->disableSAX = 1;
7398 break;
7399 default:
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007400 break;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007401 }
7402 }
7403
7404 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00007405 * [ WFC: No Recursion ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00007406 * TODO A parsed entity must not contain a recursive reference
7407 * to itself, either directly or indirectly.
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007408 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00007409
Daniel Veillard011b63c1999-06-02 17:44:04 +00007410 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007412 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007413 "xmlParseEntityRef: expecting ';'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007414 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007415 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007416 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007417 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00007418 xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007419 }
7420 }
Daniel Veillard011b63c1999-06-02 17:44:04 +00007421 return(ent);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007422}
Daniel Veillard10a2c651999-12-12 13:03:50 +00007423/**
7424 * xmlParseStringEntityRef:
7425 * @ctxt: an XML parser context
7426 * @str: a pointer to an index in the string
7427 *
7428 * parse ENTITY references declarations, but this version parses it from
7429 * a string value.
7430 *
7431 * [68] EntityRef ::= '&' Name ';'
7432 *
7433 * [ WFC: Entity Declared ]
7434 * In a document without any DTD, a document with only an internal DTD
7435 * subset which contains no parameter entity references, or a document
7436 * with "standalone='yes'", the Name given in the entity reference
7437 * must match that in an entity declaration, except that well-formed
7438 * documents need not declare any of the following entities: amp, lt,
7439 * gt, apos, quot. The declaration of a parameter entity must precede
7440 * any reference to it. Similarly, the declaration of a general entity
7441 * must precede any reference to it which appears in a default value in an
7442 * attribute-list declaration. Note that if entities are declared in the
7443 * external subset or in external parameter entities, a non-validating
7444 * processor is not obligated to read and process their declarations;
7445 * for such documents, the rule that an entity must be declared is a
7446 * well-formedness constraint only if standalone='yes'.
7447 *
7448 * [ WFC: Parsed Entity ]
7449 * An entity reference must not contain the name of an unparsed entity
7450 *
7451 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7452 * is updated to the current location in the string.
7453 */
7454xmlEntityPtr
7455xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7456 xmlChar *name;
7457 const xmlChar *ptr;
7458 xmlChar cur;
7459 xmlEntityPtr ent = NULL;
7460
Daniel Veillardcf461992000-03-14 18:30:20 +00007461 if ((str == NULL) || (*str == NULL))
7462 return(NULL);
Daniel Veillard10a2c651999-12-12 13:03:50 +00007463 ptr = *str;
7464 cur = *ptr;
7465 if (cur == '&') {
7466 ptr++;
7467 cur = *ptr;
7468 name = xmlParseStringName(ctxt, &ptr);
7469 if (name == NULL) {
7470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7471 ctxt->sax->error(ctxt->userData,
7472 "xmlParseEntityRef: no name\n");
7473 ctxt->errNo = XML_ERR_NAME_REQUIRED;
7474 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007475 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007476 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007477 if (*ptr == ';') {
7478 ptr++;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007479 /*
7480 * Ask first SAX for entity resolution, otherwise try the
7481 * predefined set.
7482 */
7483 if (ctxt->sax != NULL) {
7484 if (ctxt->sax->getEntity != NULL)
7485 ent = ctxt->sax->getEntity(ctxt->userData, name);
7486 if (ent == NULL)
7487 ent = xmlGetPredefinedEntity(name);
7488 }
7489 /*
7490 * [ WFC: Entity Declared ]
7491 * In a document without any DTD, a document with only an
7492 * internal DTD subset which contains no parameter entity
7493 * references, or a document with "standalone='yes'", the
7494 * Name given in the entity reference must match that in an
7495 * entity declaration, except that well-formed documents
7496 * need not declare any of the following entities: amp, lt,
7497 * gt, apos, quot.
7498 * The declaration of a parameter entity must precede any
7499 * reference to it.
7500 * Similarly, the declaration of a general entity must
7501 * precede any reference to it which appears in a default
7502 * value in an attribute-list declaration. Note that if
7503 * entities are declared in the external subset or in
7504 * external parameter entities, a non-validating processor
7505 * is not obligated to read and process their declarations;
7506 * for such documents, the rule that an entity must be
7507 * declared is a well-formedness constraint only if
7508 * standalone='yes'.
7509 */
7510 if (ent == NULL) {
7511 if ((ctxt->standalone == 1) ||
7512 ((ctxt->hasExternalSubset == 0) &&
7513 (ctxt->hasPErefs == 0))) {
7514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7515 ctxt->sax->error(ctxt->userData,
7516 "Entity '%s' not defined\n", name);
7517 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7518 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007519 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007520 } else {
7521 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7522 ctxt->sax->warning(ctxt->userData,
7523 "Entity '%s' not defined\n", name);
7524 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
7525 }
7526 }
7527
7528 /*
7529 * [ WFC: Parsed Entity ]
7530 * An entity reference must not contain the name of an
7531 * unparsed entity
7532 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007533 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00007534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7535 ctxt->sax->error(ctxt->userData,
7536 "Entity reference to unparsed entity %s\n", name);
7537 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
7538 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007539 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007540 }
7541
7542 /*
7543 * [ WFC: No External Entity References ]
7544 * Attribute values cannot contain direct or indirect
7545 * entity references to external entities.
7546 */
7547 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
Daniel Veillardcf461992000-03-14 18:30:20 +00007548 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00007549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7550 ctxt->sax->error(ctxt->userData,
7551 "Attribute references external entity '%s'\n", name);
7552 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
7553 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007554 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007555 }
7556 /*
7557 * [ WFC: No < in Attribute Values ]
7558 * The replacement text of any entity referred to directly or
7559 * indirectly in an attribute value (other than "&lt;") must
7560 * not contain a <.
7561 */
7562 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7563 (ent != NULL) &&
7564 (xmlStrcmp(ent->name, BAD_CAST "lt")) &&
7565 (ent->content != NULL) &&
7566 (xmlStrchr(ent->content, '<'))) {
7567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7568 ctxt->sax->error(ctxt->userData,
7569 "'<' in entity '%s' is not allowed in attributes values\n", name);
7570 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
7571 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007572 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007573 }
7574
7575 /*
7576 * Internal check, no parameter entities here ...
7577 */
7578 else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007579 switch (ent->etype) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00007580 case XML_INTERNAL_PARAMETER_ENTITY:
7581 case XML_EXTERNAL_PARAMETER_ENTITY:
7582 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7583 ctxt->sax->error(ctxt->userData,
7584 "Attempt to reference the parameter entity '%s'\n", name);
7585 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
7586 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007587 ctxt->disableSAX = 1;
7588 break;
7589 default:
Daniel Veillard10a2c651999-12-12 13:03:50 +00007590 break;
7591 }
7592 }
7593
7594 /*
7595 * [ WFC: No Recursion ]
7596 * TODO A parsed entity must not contain a recursive reference
7597 * to itself, either directly or indirectly.
7598 */
7599
7600 } else {
7601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7602 ctxt->sax->error(ctxt->userData,
7603 "xmlParseEntityRef: expecting ';'\n");
7604 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7605 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007606 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007607 }
7608 xmlFree(name);
7609 }
7610 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007611 *str = ptr;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007612 return(ent);
7613}
Daniel Veillard260a68f1998-08-13 03:39:55 +00007614
Daniel Veillard11e00581998-10-24 18:27:49 +00007615/**
7616 * xmlParsePEReference:
7617 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00007618 *
7619 * parse PEReference declarations
Daniel Veillard011b63c1999-06-02 17:44:04 +00007620 * The entity content is handled directly by pushing it's content as
7621 * a new input stream.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007622 *
7623 * [69] PEReference ::= '%' Name ';'
Daniel Veillard1e346af1999-02-22 10:33:01 +00007624 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00007625 * [ WFC: No Recursion ]
7626 * TODO A parsed entity must not contain a recursive
7627 * reference to itself, either directly or indirectly.
7628 *
7629 * [ WFC: Entity Declared ]
7630 * In a document without any DTD, a document with only an internal DTD
7631 * subset which contains no parameter entity references, or a document
7632 * with "standalone='yes'", ... ... The declaration of a parameter
7633 * entity must precede any reference to it...
7634 *
7635 * [ VC: Entity Declared ]
7636 * In a document with an external subset or external parameter entities
7637 * with "standalone='no'", ... ... The declaration of a parameter entity
7638 * must precede any reference to it...
7639 *
7640 * [ WFC: In DTD ]
7641 * Parameter-entity references may only appear in the DTD.
7642 * NOTE: misleading but this is handled.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007643 */
Daniel Veillard011b63c1999-06-02 17:44:04 +00007644void
Daniel Veillard0ba4d531998-11-01 19:34:31 +00007645xmlParsePEReference(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007646 xmlChar *name;
Daniel Veillard517752b1999-04-05 12:20:10 +00007647 xmlEntityPtr entity = NULL;
Daniel Veillardccb09631998-10-27 06:21:04 +00007648 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007649
Daniel Veillardcf461992000-03-14 18:30:20 +00007650 if (RAW == '%') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00007651 NEXT;
7652 name = xmlParseName(ctxt);
7653 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00007655 ctxt->sax->error(ctxt->userData,
7656 "xmlParsePEReference: no name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007657 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007658 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007659 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007660 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007661 if (RAW == ';') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00007662 NEXT;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007663 if ((ctxt->sax != NULL) &&
7664 (ctxt->sax->getParameterEntity != NULL))
7665 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7666 name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007667 if (entity == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007668 /*
7669 * [ WFC: Entity Declared ]
7670 * In a document without any DTD, a document with only an
7671 * internal DTD subset which contains no parameter entity
7672 * references, or a document with "standalone='yes'", ...
7673 * ... The declaration of a parameter entity must precede
7674 * any reference to it...
7675 */
7676 if ((ctxt->standalone == 1) ||
7677 ((ctxt->hasExternalSubset == 0) &&
7678 (ctxt->hasPErefs == 0))) {
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00007679 if ((!ctxt->disableSAX) &&
7680 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00007681 ctxt->sax->error(ctxt->userData,
7682 "PEReference: %%%s; not found\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007683 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007684 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007685 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007686 } else {
7687 /*
7688 * [ VC: Entity Declared ]
7689 * In a document with an external subset or external
7690 * parameter entities with "standalone='no'", ...
7691 * ... The declaration of a parameter entity must precede
7692 * any reference to it...
7693 */
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +00007694 if ((!ctxt->disableSAX) &&
7695 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00007696 ctxt->sax->warning(ctxt->userData,
7697 "PEReference: %%%s; not found\n", name);
7698 ctxt->valid = 0;
7699 }
Daniel Veillardccb09631998-10-27 06:21:04 +00007700 } else {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007701 /*
7702 * Internal checking in case the entity quest barfed
7703 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007704 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7705 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007706 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7707 ctxt->sax->warning(ctxt->userData,
7708 "Internal: %%%s; is not a parameter entity\n", name);
7709 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00007710 /*
7711 * TODO !!!
7712 * handle the extra spaces added before and after
7713 * c.f. http://www.w3.org/TR/REC-xml#as-PE
7714 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00007715 input = xmlNewEntityInputStream(ctxt, entity);
7716 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +00007717 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
7718 (RAW == '<') && (NXT(1) == '?') &&
7719 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7720 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7721 xmlParseTextDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00007722 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7723 /*
7724 * The XML REC instructs us to stop parsing
7725 * right here
7726 */
7727 ctxt->instate = XML_PARSER_EOF;
7728 xmlFree(name);
7729 return;
7730 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007731 }
7732 if (ctxt->token == 0)
7733 ctxt->token = ' ';
Daniel Veillardb05deb71999-08-10 19:04:08 +00007734 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007735 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00007736 ctxt->hasPErefs = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007737 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00007739 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007740 "xmlParsePEReference: expecting ';'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007741 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007742 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007743 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007744 }
Daniel Veillard6454aec1999-09-02 22:04:43 +00007745 xmlFree(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007746 }
7747 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007748}
7749
Daniel Veillard11e00581998-10-24 18:27:49 +00007750/**
Daniel Veillard10a2c651999-12-12 13:03:50 +00007751 * xmlParseStringPEReference:
7752 * @ctxt: an XML parser context
7753 * @str: a pointer to an index in the string
7754 *
7755 * parse PEReference declarations
7756 *
7757 * [69] PEReference ::= '%' Name ';'
7758 *
7759 * [ WFC: No Recursion ]
7760 * TODO A parsed entity must not contain a recursive
7761 * reference to itself, either directly or indirectly.
7762 *
7763 * [ WFC: Entity Declared ]
7764 * In a document without any DTD, a document with only an internal DTD
7765 * subset which contains no parameter entity references, or a document
7766 * with "standalone='yes'", ... ... The declaration of a parameter
7767 * entity must precede any reference to it...
7768 *
7769 * [ VC: Entity Declared ]
7770 * In a document with an external subset or external parameter entities
7771 * with "standalone='no'", ... ... The declaration of a parameter entity
7772 * must precede any reference to it...
7773 *
7774 * [ WFC: In DTD ]
7775 * Parameter-entity references may only appear in the DTD.
7776 * NOTE: misleading but this is handled.
7777 *
7778 * Returns the string of the entity content.
7779 * str is updated to the current value of the index
7780 */
7781xmlEntityPtr
7782xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
7783 const xmlChar *ptr;
7784 xmlChar cur;
7785 xmlChar *name;
7786 xmlEntityPtr entity = NULL;
7787
7788 if ((str == NULL) || (*str == NULL)) return(NULL);
7789 ptr = *str;
7790 cur = *ptr;
7791 if (cur == '%') {
7792 ptr++;
7793 cur = *ptr;
7794 name = xmlParseStringName(ctxt, &ptr);
7795 if (name == NULL) {
7796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7797 ctxt->sax->error(ctxt->userData,
7798 "xmlParseStringPEReference: no name\n");
7799 ctxt->errNo = XML_ERR_NAME_REQUIRED;
7800 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007801 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007802 } else {
7803 cur = *ptr;
7804 if (cur == ';') {
7805 ptr++;
7806 cur = *ptr;
7807 if ((ctxt->sax != NULL) &&
7808 (ctxt->sax->getParameterEntity != NULL))
7809 entity = ctxt->sax->getParameterEntity(ctxt->userData,
7810 name);
7811 if (entity == NULL) {
7812 /*
7813 * [ WFC: Entity Declared ]
7814 * In a document without any DTD, a document with only an
7815 * internal DTD subset which contains no parameter entity
7816 * references, or a document with "standalone='yes'", ...
7817 * ... The declaration of a parameter entity must precede
7818 * any reference to it...
7819 */
7820 if ((ctxt->standalone == 1) ||
7821 ((ctxt->hasExternalSubset == 0) &&
7822 (ctxt->hasPErefs == 0))) {
7823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7824 ctxt->sax->error(ctxt->userData,
7825 "PEReference: %%%s; not found\n", name);
7826 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
7827 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007828 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007829 } else {
7830 /*
7831 * [ VC: Entity Declared ]
7832 * In a document with an external subset or external
7833 * parameter entities with "standalone='no'", ...
7834 * ... The declaration of a parameter entity must
7835 * precede any reference to it...
7836 */
7837 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7838 ctxt->sax->warning(ctxt->userData,
7839 "PEReference: %%%s; not found\n", name);
7840 ctxt->valid = 0;
7841 }
7842 } else {
7843 /*
7844 * Internal checking in case the entity quest barfed
7845 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007846 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
7847 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00007848 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7849 ctxt->sax->warning(ctxt->userData,
7850 "Internal: %%%s; is not a parameter entity\n", name);
7851 }
7852 }
7853 ctxt->hasPErefs = 1;
7854 } else {
7855 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7856 ctxt->sax->error(ctxt->userData,
7857 "xmlParseStringPEReference: expecting ';'\n");
7858 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
7859 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007860 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00007861 }
7862 xmlFree(name);
7863 }
7864 }
7865 *str = ptr;
7866 return(entity);
7867}
7868
7869/**
Daniel Veillardcf461992000-03-14 18:30:20 +00007870 * xmlParseDocTypeDecl:
Daniel Veillard11e00581998-10-24 18:27:49 +00007871 * @ctxt: an XML parser context
7872 *
7873 * parse a DOCTYPE declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00007874 *
7875 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
7876 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
Daniel Veillardb05deb71999-08-10 19:04:08 +00007877 *
7878 * [ VC: Root Element Type ]
7879 * The Name in the document type declaration must match the element
7880 * type of the root element.
Daniel Veillard260a68f1998-08-13 03:39:55 +00007881 */
7882
Daniel Veillard0ba4d531998-11-01 19:34:31 +00007883void
7884xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00007885 xmlChar *name = NULL;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007886 xmlChar *ExternalID = NULL;
7887 xmlChar *URI = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007888
7889 /*
7890 * We know that '<!DOCTYPE' has been detected.
7891 */
7892 SKIP(9);
7893
7894 SKIP_BLANKS;
7895
7896 /*
7897 * Parse the DOCTYPE name.
7898 */
7899 name = xmlParseName(ctxt);
7900 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00007901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00007902 ctxt->sax->error(ctxt->userData,
7903 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00007904 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007905 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007906 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007907 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007908 ctxt->intSubName = name;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007909
7910 SKIP_BLANKS;
7911
7912 /*
7913 * Check for SystemID and ExternalID
7914 */
Daniel Veillard1e346af1999-02-22 10:33:01 +00007915 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
Daniel Veillardb05deb71999-08-10 19:04:08 +00007916
7917 if ((URI != NULL) || (ExternalID != NULL)) {
7918 ctxt->hasExternalSubset = 1;
7919 }
Daniel Veillardcf461992000-03-14 18:30:20 +00007920 ctxt->extSubURI = URI;
7921 ctxt->extSubSystem = ExternalID;
Daniel Veillardb05deb71999-08-10 19:04:08 +00007922
Daniel Veillard260a68f1998-08-13 03:39:55 +00007923 SKIP_BLANKS;
7924
Daniel Veillard011b63c1999-06-02 17:44:04 +00007925 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00007926 * Create and update the internal subset.
Daniel Veillard011b63c1999-06-02 17:44:04 +00007927 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007928 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
7929 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00007930 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007931
7932 /*
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007933 * Is there any internal subset declarations ?
7934 * they are handled separately in xmlParseInternalSubset()
7935 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007936 if (RAW == '[')
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007937 return;
7938
7939 /*
7940 * We should be at the end of the DOCTYPE declaration.
7941 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007942 if (RAW != '>') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7944 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
7945 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007946 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007947 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
7948 }
7949 NEXT;
7950}
7951
7952/**
Daniel Veillardcf461992000-03-14 18:30:20 +00007953 * xmlParseInternalsubset:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007954 * @ctxt: an XML parser context
7955 *
7956 * parse the internal subset declaration
7957 *
7958 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
7959 */
7960
7961void
7962xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
7963 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00007964 * Is there any DTD definition ?
7965 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007966 if (RAW == '[') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00007967 ctxt->instate = XML_PARSER_DTD;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007968 NEXT;
7969 /*
7970 * Parse the succession of Markup declarations and
7971 * PEReferences.
7972 * Subsequence (markupdecl | PEReference | S)*
7973 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007974 while (RAW != ']') {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007975 const xmlChar *check = CUR_PTR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00007976 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00007977
7978 SKIP_BLANKS;
7979 xmlParseMarkupDecl(ctxt);
Daniel Veillardccb09631998-10-27 06:21:04 +00007980 xmlParsePEReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00007981
Daniel Veillard011b63c1999-06-02 17:44:04 +00007982 /*
7983 * Pop-up of finished entities.
7984 */
Daniel Veillardcf461992000-03-14 18:30:20 +00007985 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillard011b63c1999-06-02 17:44:04 +00007986 xmlPopInput(ctxt);
7987
Daniel Veillardc26087b1999-08-30 11:23:51 +00007988 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00007989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7990 ctxt->sax->error(ctxt->userData,
Daniel Veillarddbfd6411999-12-28 16:35:14 +00007991 "xmlParseInternalSubset: error detected in Markup declaration\n");
Daniel Veillardb96e6431999-08-29 21:02:19 +00007992 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00007993 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00007994 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillardb96e6431999-08-29 21:02:19 +00007995 break;
7996 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00007997 }
Daniel Veillard36650692000-07-21 15:16:39 +00007998 if (RAW == ']') {
7999 NEXT;
8000 SKIP_BLANKS;
8001 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008002 }
8003
8004 /*
8005 * We should be at the end of the DOCTYPE declaration.
8006 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008007 if (RAW != '>') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008009 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008010 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008011 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008012 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008013 }
8014 NEXT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008015}
8016
Daniel Veillard11e00581998-10-24 18:27:49 +00008017/**
8018 * xmlParseAttribute:
8019 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008020 * @value: a xmlChar ** used to store the value of the attribute
Daniel Veillard11e00581998-10-24 18:27:49 +00008021 *
8022 * parse an attribute
Daniel Veillard260a68f1998-08-13 03:39:55 +00008023 *
8024 * [41] Attribute ::= Name Eq AttValue
8025 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00008026 * [ WFC: No External Entity References ]
8027 * Attribute values cannot contain direct or indirect entity references
8028 * to external entities.
8029 *
8030 * [ WFC: No < in Attribute Values ]
8031 * The replacement text of any entity referred to directly or indirectly in
8032 * an attribute value (other than "&lt;") must not contain a <.
8033 *
8034 * [ VC: Attribute Value Type ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00008035 * The attribute must have been declared; the value must be of the type
Daniel Veillardb05deb71999-08-10 19:04:08 +00008036 * declared for it.
8037 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00008038 * [25] Eq ::= S? '=' S?
8039 *
8040 * With namespace:
8041 *
8042 * [NS 11] Attribute ::= QName Eq AttValue
8043 *
8044 * Also the case QName == xmlns:??? is handled independently as a namespace
8045 * definition.
Daniel Veillard1e346af1999-02-22 10:33:01 +00008046 *
Daniel Veillard517752b1999-04-05 12:20:10 +00008047 * Returns the attribute name, and the value in *value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008048 */
8049
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008050xmlChar *
8051xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8052 xmlChar *name, *val;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008053
Daniel Veillard517752b1999-04-05 12:20:10 +00008054 *value = NULL;
8055 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008056 if (name == NULL) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008058 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008059 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008060 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008061 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillardccb09631998-10-27 06:21:04 +00008062 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008063 }
8064
8065 /*
8066 * read the value
8067 */
8068 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008069 if (RAW == '=') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008070 NEXT;
8071 SKIP_BLANKS;
Daniel Veillard517752b1999-04-05 12:20:10 +00008072 val = xmlParseAttValue(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00008073 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008074 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008076 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008077 "Specification mandate value for attribute %s\n", name);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008078 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008079 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008080 ctxt->disableSAX = 1;
8081 xmlFree(name);
Daniel Veillardccb09631998-10-27 06:21:04 +00008082 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008083 }
8084
Daniel Veillardcf461992000-03-14 18:30:20 +00008085 /*
8086 * Check that xml:lang conforms to the specification
Daniel Veillarde0854c32000-08-27 21:12:29 +00008087 * No more registered as an error, just generate a warning now
8088 * since this was deprecated in XML second edition
Daniel Veillardcf461992000-03-14 18:30:20 +00008089 */
8090 if (!xmlStrcmp(name, BAD_CAST "xml:lang")) {
8091 if (!xmlCheckLanguageID(val)) {
Daniel Veillarde0854c32000-08-27 21:12:29 +00008092 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
8093 ctxt->sax->warning(ctxt->userData,
8094 "Malformed value for xml:lang : %s\n", val);
Daniel Veillardcf461992000-03-14 18:30:20 +00008095 }
8096 }
8097
8098 /*
8099 * Check that xml:space conforms to the specification
8100 */
8101 if (!xmlStrcmp(name, BAD_CAST "xml:space")) {
8102 if (!xmlStrcmp(val, BAD_CAST "default"))
8103 *(ctxt->space) = 0;
8104 else if (!xmlStrcmp(val, BAD_CAST "preserve"))
8105 *(ctxt->space) = 1;
8106 else {
8107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8108 ctxt->sax->error(ctxt->userData,
8109"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
8110 val);
8111 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
8112 ctxt->wellFormed = 0;
8113 ctxt->disableSAX = 1;
8114 }
8115 }
8116
Daniel Veillard517752b1999-04-05 12:20:10 +00008117 *value = val;
8118 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008119}
8120
Daniel Veillard11e00581998-10-24 18:27:49 +00008121/**
8122 * xmlParseStartTag:
8123 * @ctxt: an XML parser context
8124 *
8125 * parse a start of tag either for rule element or
8126 * EmptyElement. In both case we don't parse the tag closing chars.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008127 *
8128 * [40] STag ::= '<' Name (S Attribute)* S? '>'
8129 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00008130 * [ WFC: Unique Att Spec ]
8131 * No attribute name may appear more than once in the same start-tag or
8132 * empty-element tag.
8133 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00008134 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8135 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00008136 * [ WFC: Unique Att Spec ]
8137 * No attribute name may appear more than once in the same start-tag or
8138 * empty-element tag.
8139 *
Daniel Veillard260a68f1998-08-13 03:39:55 +00008140 * With namespace:
8141 *
8142 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8143 *
8144 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
Daniel Veillard14fff061999-06-22 21:49:07 +00008145 *
Daniel Veillard06047432000-04-24 11:33:38 +00008146 * Returns the element name parsed
Daniel Veillard260a68f1998-08-13 03:39:55 +00008147 */
8148
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008149xmlChar *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008150xmlParseStartTag(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008151 xmlChar *name;
8152 xmlChar *attname;
8153 xmlChar *attvalue;
8154 const xmlChar **atts = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +00008155 int nbatts = 0;
8156 int maxatts = 0;
8157 int i;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008158
Daniel Veillardcf461992000-03-14 18:30:20 +00008159 if (RAW != '<') return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008160 NEXT;
8161
Daniel Veillard517752b1999-04-05 12:20:10 +00008162 name = xmlParseName(ctxt);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008163 if (name == NULL) {
8164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008165 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008166 "xmlParseStartTag: invalid element name\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008167 ctxt->errNo = XML_ERR_NAME_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008168 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008169 ctxt->disableSAX = 1;
Daniel Veillard14fff061999-06-22 21:49:07 +00008170 return(NULL);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008171 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008172
8173 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00008174 * Now parse the attributes, it ends up with the ending
8175 *
8176 * (S Attribute)* S?
8177 */
8178 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008179 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00008180
8181 while ((IS_CHAR(RAW)) &&
8182 (RAW != '>') &&
8183 ((RAW != '/') || (NXT(1) != '>'))) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008184 const xmlChar *q = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008185 int cons = ctxt->input->consumed;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008186
Daniel Veillard517752b1999-04-05 12:20:10 +00008187 attname = xmlParseAttribute(ctxt, &attvalue);
8188 if ((attname != NULL) && (attvalue != NULL)) {
8189 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00008190 * [ WFC: Unique Att Spec ]
8191 * No attribute name may appear more than once in the same
8192 * start-tag or empty-element tag.
Daniel Veillard517752b1999-04-05 12:20:10 +00008193 */
8194 for (i = 0; i < nbatts;i += 2) {
8195 if (!xmlStrcmp(atts[i], attname)) {
8196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardb05deb71999-08-10 19:04:08 +00008197 ctxt->sax->error(ctxt->userData,
8198 "Attribute %s redefined\n",
8199 attname);
Daniel Veillard517752b1999-04-05 12:20:10 +00008200 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008201 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008202 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
Daniel Veillard6454aec1999-09-02 22:04:43 +00008203 xmlFree(attname);
8204 xmlFree(attvalue);
Daniel Veillardb05deb71999-08-10 19:04:08 +00008205 goto failed;
Daniel Veillard517752b1999-04-05 12:20:10 +00008206 }
8207 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008208
Daniel Veillard517752b1999-04-05 12:20:10 +00008209 /*
8210 * Add the pair to atts
8211 */
8212 if (atts == NULL) {
8213 maxatts = 10;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008214 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
Daniel Veillard517752b1999-04-05 12:20:10 +00008215 if (atts == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00008216 fprintf(stderr, "malloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008217 maxatts * (long)sizeof(xmlChar *));
Daniel Veillard14fff061999-06-22 21:49:07 +00008218 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00008219 }
Daniel Veillard51e3b151999-11-12 17:02:31 +00008220 } else if (nbatts + 4 > maxatts) {
Daniel Veillard517752b1999-04-05 12:20:10 +00008221 maxatts *= 2;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008222 atts = (const xmlChar **) xmlRealloc(atts,
8223 maxatts * sizeof(xmlChar *));
Daniel Veillard517752b1999-04-05 12:20:10 +00008224 if (atts == NULL) {
Daniel Veillardbe70ff71999-07-05 16:50:46 +00008225 fprintf(stderr, "realloc of %ld byte failed\n",
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008226 maxatts * (long)sizeof(xmlChar *));
Daniel Veillard14fff061999-06-22 21:49:07 +00008227 return(NULL);
Daniel Veillard517752b1999-04-05 12:20:10 +00008228 }
8229 }
8230 atts[nbatts++] = attname;
8231 atts[nbatts++] = attvalue;
8232 atts[nbatts] = NULL;
8233 atts[nbatts + 1] = NULL;
Daniel Veillardcf461992000-03-14 18:30:20 +00008234 } else {
8235 if (attname != NULL)
8236 xmlFree(attname);
8237 if (attvalue != NULL)
8238 xmlFree(attvalue);
Daniel Veillard517752b1999-04-05 12:20:10 +00008239 }
8240
Daniel Veillardb96e6431999-08-29 21:02:19 +00008241failed:
Daniel Veillardcf461992000-03-14 18:30:20 +00008242
8243 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8244 break;
8245 if (!IS_BLANK(RAW)) {
8246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8247 ctxt->sax->error(ctxt->userData,
8248 "attributes construct error\n");
8249 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
8250 ctxt->wellFormed = 0;
8251 ctxt->disableSAX = 1;
8252 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008253 SKIP_BLANKS;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008254 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008256 ctxt->sax->error(ctxt->userData,
Daniel Veillard260a68f1998-08-13 03:39:55 +00008257 "xmlParseStartTag: problem parsing attributes\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008258 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008259 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008260 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008261 break;
8262 }
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008263 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008264 }
8265
8266 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00008267 * SAX: Start of Element !
8268 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008269 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8270 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00008271 ctxt->sax->startElement(ctxt->userData, name, atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00008272
Daniel Veillard517752b1999-04-05 12:20:10 +00008273 if (atts != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008274 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
Daniel Veillard6454aec1999-09-02 22:04:43 +00008275 xmlFree(atts);
Daniel Veillard517752b1999-04-05 12:20:10 +00008276 }
Daniel Veillard14fff061999-06-22 21:49:07 +00008277 return(name);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008278}
8279
Daniel Veillard11e00581998-10-24 18:27:49 +00008280/**
8281 * xmlParseEndTag:
8282 * @ctxt: an XML parser context
Daniel Veillard11e00581998-10-24 18:27:49 +00008283 *
8284 * parse an end of tag
Daniel Veillard260a68f1998-08-13 03:39:55 +00008285 *
8286 * [42] ETag ::= '</' Name S? '>'
8287 *
8288 * With namespace
8289 *
Daniel Veillard517752b1999-04-05 12:20:10 +00008290 * [NS 9] ETag ::= '</' QName S? '>'
Daniel Veillard260a68f1998-08-13 03:39:55 +00008291 */
8292
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008293void
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008294xmlParseEndTag(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008295 xmlChar *name;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008296 xmlChar *oldname;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008297
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008298 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00008299 if ((RAW != '<') || (NXT(1) != '/')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008301 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008302 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008303 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008304 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008305 return;
8306 }
8307 SKIP(2);
8308
Daniel Veillard517752b1999-04-05 12:20:10 +00008309 name = xmlParseName(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008310
8311 /*
8312 * We should definitely be at the ending "S? '>'" part
8313 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008314 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008315 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008316 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008318 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008319 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008320 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008321 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008322 } else
8323 NEXT;
8324
Daniel Veillard517752b1999-04-05 12:20:10 +00008325 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00008326 * [ WFC: Element Type Match ]
8327 * The Name in an element's end-tag must match the element type in the
8328 * start-tag.
8329 *
Daniel Veillard14fff061999-06-22 21:49:07 +00008330 */
Daniel Veillardda07c342000-01-25 18:31:22 +00008331 if ((name == NULL) || (ctxt->name == NULL) ||
8332 (xmlStrcmp(name, ctxt->name))) {
8333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
8334 if ((name != NULL) && (ctxt->name != NULL)) {
8335 ctxt->sax->error(ctxt->userData,
8336 "Opening and ending tag mismatch: %s and %s\n",
8337 ctxt->name, name);
8338 } else if (ctxt->name != NULL) {
8339 ctxt->sax->error(ctxt->userData,
8340 "Ending tag eror for: %s\n", ctxt->name);
8341 } else {
8342 ctxt->sax->error(ctxt->userData,
8343 "Ending tag error: internal error ???\n");
8344 }
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008345
Daniel Veillardda07c342000-01-25 18:31:22 +00008346 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008347 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
Daniel Veillard14fff061999-06-22 21:49:07 +00008348 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008349 ctxt->disableSAX = 1;
Daniel Veillard14fff061999-06-22 21:49:07 +00008350 }
8351
8352 /*
Daniel Veillard517752b1999-04-05 12:20:10 +00008353 * SAX: End of Tag
8354 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008355 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8356 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00008357 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillard517752b1999-04-05 12:20:10 +00008358
8359 if (name != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +00008360 xmlFree(name);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008361 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008362 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008363 if (oldname != NULL) {
8364#ifdef DEBUG_STACK
8365 fprintf(stderr,"Close: popped %s\n", oldname);
8366#endif
8367 xmlFree(oldname);
8368 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008369 return;
8370}
8371
Daniel Veillard11e00581998-10-24 18:27:49 +00008372/**
8373 * xmlParseCDSect:
8374 * @ctxt: an XML parser context
8375 *
8376 * Parse escaped pure raw content.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008377 *
8378 * [18] CDSect ::= CDStart CData CDEnd
8379 *
8380 * [19] CDStart ::= '<![CDATA['
8381 *
8382 * [20] Data ::= (Char* - (Char* ']]>' Char*))
8383 *
8384 * [21] CDEnd ::= ']]>'
8385 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008386void
8387xmlParseCDSect(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00008388 xmlChar *buf = NULL;
8389 int len = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008390 int size = XML_PARSER_BUFFER_SIZE;
Daniel Veillardcf461992000-03-14 18:30:20 +00008391 int r, rl;
8392 int s, sl;
8393 int cur, l;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008394
Daniel Veillardb05deb71999-08-10 19:04:08 +00008395 if ((NXT(0) == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008396 (NXT(2) == '[') && (NXT(3) == 'C') &&
8397 (NXT(4) == 'D') && (NXT(5) == 'A') &&
8398 (NXT(6) == 'T') && (NXT(7) == 'A') &&
8399 (NXT(8) == '[')) {
8400 SKIP(9);
8401 } else
8402 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008403
8404 ctxt->instate = XML_PARSER_CDATA_SECTION;
Daniel Veillardcf461992000-03-14 18:30:20 +00008405 r = CUR_CHAR(rl);
8406 if (!IS_CHAR(r)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008408 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00008409 "CData section not finished\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008410 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008411 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008412 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008413 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008414 return;
8415 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008416 NEXTL(rl);
8417 s = CUR_CHAR(sl);
8418 if (!IS_CHAR(s)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008419 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008420 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00008421 "CData section not finished\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008422 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008423 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008424 ctxt->disableSAX = 1;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008425 ctxt->instate = XML_PARSER_CONTENT;
8426 return;
8427 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008428 NEXTL(sl);
8429 cur = CUR_CHAR(l);
Daniel Veillard10a2c651999-12-12 13:03:50 +00008430 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8431 if (buf == NULL) {
8432 fprintf(stderr, "malloc of %d byte failed\n", size);
8433 return;
8434 }
Daniel Veillardb05deb71999-08-10 19:04:08 +00008435 while (IS_CHAR(cur) &&
8436 ((r != ']') || (s != ']') || (cur != '>'))) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008437 if (len + 5 >= size) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00008438 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00008439 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00008440 if (buf == NULL) {
8441 fprintf(stderr, "realloc of %d byte failed\n", size);
8442 return;
8443 }
8444 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008445 COPY_BUF(rl,buf,len,r);
Daniel Veillardb05deb71999-08-10 19:04:08 +00008446 r = s;
Daniel Veillardcf461992000-03-14 18:30:20 +00008447 rl = sl;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008448 s = cur;
Daniel Veillardcf461992000-03-14 18:30:20 +00008449 sl = l;
8450 NEXTL(l);
8451 cur = CUR_CHAR(l);
Daniel Veillardb05deb71999-08-10 19:04:08 +00008452 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00008453 buf[len] = 0;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008454 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008455 if (cur != '>') {
Daniel Veillardb05deb71999-08-10 19:04:08 +00008456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008457 ctxt->sax->error(ctxt->userData,
Daniel Veillard10a2c651999-12-12 13:03:50 +00008458 "CData section not finished\n%.50s\n", buf);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008459 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
Daniel Veillardb05deb71999-08-10 19:04:08 +00008460 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008461 ctxt->disableSAX = 1;
Daniel Veillard10a2c651999-12-12 13:03:50 +00008462 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008463 return;
8464 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008465 NEXTL(l);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008466
8467 /*
Daniel Veillard10a2c651999-12-12 13:03:50 +00008468 * Ok the buffer is to be consumed as cdata.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008469 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008470 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00008471 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillard10a2c651999-12-12 13:03:50 +00008472 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008473 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00008474 xmlFree(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008475}
8476
Daniel Veillard11e00581998-10-24 18:27:49 +00008477/**
8478 * xmlParseContent:
8479 * @ctxt: an XML parser context
8480 *
8481 * Parse a content:
Daniel Veillard260a68f1998-08-13 03:39:55 +00008482 *
8483 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8484 */
8485
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008486void
8487xmlParseContent(xmlParserCtxtPtr ctxt) {
Daniel Veillardb05deb71999-08-10 19:04:08 +00008488 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00008489 while (((RAW != 0) || (ctxt->token != 0)) &&
8490 ((RAW != '<') || (NXT(1) != '/'))) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008491 const xmlChar *test = CUR_PTR;
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008492 int cons = ctxt->input->consumed;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008493 xmlChar tok = ctxt->token;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008494
8495 /*
Daniel Veillardcf461992000-03-14 18:30:20 +00008496 * Handle possible processed charrefs.
8497 */
8498 if (ctxt->token != 0) {
8499 xmlParseCharData(ctxt, 0);
8500 }
8501 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00008502 * First case : a Processing Instruction.
8503 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008504 else if ((RAW == '<') && (NXT(1) == '?')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008505 xmlParsePI(ctxt);
8506 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008507
Daniel Veillard260a68f1998-08-13 03:39:55 +00008508 /*
8509 * Second case : a CDSection
8510 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008511 else if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008512 (NXT(2) == '[') && (NXT(3) == 'C') &&
8513 (NXT(4) == 'D') && (NXT(5) == 'A') &&
8514 (NXT(6) == 'T') && (NXT(7) == 'A') &&
8515 (NXT(8) == '[')) {
8516 xmlParseCDSect(ctxt);
8517 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008518
Daniel Veillard260a68f1998-08-13 03:39:55 +00008519 /*
8520 * Third case : a comment
8521 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008522 else if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008523 (NXT(2) == '-') && (NXT(3) == '-')) {
Daniel Veillardb96e6431999-08-29 21:02:19 +00008524 xmlParseComment(ctxt);
Daniel Veillardb05deb71999-08-10 19:04:08 +00008525 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008526 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008527
Daniel Veillard260a68f1998-08-13 03:39:55 +00008528 /*
8529 * Fourth case : a sub-element.
8530 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008531 else if (RAW == '<') {
Daniel Veillard517752b1999-04-05 12:20:10 +00008532 xmlParseElement(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008533 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008534
Daniel Veillard260a68f1998-08-13 03:39:55 +00008535 /*
Daniel Veillardccb09631998-10-27 06:21:04 +00008536 * Fifth case : a reference. If if has not been resolved,
8537 * parsing returns it's Name, create the node
Daniel Veillard260a68f1998-08-13 03:39:55 +00008538 */
Daniel Veillardb05deb71999-08-10 19:04:08 +00008539
Daniel Veillardcf461992000-03-14 18:30:20 +00008540 else if (RAW == '&') {
Daniel Veillard011b63c1999-06-02 17:44:04 +00008541 xmlParseReference(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008542 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008543
Daniel Veillard260a68f1998-08-13 03:39:55 +00008544 /*
8545 * Last case, text. Note that References are handled directly.
8546 */
8547 else {
8548 xmlParseCharData(ctxt, 0);
8549 }
8550
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008551 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008552 /*
8553 * Pop-up of finished entities.
8554 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008555 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillardbc50b591999-03-01 12:28:53 +00008556 xmlPopInput(ctxt);
Daniel Veillard10a2c651999-12-12 13:03:50 +00008557 SHRINK;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008558
Daniel Veillardb96e6431999-08-29 21:02:19 +00008559 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8560 (tok == ctxt->token)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008562 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008563 "detected an error in element content\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008564 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008565 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008566 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008567 break;
8568 }
8569 }
8570}
8571
Daniel Veillard11e00581998-10-24 18:27:49 +00008572/**
8573 * xmlParseElement:
8574 * @ctxt: an XML parser context
8575 *
8576 * parse an XML element, this is highly recursive
Daniel Veillard260a68f1998-08-13 03:39:55 +00008577 *
8578 * [39] element ::= EmptyElemTag | STag content ETag
8579 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00008580 * [ WFC: Element Type Match ]
8581 * The Name in an element's end-tag must match the element type in the
8582 * start-tag.
8583 *
8584 * [ VC: Element Valid ]
Daniel Veillardb96e6431999-08-29 21:02:19 +00008585 * An element is valid if there is a declaration matching elementdecl
Daniel Veillardb05deb71999-08-10 19:04:08 +00008586 * where the Name matches the element type and one of the following holds:
8587 * - The declaration matches EMPTY and the element has no content.
8588 * - The declaration matches children and the sequence of child elements
8589 * belongs to the language generated by the regular expression in the
8590 * content model, with optional white space (characters matching the
8591 * nonterminal S) between each pair of child elements.
8592 * - The declaration matches Mixed and the content consists of character
8593 * data and child elements whose types match names in the content model.
8594 * - The declaration matches ANY, and the types of any child elements have
8595 * been declared.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008596 */
8597
Daniel Veillard517752b1999-04-05 12:20:10 +00008598void
Daniel Veillard1e346af1999-02-22 10:33:01 +00008599xmlParseElement(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008600 const xmlChar *openTag = CUR_PTR;
8601 xmlChar *name;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008602 xmlChar *oldname;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008603 xmlParserNodeInfo node_info;
Daniel Veillardc26087b1999-08-30 11:23:51 +00008604 xmlNodePtr ret;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008605
8606 /* Capture start position */
Daniel Veillardc26087b1999-08-30 11:23:51 +00008607 if (ctxt->record_info) {
8608 node_info.begin_pos = ctxt->input->consumed +
8609 (CUR_PTR - ctxt->input->base);
8610 node_info.begin_line = ctxt->input->line;
8611 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008612
Daniel Veillardcf461992000-03-14 18:30:20 +00008613 if (ctxt->spaceNr == 0)
8614 spacePush(ctxt, -1);
8615 else
8616 spacePush(ctxt, *ctxt->space);
8617
Daniel Veillard14fff061999-06-22 21:49:07 +00008618 name = xmlParseStartTag(ctxt);
8619 if (name == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00008620 spacePop(ctxt);
Daniel Veillard14fff061999-06-22 21:49:07 +00008621 return;
8622 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008623 namePush(ctxt, name);
Daniel Veillardc26087b1999-08-30 11:23:51 +00008624 ret = ctxt->node;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008625
8626 /*
Daniel Veillardb05deb71999-08-10 19:04:08 +00008627 * [ VC: Root Element Type ]
8628 * The Name in the document type declaration must match the element
8629 * type of the root element.
8630 */
8631 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
Daniel Veillardcf461992000-03-14 18:30:20 +00008632 ctxt->node && (ctxt->node == ctxt->myDoc->children))
Daniel Veillardb05deb71999-08-10 19:04:08 +00008633 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8634
8635 /*
Daniel Veillard260a68f1998-08-13 03:39:55 +00008636 * Check for an Empty Element.
8637 */
Daniel Veillardcf461992000-03-14 18:30:20 +00008638 if ((RAW == '/') && (NXT(1) == '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008639 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00008640 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8641 (!ctxt->disableSAX))
Daniel Veillard14fff061999-06-22 21:49:07 +00008642 ctxt->sax->endElement(ctxt->userData, name);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008643 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008644 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008645 if (oldname != NULL) {
8646#ifdef DEBUG_STACK
8647 fprintf(stderr,"Close: popped %s\n", oldname);
8648#endif
8649 xmlFree(oldname);
8650 }
Daniel Veillard87b95392000-08-12 21:12:04 +00008651 if ( ret != NULL && ctxt->record_info ) {
8652 node_info.end_pos = ctxt->input->consumed +
8653 (CUR_PTR - ctxt->input->base);
8654 node_info.end_line = ctxt->input->line;
8655 node_info.node = ret;
8656 xmlParserAddNodeInfo(ctxt, &node_info);
8657 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008658 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008659 }
Daniel Veillardcf461992000-03-14 18:30:20 +00008660 if (RAW == '>') {
Daniel Veillarde2d034d1999-07-27 19:52:06 +00008661 NEXT;
8662 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008663 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008664 ctxt->sax->error(ctxt->userData,
8665 "Couldn't find end of Start Tag\n%.30s\n",
Daniel Veillard242590e1998-11-13 18:04:35 +00008666 openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008667 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008668 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008669 ctxt->errNo = XML_ERR_GT_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008670
8671 /*
8672 * end of parsing of this node.
8673 */
8674 nodePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008675 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008676 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008677 if (oldname != NULL) {
8678#ifdef DEBUG_STACK
8679 fprintf(stderr,"Close: popped %s\n", oldname);
8680#endif
8681 xmlFree(oldname);
8682 }
Daniel Veillardc26087b1999-08-30 11:23:51 +00008683
8684 /*
8685 * Capture end position and add node
8686 */
8687 if ( ret != NULL && ctxt->record_info ) {
8688 node_info.end_pos = ctxt->input->consumed +
8689 (CUR_PTR - ctxt->input->base);
8690 node_info.end_line = ctxt->input->line;
8691 node_info.node = ret;
8692 xmlParserAddNodeInfo(ctxt, &node_info);
8693 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008694 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008695 }
8696
8697 /*
8698 * Parse the content of the element:
8699 */
8700 xmlParseContent(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008701 if (!IS_CHAR(RAW)) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008703 ctxt->sax->error(ctxt->userData,
Daniel Veillard242590e1998-11-13 18:04:35 +00008704 "Premature end of data in tag %.30s\n", openTag);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008705 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008706 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008707 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008708
8709 /*
8710 * end of parsing of this node.
8711 */
8712 nodePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008713 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008714 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008715 if (oldname != NULL) {
8716#ifdef DEBUG_STACK
8717 fprintf(stderr,"Close: popped %s\n", oldname);
8718#endif
8719 xmlFree(oldname);
8720 }
Daniel Veillard517752b1999-04-05 12:20:10 +00008721 return;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008722 }
8723
8724 /*
8725 * parse the end of tag: '</' should be here.
8726 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +00008727 xmlParseEndTag(ctxt);
Daniel Veillardc26087b1999-08-30 11:23:51 +00008728
8729 /*
8730 * Capture end position and add node
8731 */
8732 if ( ret != NULL && ctxt->record_info ) {
8733 node_info.end_pos = ctxt->input->consumed +
8734 (CUR_PTR - ctxt->input->base);
8735 node_info.end_line = ctxt->input->line;
8736 node_info.node = ret;
8737 xmlParserAddNodeInfo(ctxt, &node_info);
8738 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00008739}
8740
Daniel Veillard11e00581998-10-24 18:27:49 +00008741/**
8742 * xmlParseVersionNum:
8743 * @ctxt: an XML parser context
8744 *
8745 * parse the XML version value.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008746 *
8747 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
Daniel Veillard1e346af1999-02-22 10:33:01 +00008748 *
8749 * Returns the string giving the XML version number, or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00008750 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008751xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008752xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00008753 xmlChar *buf = NULL;
8754 int len = 0;
8755 int size = 10;
8756 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008757
Daniel Veillard10a2c651999-12-12 13:03:50 +00008758 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8759 if (buf == NULL) {
8760 fprintf(stderr, "malloc of %d byte failed\n", size);
8761 return(NULL);
8762 }
8763 cur = CUR;
Daniel Veillardcf461992000-03-14 18:30:20 +00008764 while (((cur >= 'a') && (cur <= 'z')) ||
8765 ((cur >= 'A') && (cur <= 'Z')) ||
8766 ((cur >= '0') && (cur <= '9')) ||
8767 (cur == '_') || (cur == '.') ||
8768 (cur == ':') || (cur == '-')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00008769 if (len + 1 >= size) {
8770 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00008771 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00008772 if (buf == NULL) {
8773 fprintf(stderr, "realloc of %d byte failed\n", size);
8774 return(NULL);
8775 }
8776 }
8777 buf[len++] = cur;
8778 NEXT;
8779 cur=CUR;
8780 }
8781 buf[len] = 0;
8782 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008783}
8784
Daniel Veillard11e00581998-10-24 18:27:49 +00008785/**
8786 * xmlParseVersionInfo:
8787 * @ctxt: an XML parser context
8788 *
8789 * parse the XML version.
Daniel Veillard260a68f1998-08-13 03:39:55 +00008790 *
8791 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
8792 *
8793 * [25] Eq ::= S? '=' S?
Daniel Veillard11e00581998-10-24 18:27:49 +00008794 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008795 * Returns the version string, e.g. "1.0"
Daniel Veillard260a68f1998-08-13 03:39:55 +00008796 */
8797
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008798xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008799xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008800 xmlChar *version = NULL;
8801 const xmlChar *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008802
Daniel Veillardcf461992000-03-14 18:30:20 +00008803 if ((RAW == 'v') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008804 (NXT(2) == 'r') && (NXT(3) == 's') &&
8805 (NXT(4) == 'i') && (NXT(5) == 'o') &&
8806 (NXT(6) == 'n')) {
8807 SKIP(7);
8808 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008809 if (RAW != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008811 ctxt->sax->error(ctxt->userData,
8812 "xmlParseVersionInfo : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008813 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008814 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008815 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008816 return(NULL);
8817 }
8818 NEXT;
8819 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008820 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008821 NEXT;
8822 q = CUR_PTR;
8823 version = xmlParseVersionNum(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008824 if (RAW != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008826 ctxt->sax->error(ctxt->userData,
8827 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008828 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008829 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008830 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008831 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008832 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008833 } else if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00008834 NEXT;
8835 q = CUR_PTR;
8836 version = xmlParseVersionNum(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008837 if (RAW != '\'') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008839 ctxt->sax->error(ctxt->userData,
8840 "String not closed\n%.50s\n", q);
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008841 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008842 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008843 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008844 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008845 NEXT;
8846 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008848 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008849 "xmlParseVersionInfo : expected ' or \"\n");
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008850 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008851 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008852 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008853 }
8854 }
8855 return(version);
8856}
8857
Daniel Veillard11e00581998-10-24 18:27:49 +00008858/**
8859 * xmlParseEncName:
8860 * @ctxt: an XML parser context
8861 *
8862 * parse the XML encoding name
Daniel Veillard260a68f1998-08-13 03:39:55 +00008863 *
8864 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
Daniel Veillard11e00581998-10-24 18:27:49 +00008865 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008866 * Returns the encoding name value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00008867 */
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008868xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008869xmlParseEncName(xmlParserCtxtPtr ctxt) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00008870 xmlChar *buf = NULL;
8871 int len = 0;
8872 int size = 10;
8873 xmlChar cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008874
Daniel Veillard10a2c651999-12-12 13:03:50 +00008875 cur = CUR;
8876 if (((cur >= 'a') && (cur <= 'z')) ||
8877 ((cur >= 'A') && (cur <= 'Z'))) {
8878 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
8879 if (buf == NULL) {
8880 fprintf(stderr, "malloc of %d byte failed\n", size);
8881 return(NULL);
8882 }
8883
8884 buf[len++] = cur;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008885 NEXT;
Daniel Veillard10a2c651999-12-12 13:03:50 +00008886 cur = CUR;
Daniel Veillardcf461992000-03-14 18:30:20 +00008887 while (((cur >= 'a') && (cur <= 'z')) ||
8888 ((cur >= 'A') && (cur <= 'Z')) ||
8889 ((cur >= '0') && (cur <= '9')) ||
8890 (cur == '.') || (cur == '_') ||
8891 (cur == '-')) {
Daniel Veillard10a2c651999-12-12 13:03:50 +00008892 if (len + 1 >= size) {
8893 size *= 2;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00008894 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
Daniel Veillard10a2c651999-12-12 13:03:50 +00008895 if (buf == NULL) {
8896 fprintf(stderr, "realloc of %d byte failed\n", size);
8897 return(NULL);
8898 }
8899 }
8900 buf[len++] = cur;
8901 NEXT;
8902 cur = CUR;
8903 if (cur == 0) {
8904 SHRINK;
8905 GROW;
8906 cur = CUR;
8907 }
8908 }
8909 buf[len] = 0;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008910 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008912 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008913 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008914 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008915 ctxt->errNo = XML_ERR_ENCODING_NAME;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008916 }
Daniel Veillard10a2c651999-12-12 13:03:50 +00008917 return(buf);
Daniel Veillard260a68f1998-08-13 03:39:55 +00008918}
8919
Daniel Veillard11e00581998-10-24 18:27:49 +00008920/**
8921 * xmlParseEncodingDecl:
8922 * @ctxt: an XML parser context
8923 *
8924 * parse the XML encoding declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00008925 *
8926 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
Daniel Veillard11e00581998-10-24 18:27:49 +00008927 *
8928 * TODO: this should setup the conversion filters.
8929 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00008930 * Returns the encoding value or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +00008931 */
8932
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008933xmlChar *
Daniel Veillard0ba4d531998-11-01 19:34:31 +00008934xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008935 xmlChar *encoding = NULL;
8936 const xmlChar *q;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008937
8938 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008939 if ((RAW == 'e') && (NXT(1) == 'n') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00008940 (NXT(2) == 'c') && (NXT(3) == 'o') &&
8941 (NXT(4) == 'd') && (NXT(5) == 'i') &&
8942 (NXT(6) == 'n') && (NXT(7) == 'g')) {
8943 SKIP(8);
8944 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008945 if (RAW != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008947 ctxt->sax->error(ctxt->userData,
8948 "xmlParseEncodingDecl : expected '='\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008949 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008950 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008951 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008952 return(NULL);
8953 }
8954 NEXT;
8955 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00008956 if (RAW == '"') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00008957 NEXT;
8958 q = CUR_PTR;
8959 encoding = xmlParseEncName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008960 if (RAW != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008962 ctxt->sax->error(ctxt->userData,
8963 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008964 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008965 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008966 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008967 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008968 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008969 } else if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00008970 NEXT;
8971 q = CUR_PTR;
8972 encoding = xmlParseEncName(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00008973 if (RAW != '\'') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00008975 ctxt->sax->error(ctxt->userData,
8976 "String not closed\n%.50s\n", q);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008977 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008978 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008979 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008980 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00008981 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00008982 } else if (RAW == '"'){
Daniel Veillarde3bffb91998-11-08 14:40:56 +00008983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00008984 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00008985 "xmlParseEncodingDecl : expected ' or \"\n");
8986 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00008987 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00008988 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00008989 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00008990 if (encoding != NULL) {
8991 xmlCharEncoding enc;
8992 xmlCharEncodingHandlerPtr handler;
8993
8994 if (ctxt->input->encoding != NULL)
8995 xmlFree((xmlChar *) ctxt->input->encoding);
8996 ctxt->input->encoding = encoding;
8997
8998 enc = xmlParseCharEncoding((const char *) encoding);
8999 /*
9000 * registered set of known encodings
9001 */
9002 if (enc != XML_CHAR_ENCODING_ERROR) {
9003 xmlSwitchEncoding(ctxt, enc);
9004 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9005 xmlFree(encoding);
9006 return(NULL);
9007 }
9008 } else {
9009 /*
9010 * fallback for unknown encodings
9011 */
9012 handler = xmlFindCharEncodingHandler((const char *) encoding);
9013 if (handler != NULL) {
9014 xmlSwitchToEncoding(ctxt, handler);
9015 } else {
9016 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
Daniel Veillard32bc74e2000-07-14 14:49:25 +00009017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9018 ctxt->sax->error(ctxt->userData,
9019 "Unsupported encoding %s\n", encoding);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00009020 return(NULL);
9021 }
9022 }
9023 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00009024 }
9025 return(encoding);
9026}
9027
Daniel Veillard11e00581998-10-24 18:27:49 +00009028/**
9029 * xmlParseSDDecl:
9030 * @ctxt: an XML parser context
9031 *
9032 * parse the XML standalone declaration
Daniel Veillard260a68f1998-08-13 03:39:55 +00009033 *
9034 * [32] SDDecl ::= S 'standalone' Eq
9035 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
Daniel Veillard1e346af1999-02-22 10:33:01 +00009036 *
Daniel Veillardb05deb71999-08-10 19:04:08 +00009037 * [ VC: Standalone Document Declaration ]
9038 * TODO The standalone document declaration must have the value "no"
9039 * if any external markup declarations contain declarations of:
9040 * - attributes with default values, if elements to which these
9041 * attributes apply appear in the document without specifications
9042 * of values for these attributes, or
9043 * - entities (other than amp, lt, gt, apos, quot), if references
9044 * to those entities appear in the document, or
9045 * - attributes with values subject to normalization, where the
9046 * attribute appears in the document with a value which will change
9047 * as a result of normalization, or
9048 * - element types with element content, if white space occurs directly
9049 * within any instance of those types.
9050 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00009051 * Returns 1 if standalone, 0 otherwise
Daniel Veillard260a68f1998-08-13 03:39:55 +00009052 */
9053
Daniel Veillard0ba4d531998-11-01 19:34:31 +00009054int
9055xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00009056 int standalone = -1;
9057
9058 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00009059 if ((RAW == 's') && (NXT(1) == 't') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00009060 (NXT(2) == 'a') && (NXT(3) == 'n') &&
9061 (NXT(4) == 'd') && (NXT(5) == 'a') &&
9062 (NXT(6) == 'l') && (NXT(7) == 'o') &&
9063 (NXT(8) == 'n') && (NXT(9) == 'e')) {
9064 SKIP(10);
Daniel Veillard011b63c1999-06-02 17:44:04 +00009065 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00009066 if (RAW != '=') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00009067 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00009068 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009069 "XML standalone declaration : expected '='\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009070 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009071 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009072 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009073 return(standalone);
9074 }
9075 NEXT;
9076 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00009077 if (RAW == '\''){
Daniel Veillard260a68f1998-08-13 03:39:55 +00009078 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00009079 if ((RAW == 'n') && (NXT(1) == 'o')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00009080 standalone = 0;
9081 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00009082 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00009083 (NXT(2) == 's')) {
9084 standalone = 1;
9085 SKIP(3);
9086 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00009087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009088 ctxt->sax->error(ctxt->userData,
9089 "standalone accepts only 'yes' or 'no'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009090 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009091 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009092 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009093 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009094 if (RAW != '\'') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00009095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00009096 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009097 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009098 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009099 ctxt->disableSAX = 1;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00009100 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00009101 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00009102 } else if (RAW == '"'){
Daniel Veillard260a68f1998-08-13 03:39:55 +00009103 NEXT;
Daniel Veillardcf461992000-03-14 18:30:20 +00009104 if ((RAW == 'n') && (NXT(1) == 'o')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00009105 standalone = 0;
9106 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00009107 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00009108 (NXT(2) == 's')) {
9109 standalone = 1;
9110 SKIP(3);
9111 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00009112 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00009113 ctxt->sax->error(ctxt->userData,
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009114 "standalone accepts only 'yes' or 'no'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009115 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009116 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009117 ctxt->disableSAX = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009118 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009119 if (RAW != '"') {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00009120 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00009121 ctxt->sax->error(ctxt->userData, "String not closed\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009122 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009123 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009124 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
Daniel Veillarde3bffb91998-11-08 14:40:56 +00009125 } else
Daniel Veillard260a68f1998-08-13 03:39:55 +00009126 NEXT;
9127 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00009128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009129 ctxt->sax->error(ctxt->userData,
9130 "Standalone value not found\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009131 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009132 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009133 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009134 }
9135 }
9136 return(standalone);
9137}
9138
Daniel Veillard11e00581998-10-24 18:27:49 +00009139/**
9140 * xmlParseXMLDecl:
9141 * @ctxt: an XML parser context
9142 *
9143 * parse an XML declaration header
Daniel Veillard260a68f1998-08-13 03:39:55 +00009144 *
9145 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
9146 */
9147
Daniel Veillard0ba4d531998-11-01 19:34:31 +00009148void
9149xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009150 xmlChar *version;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009151
9152 /*
9153 * We know that '<?xml' is here.
9154 */
9155 SKIP(5);
9156
Daniel Veillardcf461992000-03-14 18:30:20 +00009157 if (!IS_BLANK(RAW)) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009158 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00009159 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009160 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009161 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009162 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009163 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00009164 SKIP_BLANKS;
9165
9166 /*
9167 * We should have the VersionInfo here.
9168 */
9169 version = xmlParseVersionInfo(ctxt);
9170 if (version == NULL)
9171 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard517752b1999-04-05 12:20:10 +00009172 ctxt->version = xmlStrdup(version);
Daniel Veillard6454aec1999-09-02 22:04:43 +00009173 xmlFree(version);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009174
9175 /*
9176 * We may have the encoding declaration
9177 */
Daniel Veillardcf461992000-03-14 18:30:20 +00009178 if (!IS_BLANK(RAW)) {
9179 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009180 SKIP(2);
9181 return;
9182 }
9183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00009184 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009185 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009186 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009187 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009188 }
Daniel Veillard496a1cf2000-05-03 14:20:55 +00009189 xmlParseEncodingDecl(ctxt);
9190 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9191 /*
9192 * The XML REC instructs us to stop parsing right here
9193 */
9194 return;
9195 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00009196
9197 /*
9198 * We may have the standalone status.
9199 */
Daniel Veillardcf461992000-03-14 18:30:20 +00009200 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
9201 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009202 SKIP(2);
9203 return;
9204 }
9205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00009206 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009207 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009208 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009209 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009210 }
9211 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00009212 ctxt->input->standalone = xmlParseSDDecl(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009213
9214 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00009215 if ((RAW == '?') && (NXT(1) == '>')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00009216 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00009217 } else if (RAW == '>') {
Daniel Veillard260a68f1998-08-13 03:39:55 +00009218 /* Deprecated old WD ... */
Daniel Veillarde3bffb91998-11-08 14:40:56 +00009219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009220 ctxt->sax->error(ctxt->userData,
9221 "XML declaration must end-up with '?>'\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009222 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009223 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009224 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009225 NEXT;
9226 } else {
Daniel Veillarde3bffb91998-11-08 14:40:56 +00009227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009228 ctxt->sax->error(ctxt->userData,
9229 "parsing XML declaration: '?>' expected\n");
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009230 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009231 ctxt->disableSAX = 1;
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009232 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009233 MOVETO_ENDTAG(CUR_PTR);
9234 NEXT;
9235 }
9236}
9237
Daniel Veillard11e00581998-10-24 18:27:49 +00009238/**
9239 * xmlParseMisc:
9240 * @ctxt: an XML parser context
9241 *
9242 * parse an XML Misc* optionnal field.
Daniel Veillard260a68f1998-08-13 03:39:55 +00009243 *
9244 * [27] Misc ::= Comment | PI | S
9245 */
9246
Daniel Veillard0ba4d531998-11-01 19:34:31 +00009247void
9248xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00009249 while (((RAW == '<') && (NXT(1) == '?')) ||
9250 ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00009251 (NXT(2) == '-') && (NXT(3) == '-')) ||
9252 IS_BLANK(CUR)) {
Daniel Veillardcf461992000-03-14 18:30:20 +00009253 if ((RAW == '<') && (NXT(1) == '?')) {
Daniel Veillard260a68f1998-08-13 03:39:55 +00009254 xmlParsePI(ctxt);
9255 } else if (IS_BLANK(CUR)) {
9256 NEXT;
9257 } else
Daniel Veillardb96e6431999-08-29 21:02:19 +00009258 xmlParseComment(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009259 }
9260}
9261
Daniel Veillard11e00581998-10-24 18:27:49 +00009262/**
Daniel Veillardcf461992000-03-14 18:30:20 +00009263 * xmlParseDocument:
Daniel Veillard11e00581998-10-24 18:27:49 +00009264 * @ctxt: an XML parser context
9265 *
9266 * parse an XML document (and build a tree if using the standard SAX
9267 * interface).
Daniel Veillard260a68f1998-08-13 03:39:55 +00009268 *
9269 * [1] document ::= prolog element Misc*
9270 *
9271 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
Daniel Veillard11e00581998-10-24 18:27:49 +00009272 *
Daniel Veillard1e346af1999-02-22 10:33:01 +00009273 * Returns 0, -1 in case of error. the parser context is augmented
Daniel Veillard11e00581998-10-24 18:27:49 +00009274 * as a result of the parsing.
Daniel Veillard260a68f1998-08-13 03:39:55 +00009275 */
9276
Daniel Veillard0ba4d531998-11-01 19:34:31 +00009277int
9278xmlParseDocument(xmlParserCtxtPtr ctxt) {
Daniel Veillardcf461992000-03-14 18:30:20 +00009279 xmlChar start[4];
9280 xmlCharEncoding enc;
9281
Daniel Veillard260a68f1998-08-13 03:39:55 +00009282 xmlDefaultSAXHandlerInit();
9283
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009284 GROW;
9285
Daniel Veillard260a68f1998-08-13 03:39:55 +00009286 /*
9287 * SAX: beginning of the document processing.
9288 */
Daniel Veillard517752b1999-04-05 12:20:10 +00009289 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
Daniel Veillard27d88741999-05-29 11:51:49 +00009290 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009291
Daniel Veillardcf461992000-03-14 18:30:20 +00009292 /*
9293 * Get the 4 first bytes and decode the charset
9294 * if enc != XML_CHAR_ENCODING_NONE
9295 * plug some encoding conversion routines.
Daniel Veillard260a68f1998-08-13 03:39:55 +00009296 */
Daniel Veillardcf461992000-03-14 18:30:20 +00009297 start[0] = RAW;
9298 start[1] = NXT(1);
9299 start[2] = NXT(2);
9300 start[3] = NXT(3);
9301 enc = xmlDetectCharEncoding(start, 4);
9302 if (enc != XML_CHAR_ENCODING_NONE) {
9303 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009304 }
9305
Daniel Veillardcf461992000-03-14 18:30:20 +00009306
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009307 if (CUR == 0) {
9308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00009309 ctxt->sax->error(ctxt->userData, "Document is empty\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +00009310 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009311 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009312 ctxt->disableSAX = 1;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009313 }
Daniel Veillard260a68f1998-08-13 03:39:55 +00009314
9315 /*
9316 * Check for the XMLDecl in the Prolog.
9317 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009318 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00009319 if ((RAW == '<') && (NXT(1) == '?') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00009320 (NXT(2) == 'x') && (NXT(3) == 'm') &&
Daniel Veillard686d6b62000-01-03 11:08:02 +00009321 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
Daniel Veillard496a1cf2000-05-03 14:20:55 +00009322
9323 /*
9324 * Note that we will switch encoding on the fly.
9325 */
Daniel Veillard260a68f1998-08-13 03:39:55 +00009326 xmlParseXMLDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00009327 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9328 /*
9329 * The XML REC instructs us to stop parsing right here
9330 */
9331 return(-1);
9332 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009333 ctxt->standalone = ctxt->input->standalone;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009334 SKIP_BLANKS;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009335 } else {
Daniel Veillard517752b1999-04-05 12:20:10 +00009336 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009337 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009338 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00009339 ctxt->sax->startDocument(ctxt->userData);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009340
9341 /*
9342 * The Misc part of the Prolog
9343 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009344 GROW;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009345 xmlParseMisc(ctxt);
9346
9347 /*
9348 * Then possibly doc type declaration(s) and more Misc
9349 * (doctypedecl Misc*)?
9350 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009351 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00009352 if ((RAW == '<') && (NXT(1) == '!') &&
Daniel Veillard260a68f1998-08-13 03:39:55 +00009353 (NXT(2) == 'D') && (NXT(3) == 'O') &&
9354 (NXT(4) == 'C') && (NXT(5) == 'T') &&
9355 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
9356 (NXT(8) == 'E')) {
Daniel Veillardcf461992000-03-14 18:30:20 +00009357
9358 ctxt->inSubset = 1;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009359 xmlParseDocTypeDecl(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00009360 if (RAW == '[') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009361 ctxt->instate = XML_PARSER_DTD;
9362 xmlParseInternalSubset(ctxt);
9363 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009364
9365 /*
9366 * Create and update the external subset.
9367 */
9368 ctxt->inSubset = 2;
9369 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
9370 (!ctxt->disableSAX))
9371 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
9372 ctxt->extSubSystem, ctxt->extSubURI);
9373 ctxt->inSubset = 0;
9374
9375
Daniel Veillardb05deb71999-08-10 19:04:08 +00009376 ctxt->instate = XML_PARSER_PROLOG;
Daniel Veillard260a68f1998-08-13 03:39:55 +00009377 xmlParseMisc(ctxt);
9378 }
9379
9380 /*
9381 * Time to start parsing the tree itself
9382 */
Daniel Veillarde2d034d1999-07-27 19:52:06 +00009383 GROW;
Daniel Veillardcf461992000-03-14 18:30:20 +00009384 if (RAW != '<') {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +00009386 ctxt->sax->error(ctxt->userData,
Daniel Veillardcf461992000-03-14 18:30:20 +00009387 "Start tag expected, '<' not found\n");
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009388 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009389 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009390 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009391 ctxt->instate = XML_PARSER_EOF;
9392 } else {
9393 ctxt->instate = XML_PARSER_CONTENT;
9394 xmlParseElement(ctxt);
9395 ctxt->instate = XML_PARSER_EPILOG;
9396
9397
9398 /*
9399 * The Misc part at the end
9400 */
9401 xmlParseMisc(ctxt);
9402
Daniel Veillardcf461992000-03-14 18:30:20 +00009403 if (RAW != 0) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9405 ctxt->sax->error(ctxt->userData,
9406 "Extra content at the end of the document\n");
9407 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009408 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009409 ctxt->errNo = XML_ERR_DOCUMENT_END;
9410 }
9411 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009412 }
9413
Daniel Veillard260a68f1998-08-13 03:39:55 +00009414 /*
9415 * SAX: end of the document processing.
9416 */
Daniel Veillardcf461992000-03-14 18:30:20 +00009417 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9418 (!ctxt->disableSAX))
Daniel Veillard27d88741999-05-29 11:51:49 +00009419 ctxt->sax->endDocument(ctxt->userData);
Daniel Veillardcf461992000-03-14 18:30:20 +00009420
Daniel Veillard39a1f9a1999-01-17 19:11:59 +00009421 if (! ctxt->wellFormed) return(-1);
Daniel Veillard260a68f1998-08-13 03:39:55 +00009422 return(0);
9423}
9424
Daniel Veillardb05deb71999-08-10 19:04:08 +00009425/************************************************************************
9426 * *
Daniel Veillard7f858501999-11-17 17:32:38 +00009427 * Progressive parsing interfaces *
9428 * *
9429 ************************************************************************/
9430
9431/**
9432 * xmlParseLookupSequence:
9433 * @ctxt: an XML parser context
9434 * @first: the first char to lookup
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009435 * @next: the next char to lookup or zero
9436 * @third: the next char to lookup or zero
Daniel Veillard7f858501999-11-17 17:32:38 +00009437 *
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009438 * Try to find if a sequence (first, next, third) or just (first next) or
9439 * (first) is available in the input stream.
9440 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
9441 * to avoid rescanning sequences of bytes, it DOES change the state of the
9442 * parser, do not use liberally.
Daniel Veillard7f858501999-11-17 17:32:38 +00009443 *
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009444 * Returns the index to the current parsing point if the full sequence
9445 * is available, -1 otherwise.
Daniel Veillard7f858501999-11-17 17:32:38 +00009446 */
9447int
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009448xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
9449 xmlChar next, xmlChar third) {
9450 int base, len;
9451 xmlParserInputPtr in;
9452 const xmlChar *buf;
9453
9454 in = ctxt->input;
9455 if (in == NULL) return(-1);
9456 base = in->cur - in->base;
9457 if (base < 0) return(-1);
9458 if (ctxt->checkIndex > base)
9459 base = ctxt->checkIndex;
9460 if (in->buf == NULL) {
9461 buf = in->base;
9462 len = in->length;
9463 } else {
9464 buf = in->buf->buffer->content;
9465 len = in->buf->buffer->use;
9466 }
9467 /* take into account the sequence length */
9468 if (third) len -= 2;
9469 else if (next) len --;
9470 for (;base < len;base++) {
9471 if (buf[base] == first) {
9472 if (third != 0) {
9473 if ((buf[base + 1] != next) ||
9474 (buf[base + 2] != third)) continue;
9475 } else if (next != 0) {
9476 if (buf[base + 1] != next) continue;
9477 }
9478 ctxt->checkIndex = 0;
9479#ifdef DEBUG_PUSH
9480 if (next == 0)
9481 fprintf(stderr, "PP: lookup '%c' found at %d\n",
9482 first, base);
9483 else if (third == 0)
9484 fprintf(stderr, "PP: lookup '%c%c' found at %d\n",
9485 first, next, base);
9486 else
9487 fprintf(stderr, "PP: lookup '%c%c%c' found at %d\n",
9488 first, next, third, base);
9489#endif
9490 return(base - (in->cur - in->base));
9491 }
9492 }
9493 ctxt->checkIndex = base;
9494#ifdef DEBUG_PUSH
9495 if (next == 0)
9496 fprintf(stderr, "PP: lookup '%c' failed\n", first);
9497 else if (third == 0)
9498 fprintf(stderr, "PP: lookup '%c%c' failed\n", first, next);
9499 else
9500 fprintf(stderr, "PP: lookup '%c%c%c' failed\n", first, next, third);
9501#endif
9502 return(-1);
Daniel Veillard7f858501999-11-17 17:32:38 +00009503}
9504
9505/**
Daniel Veillard71b656e2000-01-05 14:46:17 +00009506 * xmlParseTryOrFinish:
Daniel Veillard7f858501999-11-17 17:32:38 +00009507 * @ctxt: an XML parser context
Daniel Veillard71b656e2000-01-05 14:46:17 +00009508 * @terminate: last chunk indicator
Daniel Veillard7f858501999-11-17 17:32:38 +00009509 *
9510 * Try to progress on parsing
9511 *
9512 * Returns zero if no parsing was possible
9513 */
9514int
Daniel Veillard71b656e2000-01-05 14:46:17 +00009515xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
Daniel Veillard7f858501999-11-17 17:32:38 +00009516 int ret = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009517 int avail;
9518 xmlChar cur, next;
9519
9520#ifdef DEBUG_PUSH
9521 switch (ctxt->instate) {
9522 case XML_PARSER_EOF:
9523 fprintf(stderr, "PP: try EOF\n"); break;
9524 case XML_PARSER_START:
9525 fprintf(stderr, "PP: try START\n"); break;
9526 case XML_PARSER_MISC:
9527 fprintf(stderr, "PP: try MISC\n");break;
9528 case XML_PARSER_COMMENT:
9529 fprintf(stderr, "PP: try COMMENT\n");break;
9530 case XML_PARSER_PROLOG:
9531 fprintf(stderr, "PP: try PROLOG\n");break;
9532 case XML_PARSER_START_TAG:
9533 fprintf(stderr, "PP: try START_TAG\n");break;
9534 case XML_PARSER_CONTENT:
9535 fprintf(stderr, "PP: try CONTENT\n");break;
9536 case XML_PARSER_CDATA_SECTION:
9537 fprintf(stderr, "PP: try CDATA_SECTION\n");break;
9538 case XML_PARSER_END_TAG:
9539 fprintf(stderr, "PP: try END_TAG\n");break;
9540 case XML_PARSER_ENTITY_DECL:
9541 fprintf(stderr, "PP: try ENTITY_DECL\n");break;
9542 case XML_PARSER_ENTITY_VALUE:
9543 fprintf(stderr, "PP: try ENTITY_VALUE\n");break;
9544 case XML_PARSER_ATTRIBUTE_VALUE:
9545 fprintf(stderr, "PP: try ATTRIBUTE_VALUE\n");break;
9546 case XML_PARSER_DTD:
9547 fprintf(stderr, "PP: try DTD\n");break;
9548 case XML_PARSER_EPILOG:
9549 fprintf(stderr, "PP: try EPILOG\n");break;
9550 case XML_PARSER_PI:
9551 fprintf(stderr, "PP: try PI\n");break;
9552 }
9553#endif
Daniel Veillard7f858501999-11-17 17:32:38 +00009554
9555 while (1) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009556 /*
9557 * Pop-up of finished entities.
9558 */
Daniel Veillardcf461992000-03-14 18:30:20 +00009559 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009560 xmlPopInput(ctxt);
9561
Daniel Veillardcf461992000-03-14 18:30:20 +00009562 if (ctxt->input ==NULL) break;
9563 if (ctxt->input->buf == NULL)
9564 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009565 else
Daniel Veillardcf461992000-03-14 18:30:20 +00009566 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009567 if (avail < 1)
9568 goto done;
Daniel Veillard7f858501999-11-17 17:32:38 +00009569 switch (ctxt->instate) {
9570 case XML_PARSER_EOF:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009571 /*
9572 * Document parsing is done !
9573 */
9574 goto done;
9575 case XML_PARSER_START:
9576 /*
9577 * Very first chars read from the document flow.
9578 */
Daniel Veillardcf461992000-03-14 18:30:20 +00009579 cur = ctxt->input->cur[0];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009580 if (IS_BLANK(cur)) {
9581 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9582 ctxt->sax->setDocumentLocator(ctxt->userData,
9583 &xmlDefaultSAXLocator);
9584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9585 ctxt->sax->error(ctxt->userData,
9586 "Extra spaces at the beginning of the document are not allowed\n");
9587 ctxt->errNo = XML_ERR_DOCUMENT_START;
9588 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009589 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009590 SKIP_BLANKS;
9591 ret++;
Daniel Veillardcf461992000-03-14 18:30:20 +00009592 if (ctxt->input->buf == NULL)
9593 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009594 else
Daniel Veillardcf461992000-03-14 18:30:20 +00009595 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009596 }
9597 if (avail < 2)
9598 goto done;
9599
Daniel Veillardcf461992000-03-14 18:30:20 +00009600 cur = ctxt->input->cur[0];
9601 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009602 if (cur == 0) {
9603 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9604 ctxt->sax->setDocumentLocator(ctxt->userData,
9605 &xmlDefaultSAXLocator);
9606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9607 ctxt->sax->error(ctxt->userData, "Document is empty\n");
9608 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9609 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009610 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009611 ctxt->instate = XML_PARSER_EOF;
9612#ifdef DEBUG_PUSH
9613 fprintf(stderr, "PP: entering EOF\n");
9614#endif
9615 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
9616 ctxt->sax->endDocument(ctxt->userData);
9617 goto done;
9618 }
9619 if ((cur == '<') && (next == '?')) {
9620 /* PI or XML decl */
9621 if (avail < 5) return(ret);
Daniel Veillard71b656e2000-01-05 14:46:17 +00009622 if ((!terminate) &&
9623 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009624 return(ret);
9625 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9626 ctxt->sax->setDocumentLocator(ctxt->userData,
9627 &xmlDefaultSAXLocator);
Daniel Veillardcf461992000-03-14 18:30:20 +00009628 if ((ctxt->input->cur[2] == 'x') &&
9629 (ctxt->input->cur[3] == 'm') &&
9630 (ctxt->input->cur[4] == 'l') &&
9631 (IS_BLANK(ctxt->input->cur[5]))) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009632 ret += 5;
9633#ifdef DEBUG_PUSH
9634 fprintf(stderr, "PP: Parsing XML Decl\n");
9635#endif
9636 xmlParseXMLDecl(ctxt);
Daniel Veillard496a1cf2000-05-03 14:20:55 +00009637 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
9638 /*
9639 * The XML REC instructs us to stop parsing right
9640 * here
9641 */
9642 ctxt->instate = XML_PARSER_EOF;
9643 return(0);
9644 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009645 ctxt->standalone = ctxt->input->standalone;
9646 if ((ctxt->encoding == NULL) &&
9647 (ctxt->input->encoding != NULL))
9648 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
9649 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9650 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009651 ctxt->sax->startDocument(ctxt->userData);
9652 ctxt->instate = XML_PARSER_MISC;
9653#ifdef DEBUG_PUSH
9654 fprintf(stderr, "PP: entering MISC\n");
9655#endif
9656 } else {
9657 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00009658 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9659 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009660 ctxt->sax->startDocument(ctxt->userData);
9661 ctxt->instate = XML_PARSER_MISC;
9662#ifdef DEBUG_PUSH
9663 fprintf(stderr, "PP: entering MISC\n");
9664#endif
9665 }
9666 } else {
9667 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
9668 ctxt->sax->setDocumentLocator(ctxt->userData,
9669 &xmlDefaultSAXLocator);
9670 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillardcf461992000-03-14 18:30:20 +00009671 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
9672 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009673 ctxt->sax->startDocument(ctxt->userData);
9674 ctxt->instate = XML_PARSER_MISC;
9675#ifdef DEBUG_PUSH
9676 fprintf(stderr, "PP: entering MISC\n");
9677#endif
9678 }
9679 break;
9680 case XML_PARSER_MISC:
9681 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00009682 if (ctxt->input->buf == NULL)
9683 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009684 else
Daniel Veillardcf461992000-03-14 18:30:20 +00009685 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009686 if (avail < 2)
9687 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00009688 cur = ctxt->input->cur[0];
9689 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009690 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009691 if ((!terminate) &&
9692 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009693 goto done;
9694#ifdef DEBUG_PUSH
9695 fprintf(stderr, "PP: Parsing PI\n");
9696#endif
9697 xmlParsePI(ctxt);
9698 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00009699 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009700 if ((!terminate) &&
9701 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009702 goto done;
9703#ifdef DEBUG_PUSH
9704 fprintf(stderr, "PP: Parsing Comment\n");
9705#endif
9706 xmlParseComment(ctxt);
9707 ctxt->instate = XML_PARSER_MISC;
9708 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00009709 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
9710 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
9711 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
9712 (ctxt->input->cur[8] == 'E')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009713 if ((!terminate) &&
9714 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009715 goto done;
9716#ifdef DEBUG_PUSH
9717 fprintf(stderr, "PP: Parsing internal subset\n");
9718#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00009719 ctxt->inSubset = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009720 xmlParseDocTypeDecl(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00009721 if (RAW == '[') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009722 ctxt->instate = XML_PARSER_DTD;
9723#ifdef DEBUG_PUSH
9724 fprintf(stderr, "PP: entering DTD\n");
9725#endif
9726 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +00009727 /*
9728 * Create and update the external subset.
9729 */
9730 ctxt->inSubset = 2;
9731 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9732 (ctxt->sax->externalSubset != NULL))
9733 ctxt->sax->externalSubset(ctxt->userData,
9734 ctxt->intSubName, ctxt->extSubSystem,
9735 ctxt->extSubURI);
9736 ctxt->inSubset = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009737 ctxt->instate = XML_PARSER_PROLOG;
9738#ifdef DEBUG_PUSH
9739 fprintf(stderr, "PP: entering PROLOG\n");
9740#endif
9741 }
9742 } else if ((cur == '<') && (next == '!') &&
9743 (avail < 9)) {
9744 goto done;
9745 } else {
9746 ctxt->instate = XML_PARSER_START_TAG;
9747#ifdef DEBUG_PUSH
9748 fprintf(stderr, "PP: entering START_TAG\n");
9749#endif
9750 }
9751 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00009752 case XML_PARSER_PROLOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009753 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00009754 if (ctxt->input->buf == NULL)
9755 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009756 else
Daniel Veillardcf461992000-03-14 18:30:20 +00009757 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009758 if (avail < 2)
9759 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00009760 cur = ctxt->input->cur[0];
9761 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009762 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009763 if ((!terminate) &&
9764 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009765 goto done;
9766#ifdef DEBUG_PUSH
9767 fprintf(stderr, "PP: Parsing PI\n");
9768#endif
9769 xmlParsePI(ctxt);
9770 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00009771 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009772 if ((!terminate) &&
9773 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009774 goto done;
9775#ifdef DEBUG_PUSH
9776 fprintf(stderr, "PP: Parsing Comment\n");
9777#endif
9778 xmlParseComment(ctxt);
9779 ctxt->instate = XML_PARSER_PROLOG;
9780 } else if ((cur == '<') && (next == '!') &&
9781 (avail < 4)) {
9782 goto done;
9783 } else {
9784 ctxt->instate = XML_PARSER_START_TAG;
9785#ifdef DEBUG_PUSH
9786 fprintf(stderr, "PP: entering START_TAG\n");
9787#endif
9788 }
9789 break;
Daniel Veillard7f858501999-11-17 17:32:38 +00009790 case XML_PARSER_EPILOG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009791 SKIP_BLANKS;
Daniel Veillardcf461992000-03-14 18:30:20 +00009792 if (ctxt->input->buf == NULL)
9793 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009794 else
Daniel Veillardcf461992000-03-14 18:30:20 +00009795 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009796 if (avail < 2)
9797 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00009798 cur = ctxt->input->cur[0];
9799 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009800 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009801 if ((!terminate) &&
9802 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009803 goto done;
9804#ifdef DEBUG_PUSH
9805 fprintf(stderr, "PP: Parsing PI\n");
9806#endif
9807 xmlParsePI(ctxt);
9808 ctxt->instate = XML_PARSER_EPILOG;
9809 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00009810 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009811 if ((!terminate) &&
9812 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009813 goto done;
9814#ifdef DEBUG_PUSH
9815 fprintf(stderr, "PP: Parsing Comment\n");
9816#endif
9817 xmlParseComment(ctxt);
9818 ctxt->instate = XML_PARSER_EPILOG;
9819 } else if ((cur == '<') && (next == '!') &&
9820 (avail < 4)) {
9821 goto done;
9822 } else {
9823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9824 ctxt->sax->error(ctxt->userData,
9825 "Extra content at the end of the document\n");
9826 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009827 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009828 ctxt->errNo = XML_ERR_DOCUMENT_END;
9829 ctxt->instate = XML_PARSER_EOF;
9830#ifdef DEBUG_PUSH
9831 fprintf(stderr, "PP: entering EOF\n");
9832#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00009833 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9834 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009835 ctxt->sax->endDocument(ctxt->userData);
9836 goto done;
9837 }
9838 break;
9839 case XML_PARSER_START_TAG: {
9840 xmlChar *name, *oldname;
9841
Daniel Veillardcf461992000-03-14 18:30:20 +00009842 if ((avail < 2) && (ctxt->inputNr == 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009843 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00009844 cur = ctxt->input->cur[0];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009845 if (cur != '<') {
9846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9847 ctxt->sax->error(ctxt->userData,
9848 "Start tag expect, '<' not found\n");
9849 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
9850 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009851 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009852 ctxt->instate = XML_PARSER_EOF;
9853#ifdef DEBUG_PUSH
9854 fprintf(stderr, "PP: entering EOF\n");
9855#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00009856 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9857 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009858 ctxt->sax->endDocument(ctxt->userData);
9859 goto done;
9860 }
Daniel Veillard71b656e2000-01-05 14:46:17 +00009861 if ((!terminate) &&
9862 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009863 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00009864 if (ctxt->spaceNr == 0)
9865 spacePush(ctxt, -1);
9866 else
9867 spacePush(ctxt, *ctxt->space);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009868 name = xmlParseStartTag(ctxt);
9869 if (name == NULL) {
Daniel Veillardcf461992000-03-14 18:30:20 +00009870 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009871 ctxt->instate = XML_PARSER_EOF;
9872#ifdef DEBUG_PUSH
9873 fprintf(stderr, "PP: entering EOF\n");
9874#endif
Daniel Veillardcf461992000-03-14 18:30:20 +00009875 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
9876 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009877 ctxt->sax->endDocument(ctxt->userData);
9878 goto done;
9879 }
9880 namePush(ctxt, xmlStrdup(name));
9881
9882 /*
9883 * [ VC: Root Element Type ]
9884 * The Name in the document type declaration must match
9885 * the element type of the root element.
9886 */
9887 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
Daniel Veillardcf461992000-03-14 18:30:20 +00009888 ctxt->node && (ctxt->node == ctxt->myDoc->children))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009889 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
9890
9891 /*
9892 * Check for an Empty Element.
9893 */
Daniel Veillardcf461992000-03-14 18:30:20 +00009894 if ((RAW == '/') && (NXT(1) == '>')) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009895 SKIP(2);
Daniel Veillardcf461992000-03-14 18:30:20 +00009896 if ((ctxt->sax != NULL) &&
9897 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009898 ctxt->sax->endElement(ctxt->userData, name);
9899 xmlFree(name);
9900 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00009901 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009902 if (oldname != NULL) {
9903#ifdef DEBUG_STACK
9904 fprintf(stderr,"Close: popped %s\n", oldname);
9905#endif
9906 xmlFree(oldname);
9907 }
9908 if (ctxt->name == NULL) {
9909 ctxt->instate = XML_PARSER_EPILOG;
9910#ifdef DEBUG_PUSH
9911 fprintf(stderr, "PP: entering EPILOG\n");
9912#endif
9913 } else {
9914 ctxt->instate = XML_PARSER_CONTENT;
9915#ifdef DEBUG_PUSH
9916 fprintf(stderr, "PP: entering CONTENT\n");
9917#endif
9918 }
9919 break;
9920 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009921 if (RAW == '>') {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009922 NEXT;
9923 } else {
9924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9925 ctxt->sax->error(ctxt->userData,
9926 "Couldn't find end of Start Tag %s\n",
9927 name);
9928 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +00009929 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009930 ctxt->errNo = XML_ERR_GT_REQUIRED;
9931
9932 /*
9933 * end of parsing of this node.
9934 */
9935 nodePop(ctxt);
9936 oldname = namePop(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +00009937 spacePop(ctxt);
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009938 if (oldname != NULL) {
9939#ifdef DEBUG_STACK
9940 fprintf(stderr,"Close: popped %s\n", oldname);
9941#endif
9942 xmlFree(oldname);
9943 }
9944 }
9945 xmlFree(name);
9946 ctxt->instate = XML_PARSER_CONTENT;
9947#ifdef DEBUG_PUSH
9948 fprintf(stderr, "PP: entering CONTENT\n");
9949#endif
9950 break;
9951 }
9952 case XML_PARSER_CONTENT:
9953 /*
9954 * Handle preparsed entities and charRef
9955 */
9956 if (ctxt->token != 0) {
9957 xmlChar cur[2] = { 0 , 0 } ;
9958
9959 cur[0] = (xmlChar) ctxt->token;
Daniel Veillardcf461992000-03-14 18:30:20 +00009960 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
9961 (ctxt->sax->characters != NULL))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009962 ctxt->sax->characters(ctxt->userData, cur, 1);
9963 ctxt->token = 0;
9964 }
Daniel Veillardcf461992000-03-14 18:30:20 +00009965 if ((avail < 2) && (ctxt->inputNr == 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009966 goto done;
Daniel Veillardcf461992000-03-14 18:30:20 +00009967 cur = ctxt->input->cur[0];
9968 next = ctxt->input->cur[1];
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009969 if ((cur == '<') && (next == '?')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009970 if ((!terminate) &&
9971 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009972 goto done;
9973#ifdef DEBUG_PUSH
9974 fprintf(stderr, "PP: Parsing PI\n");
9975#endif
9976 xmlParsePI(ctxt);
9977 } else if ((cur == '<') && (next == '!') &&
Daniel Veillardcf461992000-03-14 18:30:20 +00009978 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
Daniel Veillard71b656e2000-01-05 14:46:17 +00009979 if ((!terminate) &&
9980 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009981 goto done;
9982#ifdef DEBUG_PUSH
9983 fprintf(stderr, "PP: Parsing Comment\n");
9984#endif
9985 xmlParseComment(ctxt);
9986 ctxt->instate = XML_PARSER_CONTENT;
Daniel Veillardcf461992000-03-14 18:30:20 +00009987 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
9988 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
9989 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
9990 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
9991 (ctxt->input->cur[8] == '[')) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +00009992 SKIP(9);
9993 ctxt->instate = XML_PARSER_CDATA_SECTION;
9994#ifdef DEBUG_PUSH
9995 fprintf(stderr, "PP: entering CDATA_SECTION\n");
9996#endif
9997 break;
9998 } else if ((cur == '<') && (next == '!') &&
9999 (avail < 9)) {
10000 goto done;
10001 } else if ((cur == '<') && (next == '/')) {
10002 ctxt->instate = XML_PARSER_END_TAG;
10003#ifdef DEBUG_PUSH
10004 fprintf(stderr, "PP: entering END_TAG\n");
10005#endif
10006 break;
10007 } else if (cur == '<') {
10008 ctxt->instate = XML_PARSER_START_TAG;
10009#ifdef DEBUG_PUSH
10010 fprintf(stderr, "PP: entering START_TAG\n");
10011#endif
10012 break;
10013 } else if (cur == '&') {
Daniel Veillard71b656e2000-01-05 14:46:17 +000010014 if ((!terminate) &&
10015 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010016 goto done;
10017#ifdef DEBUG_PUSH
10018 fprintf(stderr, "PP: Parsing Reference\n");
10019#endif
10020 /* TODO: check generation of subtrees if noent !!! */
10021 xmlParseReference(ctxt);
10022 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +000010023 /* TODO Avoid the extra copy, handle directly !!! */
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010024 /*
Daniel Veillardcf461992000-03-14 18:30:20 +000010025 * Goal of the following test is:
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010026 * - minimize calls to the SAX 'character' callback
10027 * when they are mergeable
10028 * - handle an problem for isBlank when we only parse
10029 * a sequence of blank chars and the next one is
10030 * not available to check against '<' presence.
10031 * - tries to homogenize the differences in SAX
10032 * callbacks beween the push and pull versions
10033 * of the parser.
10034 */
10035 if ((ctxt->inputNr == 1) &&
10036 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
Daniel Veillard71b656e2000-01-05 14:46:17 +000010037 if ((!terminate) &&
10038 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010039 goto done;
10040 }
10041 ctxt->checkIndex = 0;
10042#ifdef DEBUG_PUSH
10043 fprintf(stderr, "PP: Parsing char data\n");
10044#endif
10045 xmlParseCharData(ctxt, 0);
10046 }
10047 /*
10048 * Pop-up of finished entities.
10049 */
Daniel Veillardcf461992000-03-14 18:30:20 +000010050 while ((RAW == 0) && (ctxt->inputNr > 1))
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010051 xmlPopInput(ctxt);
10052 break;
10053 case XML_PARSER_CDATA_SECTION: {
10054 /*
10055 * The Push mode need to have the SAX callback for
10056 * cdataBlock merge back contiguous callbacks.
10057 */
10058 int base;
10059
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010060 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
10061 if (base < 0) {
10062 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
Daniel Veillardcf461992000-03-14 18:30:20 +000010063 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010064 if (ctxt->sax->cdataBlock != NULL)
Daniel Veillardcf461992000-03-14 18:30:20 +000010065 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010066 XML_PARSER_BIG_BUFFER_SIZE);
10067 }
10068 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
10069 ctxt->checkIndex = 0;
10070 }
10071 goto done;
10072 } else {
Daniel Veillardcf461992000-03-14 18:30:20 +000010073 if ((ctxt->sax != NULL) && (base > 0) &&
10074 (!ctxt->disableSAX)) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010075 if (ctxt->sax->cdataBlock != NULL)
10076 ctxt->sax->cdataBlock(ctxt->userData,
Daniel Veillardcf461992000-03-14 18:30:20 +000010077 ctxt->input->cur, base);
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010078 }
10079 SKIP(base + 3);
10080 ctxt->checkIndex = 0;
10081 ctxt->instate = XML_PARSER_CONTENT;
10082#ifdef DEBUG_PUSH
10083 fprintf(stderr, "PP: entering CONTENT\n");
10084#endif
10085 }
10086 break;
10087 }
Daniel Veillard5e5c6231999-12-29 12:49:06 +000010088 case XML_PARSER_END_TAG:
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010089 if (avail < 2)
10090 goto done;
Daniel Veillard71b656e2000-01-05 14:46:17 +000010091 if ((!terminate) &&
10092 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010093 goto done;
10094 xmlParseEndTag(ctxt);
10095 if (ctxt->name == NULL) {
10096 ctxt->instate = XML_PARSER_EPILOG;
10097#ifdef DEBUG_PUSH
10098 fprintf(stderr, "PP: entering EPILOG\n");
10099#endif
10100 } else {
10101 ctxt->instate = XML_PARSER_CONTENT;
10102#ifdef DEBUG_PUSH
10103 fprintf(stderr, "PP: entering CONTENT\n");
10104#endif
10105 }
10106 break;
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010107 case XML_PARSER_DTD: {
10108 /*
10109 * Sorry but progressive parsing of the internal subset
10110 * is not expected to be supported. We first check that
10111 * the full content of the internal subset is available and
10112 * the parsing is launched only at that point.
10113 * Internal subset ends up with "']' S? '>'" in an unescaped
10114 * section and not in a ']]>' sequence which are conditional
10115 * sections (whoever argued to keep that crap in XML deserve
10116 * a place in hell !).
10117 */
10118 int base, i;
10119 xmlChar *buf;
10120 xmlChar quote = 0;
10121
Daniel Veillardcf461992000-03-14 18:30:20 +000010122 base = ctxt->input->cur - ctxt->input->base;
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010123 if (base < 0) return(0);
10124 if (ctxt->checkIndex > base)
10125 base = ctxt->checkIndex;
Daniel Veillardcf461992000-03-14 18:30:20 +000010126 buf = ctxt->input->buf->buffer->content;
Daniel Veillard3f6f7f62000-06-30 17:58:25 +000010127 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
10128 base++) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010129 if (quote != 0) {
10130 if (buf[base] == quote)
10131 quote = 0;
10132 continue;
10133 }
10134 if (buf[base] == '"') {
10135 quote = '"';
10136 continue;
10137 }
10138 if (buf[base] == '\'') {
10139 quote = '\'';
10140 continue;
10141 }
10142 if (buf[base] == ']') {
Daniel Veillard3f6f7f62000-06-30 17:58:25 +000010143 if ((unsigned int) base +1 >=
10144 ctxt->input->buf->buffer->use)
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010145 break;
10146 if (buf[base + 1] == ']') {
10147 /* conditional crap, skip both ']' ! */
10148 base++;
10149 continue;
10150 }
Daniel Veillard3f6f7f62000-06-30 17:58:25 +000010151 for (i = 0;
10152 (unsigned int) base + i < ctxt->input->buf->buffer->use;
10153 i++) {
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010154 if (buf[base + i] == '>')
10155 goto found_end_int_subset;
10156 }
10157 break;
10158 }
10159 }
10160 /*
10161 * We didn't found the end of the Internal subset
10162 */
10163 if (quote == 0)
10164 ctxt->checkIndex = base;
10165#ifdef DEBUG_PUSH
10166 if (next == 0)
10167 fprintf(stderr, "PP: lookup of int subset end filed\n");
10168#endif
10169 goto done;
10170
10171found_end_int_subset:
10172 xmlParseInternalSubset(ctxt);
Daniel Veillardcf461992000-03-14 18:30:20 +000010173 ctxt->inSubset = 2;
10174 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
10175 (ctxt->sax->externalSubset != NULL))
10176 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
10177 ctxt->extSubSystem, ctxt->extSubURI);
10178 ctxt->inSubset = 0;
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010179 ctxt->instate = XML_PARSER_PROLOG;
10180 ctxt->checkIndex = 0;
10181#ifdef DEBUG_PUSH
10182 fprintf(stderr, "PP: entering PROLOG\n");
10183#endif
10184 break;
10185 }
Daniel Veillard7f858501999-11-17 17:32:38 +000010186 case XML_PARSER_COMMENT:
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010187 fprintf(stderr, "PP: internal error, state == COMMENT\n");
10188 ctxt->instate = XML_PARSER_CONTENT;
10189#ifdef DEBUG_PUSH
10190 fprintf(stderr, "PP: entering CONTENT\n");
10191#endif
10192 break;
10193 case XML_PARSER_PI:
10194 fprintf(stderr, "PP: internal error, state == PI\n");
10195 ctxt->instate = XML_PARSER_CONTENT;
10196#ifdef DEBUG_PUSH
10197 fprintf(stderr, "PP: entering CONTENT\n");
10198#endif
10199 break;
10200 case XML_PARSER_ENTITY_DECL:
10201 fprintf(stderr, "PP: internal error, state == ENTITY_DECL\n");
10202 ctxt->instate = XML_PARSER_DTD;
10203#ifdef DEBUG_PUSH
10204 fprintf(stderr, "PP: entering DTD\n");
10205#endif
10206 break;
10207 case XML_PARSER_ENTITY_VALUE:
10208 fprintf(stderr, "PP: internal error, state == ENTITY_VALUE\n");
10209 ctxt->instate = XML_PARSER_CONTENT;
10210#ifdef DEBUG_PUSH
10211 fprintf(stderr, "PP: entering DTD\n");
10212#endif
10213 break;
10214 case XML_PARSER_ATTRIBUTE_VALUE:
10215 fprintf(stderr, "PP: internal error, state == ATTRIBUTE_VALUE\n");
10216 ctxt->instate = XML_PARSER_START_TAG;
10217#ifdef DEBUG_PUSH
10218 fprintf(stderr, "PP: entering START_TAG\n");
10219#endif
10220 break;
Daniel Veillardcf461992000-03-14 18:30:20 +000010221 case XML_PARSER_SYSTEM_LITERAL:
10222 fprintf(stderr, "PP: internal error, state == SYSTEM_LITERAL\n");
10223 ctxt->instate = XML_PARSER_START_TAG;
10224#ifdef DEBUG_PUSH
10225 fprintf(stderr, "PP: entering START_TAG\n");
10226#endif
10227 break;
Daniel Veillard7f858501999-11-17 17:32:38 +000010228 }
10229 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010230done:
10231#ifdef DEBUG_PUSH
10232 fprintf(stderr, "PP: done %d\n", ret);
10233#endif
Daniel Veillard7f858501999-11-17 17:32:38 +000010234 return(ret);
10235}
10236
10237/**
Daniel Veillard71b656e2000-01-05 14:46:17 +000010238 * xmlParseTry:
10239 * @ctxt: an XML parser context
10240 *
10241 * Try to progress on parsing
10242 *
10243 * Returns zero if no parsing was possible
10244 */
10245int
10246xmlParseTry(xmlParserCtxtPtr ctxt) {
10247 return(xmlParseTryOrFinish(ctxt, 0));
10248}
10249
10250/**
Daniel Veillard7f858501999-11-17 17:32:38 +000010251 * xmlParseChunk:
10252 * @ctxt: an XML parser context
10253 * @chunk: an char array
10254 * @size: the size in byte of the chunk
10255 * @terminate: last chunk indicator
10256 *
10257 * Parse a Chunk of memory
10258 *
10259 * Returns zero if no error, the xmlParserErrors otherwise.
10260 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010261int
Daniel Veillard7f858501999-11-17 17:32:38 +000010262xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
10263 int terminate) {
Daniel Veillarda819dac1999-11-24 18:04:22 +000010264 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010265 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
10266 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
10267 int cur = ctxt->input->cur - ctxt->input->base;
10268
Daniel Veillarda819dac1999-11-24 18:04:22 +000010269 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010270 ctxt->input->base = ctxt->input->buf->buffer->content + base;
10271 ctxt->input->cur = ctxt->input->base + cur;
10272#ifdef DEBUG_PUSH
10273 fprintf(stderr, "PP: pushed %d\n", size);
10274#endif
10275
Daniel Veillardd0f7f742000-02-02 17:42:48 +000010276 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
10277 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010278 } else if (ctxt->instate != XML_PARSER_EOF)
Daniel Veillard71b656e2000-01-05 14:46:17 +000010279 xmlParseTryOrFinish(ctxt, terminate);
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010280 if (terminate) {
Daniel Veillardcf461992000-03-14 18:30:20 +000010281 /*
Daniel Veillardcf461992000-03-14 18:30:20 +000010282 * Check for termination
10283 */
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010284 if ((ctxt->instate != XML_PARSER_EOF) &&
10285 (ctxt->instate != XML_PARSER_EPILOG)) {
10286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10287 ctxt->sax->error(ctxt->userData,
10288 "Extra content at the end of the document\n");
10289 ctxt->wellFormed = 0;
Daniel Veillardcf461992000-03-14 18:30:20 +000010290 ctxt->disableSAX = 1;
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010291 ctxt->errNo = XML_ERR_DOCUMENT_END;
10292 }
10293 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillardcf461992000-03-14 18:30:20 +000010294 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
10295 (!ctxt->disableSAX))
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010296 ctxt->sax->endDocument(ctxt->userData);
10297 }
10298 ctxt->instate = XML_PARSER_EOF;
Daniel Veillard7f858501999-11-17 17:32:38 +000010299 }
10300 return((xmlParserErrors) ctxt->errNo);
10301}
10302
10303/************************************************************************
10304 * *
Daniel Veillardb05deb71999-08-10 19:04:08 +000010305 * I/O front end functions to the parser *
10306 * *
10307 ************************************************************************/
10308
Daniel Veillard11e00581998-10-24 18:27:49 +000010309/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010310 * xmlCreatePushParserCtxt:
Daniel Veillard3f6f7f62000-06-30 17:58:25 +000010311 * @ctxt: an XML parser context
10312 *
10313 * Blocks further parser processing
10314 */
10315void
10316xmlStopParser(xmlParserCtxtPtr ctxt) {
10317 ctxt->instate = XML_PARSER_EOF;
10318 if (ctxt->input != NULL)
10319 ctxt->input->cur = BAD_CAST"";
10320}
10321
10322/**
10323 * xmlCreatePushParserCtxt:
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010324 * @sax: a SAX handler
10325 * @user_data: The user data returned on SAX callbacks
10326 * @chunk: a pointer to an array of chars
10327 * @size: number of chars in the array
10328 * @filename: an optional file name or URI
10329 *
10330 * Create a parser context for using the XML parser in push mode
10331 * To allow content encoding detection, @size should be >= 4
10332 * The value of @filename is used for fetching external entities
10333 * and error/warning reports.
10334 *
10335 * Returns the new parser context or NULL
10336 */
10337xmlParserCtxtPtr
10338xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10339 const char *chunk, int size, const char *filename) {
10340 xmlParserCtxtPtr ctxt;
10341 xmlParserInputPtr inputStream;
10342 xmlParserInputBufferPtr buf;
10343 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
10344
10345 /*
Daniel Veillardcf461992000-03-14 18:30:20 +000010346 * plug some encoding conversion routines
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010347 */
10348 if ((chunk != NULL) && (size >= 4))
Daniel Veillardcf461992000-03-14 18:30:20 +000010349 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010350
10351 buf = xmlAllocParserInputBuffer(enc);
10352 if (buf == NULL) return(NULL);
10353
10354 ctxt = xmlNewParserCtxt();
10355 if (ctxt == NULL) {
10356 xmlFree(buf);
10357 return(NULL);
10358 }
10359 if (sax != NULL) {
10360 if (ctxt->sax != &xmlDefaultSAXHandler)
10361 xmlFree(ctxt->sax);
10362 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10363 if (ctxt->sax == NULL) {
10364 xmlFree(buf);
10365 xmlFree(ctxt);
10366 return(NULL);
10367 }
10368 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10369 if (user_data != NULL)
10370 ctxt->userData = user_data;
10371 }
10372 if (filename == NULL) {
10373 ctxt->directory = NULL;
10374 } else {
10375 ctxt->directory = xmlParserGetDirectory(filename);
10376 }
10377
10378 inputStream = xmlNewInputStream(ctxt);
10379 if (inputStream == NULL) {
10380 xmlFreeParserCtxt(ctxt);
10381 return(NULL);
10382 }
10383
10384 if (filename == NULL)
10385 inputStream->filename = NULL;
10386 else
10387 inputStream->filename = xmlMemStrdup(filename);
10388 inputStream->buf = buf;
10389 inputStream->base = inputStream->buf->buffer->content;
10390 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillardcf461992000-03-14 18:30:20 +000010391 if (enc != XML_CHAR_ENCODING_NONE) {
10392 xmlSwitchEncoding(ctxt, enc);
10393 }
Daniel Veillarddbfd6411999-12-28 16:35:14 +000010394
10395 inputPush(ctxt, inputStream);
10396
10397 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
10398 (ctxt->input->buf != NULL)) {
10399 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
10400#ifdef DEBUG_PUSH
10401 fprintf(stderr, "PP: pushed %d\n", size);
10402#endif
10403 }
10404
10405 return(ctxt);
10406}
10407
10408/**
Daniel Veillard5e873c42000-04-12 13:27:38 +000010409 * xmlCreateIOParserCtxt:
10410 * @sax: a SAX handler
10411 * @user_data: The user data returned on SAX callbacks
10412 * @ioread: an I/O read function
10413 * @ioclose: an I/O close function
10414 * @ioctx: an I/O handler
10415 * @enc: the charset encoding if known
10416 *
10417 * Create a parser context for using the XML parser with an existing
10418 * I/O stream
10419 *
10420 * Returns the new parser context or NULL
10421 */
10422xmlParserCtxtPtr
10423xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
10424 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
10425 void *ioctx, xmlCharEncoding enc) {
10426 xmlParserCtxtPtr ctxt;
10427 xmlParserInputPtr inputStream;
10428 xmlParserInputBufferPtr buf;
10429
10430 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
10431 if (buf == NULL) return(NULL);
10432
10433 ctxt = xmlNewParserCtxt();
10434 if (ctxt == NULL) {
10435 xmlFree(buf);
10436 return(NULL);
10437 }
10438 if (sax != NULL) {
10439 if (ctxt->sax != &xmlDefaultSAXHandler)
10440 xmlFree(ctxt->sax);
10441 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
10442 if (ctxt->sax == NULL) {
10443 xmlFree(buf);
10444 xmlFree(ctxt);
10445 return(NULL);
10446 }
10447 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
10448 if (user_data != NULL)
10449 ctxt->userData = user_data;
10450 }
10451
10452 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
10453 if (inputStream == NULL) {
10454 xmlFreeParserCtxt(ctxt);
10455 return(NULL);
10456 }
10457 inputPush(ctxt, inputStream);
10458
10459 return(ctxt);
10460}
10461
10462/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010463 * xmlCreateDocParserCtxt:
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010464 * @cur: a pointer to an array of xmlChar
Daniel Veillardd692aa41999-02-28 21:54:31 +000010465 *
Daniel Veillard06047432000-04-24 11:33:38 +000010466 * Creates a parser context for an XML in-memory document.
Daniel Veillardd692aa41999-02-28 21:54:31 +000010467 *
10468 * Returns the new parser context or NULL
10469 */
10470xmlParserCtxtPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010471xmlCreateDocParserCtxt(xmlChar *cur) {
Daniel Veillardd692aa41999-02-28 21:54:31 +000010472 xmlParserCtxtPtr ctxt;
10473 xmlParserInputPtr input;
10474
Daniel Veillardb05deb71999-08-10 19:04:08 +000010475 ctxt = xmlNewParserCtxt();
Daniel Veillardd692aa41999-02-28 21:54:31 +000010476 if (ctxt == NULL) {
Daniel Veillardd692aa41999-02-28 21:54:31 +000010477 return(NULL);
10478 }
Daniel Veillardb05deb71999-08-10 19:04:08 +000010479 input = xmlNewInputStream(ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +000010480 if (input == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +000010481 xmlFreeParserCtxt(ctxt);
Daniel Veillardd692aa41999-02-28 21:54:31 +000010482 return(NULL);
10483 }
10484
Daniel Veillardd692aa41999-02-28 21:54:31 +000010485 input->base = cur;
10486 input->cur = cur;
Daniel Veillardd692aa41999-02-28 21:54:31 +000010487
10488 inputPush(ctxt, input);
10489 return(ctxt);
10490}
10491
10492/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010493 * xmlSAXParseDoc:
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010494 * @sax: the SAX handler block
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010495 * @cur: a pointer to an array of xmlChar
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010496 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10497 * documents
Daniel Veillard11e00581998-10-24 18:27:49 +000010498 *
10499 * parse an XML in-memory document and build a tree.
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010500 * It use the given SAX function block to handle the parsing callback.
10501 * If sax is NULL, fallback to the default DOM tree building routines.
Daniel Veillard11e00581998-10-24 18:27:49 +000010502 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000010503 * Returns the resulting document tree
Daniel Veillard260a68f1998-08-13 03:39:55 +000010504 */
10505
Daniel Veillard1e346af1999-02-22 10:33:01 +000010506xmlDocPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010507xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
Daniel Veillard260a68f1998-08-13 03:39:55 +000010508 xmlDocPtr ret;
10509 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +000010510
10511 if (cur == NULL) return(NULL);
10512
Daniel Veillardd692aa41999-02-28 21:54:31 +000010513
10514 ctxt = xmlCreateDocParserCtxt(cur);
10515 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +000010516 if (sax != NULL) {
10517 ctxt->sax = sax;
10518 ctxt->userData = NULL;
10519 }
Daniel Veillard260a68f1998-08-13 03:39:55 +000010520
10521 xmlParseDocument(ctxt);
Daniel Veillard517752b1999-04-05 12:20:10 +000010522 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010523 else {
10524 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +000010525 xmlFreeDoc(ctxt->myDoc);
10526 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010527 }
Daniel Veillard97fea181999-06-26 23:07:37 +000010528 if (sax != NULL)
10529 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +000010530 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000010531
10532 return(ret);
10533}
10534
Daniel Veillard11e00581998-10-24 18:27:49 +000010535/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010536 * xmlParseDoc:
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010537 * @cur: a pointer to an array of xmlChar
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010538 *
10539 * parse an XML in-memory document and build a tree.
10540 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000010541 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +000010542 */
10543
Daniel Veillard1e346af1999-02-22 10:33:01 +000010544xmlDocPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010545xmlParseDoc(xmlChar *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000010546 return(xmlSAXParseDoc(NULL, cur, 0));
10547}
10548
10549/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010550 * xmlSAXParseDTD:
Daniel Veillard011b63c1999-06-02 17:44:04 +000010551 * @sax: the SAX handler block
10552 * @ExternalID: a NAME* containing the External ID of the DTD
10553 * @SystemID: a NAME* containing the URL to the DTD
10554 *
10555 * Load and parse an external subset.
10556 *
10557 * Returns the resulting xmlDtdPtr or NULL in case of error.
10558 */
10559
10560xmlDtdPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010561xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
10562 const xmlChar *SystemID) {
Daniel Veillard011b63c1999-06-02 17:44:04 +000010563 xmlDtdPtr ret = NULL;
10564 xmlParserCtxtPtr ctxt;
Daniel Veillard14fff061999-06-22 21:49:07 +000010565 xmlParserInputPtr input = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +000010566 xmlCharEncoding enc;
10567
10568 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
10569
Daniel Veillardb05deb71999-08-10 19:04:08 +000010570 ctxt = xmlNewParserCtxt();
Daniel Veillard011b63c1999-06-02 17:44:04 +000010571 if (ctxt == NULL) {
Daniel Veillard011b63c1999-06-02 17:44:04 +000010572 return(NULL);
10573 }
Daniel Veillard011b63c1999-06-02 17:44:04 +000010574
10575 /*
10576 * Set-up the SAX context
10577 */
Daniel Veillard011b63c1999-06-02 17:44:04 +000010578 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +000010579 if (ctxt->sax != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +000010580 xmlFree(ctxt->sax);
Daniel Veillard011b63c1999-06-02 17:44:04 +000010581 ctxt->sax = sax;
10582 ctxt->userData = NULL;
10583 }
10584
10585 /*
10586 * Ask the Entity resolver to load the damn thing
10587 */
10588
10589 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
10590 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
10591 if (input == NULL) {
Daniel Veillard97fea181999-06-26 23:07:37 +000010592 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +000010593 xmlFreeParserCtxt(ctxt);
10594 return(NULL);
10595 }
10596
10597 /*
Daniel Veillardcf461992000-03-14 18:30:20 +000010598 * plug some encoding conversion routines here.
Daniel Veillard011b63c1999-06-02 17:44:04 +000010599 */
10600 xmlPushInput(ctxt, input);
Daniel Veillardcf461992000-03-14 18:30:20 +000010601 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
Daniel Veillard011b63c1999-06-02 17:44:04 +000010602 xmlSwitchEncoding(ctxt, enc);
10603
Daniel Veillardb05deb71999-08-10 19:04:08 +000010604 if (input->filename == NULL)
Daniel Veillardcf461992000-03-14 18:30:20 +000010605 input->filename = (char *) xmlStrdup(SystemID);
Daniel Veillard011b63c1999-06-02 17:44:04 +000010606 input->line = 1;
10607 input->col = 1;
10608 input->base = ctxt->input->cur;
10609 input->cur = ctxt->input->cur;
10610 input->free = NULL;
10611
10612 /*
10613 * let's parse that entity knowing it's an external subset.
10614 */
Daniel Veillard06047432000-04-24 11:33:38 +000010615 ctxt->inSubset = 2;
10616 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10617 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
10618 ExternalID, SystemID);
Daniel Veillard011b63c1999-06-02 17:44:04 +000010619 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
10620
10621 if (ctxt->myDoc != NULL) {
10622 if (ctxt->wellFormed) {
Daniel Veillard06047432000-04-24 11:33:38 +000010623 ret = ctxt->myDoc->extSubset;
10624 ctxt->myDoc->extSubset = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +000010625 } else {
10626 ret = NULL;
10627 }
10628 xmlFreeDoc(ctxt->myDoc);
10629 ctxt->myDoc = NULL;
10630 }
Daniel Veillard97fea181999-06-26 23:07:37 +000010631 if (sax != NULL) ctxt->sax = NULL;
Daniel Veillard011b63c1999-06-02 17:44:04 +000010632 xmlFreeParserCtxt(ctxt);
10633
10634 return(ret);
10635}
10636
10637/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010638 * xmlParseDTD:
Daniel Veillard011b63c1999-06-02 17:44:04 +000010639 * @ExternalID: a NAME* containing the External ID of the DTD
10640 * @SystemID: a NAME* containing the URL to the DTD
10641 *
10642 * Load and parse an external subset.
10643 *
10644 * Returns the resulting xmlDtdPtr or NULL in case of error.
10645 */
10646
10647xmlDtdPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +000010648xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
Daniel Veillard011b63c1999-06-02 17:44:04 +000010649 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
10650}
10651
10652/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010653 * xmlSAXParseBalancedChunk:
Daniel Veillard0142b842000-01-14 14:45:24 +000010654 * @ctx: an XML parser context (possibly NULL)
10655 * @sax: the SAX handler bloc (possibly NULL)
10656 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10657 * @input: a parser input stream
10658 * @enc: the encoding
10659 *
10660 * Parse a well-balanced chunk of an XML document
10661 * The user has to provide SAX callback block whose routines will be
10662 * called by the parser
10663 * The allowed sequence for the Well Balanced Chunk is the one defined by
10664 * the content production in the XML grammar:
10665 *
10666 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10667 *
Daniel Veillardcf461992000-03-14 18:30:20 +000010668 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
Daniel Veillard0142b842000-01-14 14:45:24 +000010669 * the error code otherwise
10670 */
10671
10672int
10673xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
10674 void *user_data, xmlParserInputPtr input,
10675 xmlCharEncoding enc) {
10676 xmlParserCtxtPtr ctxt;
10677 int ret;
10678
10679 if (input == NULL) return(-1);
10680
10681 if (ctx != NULL)
10682 ctxt = ctx;
10683 else {
10684 ctxt = xmlNewParserCtxt();
10685 if (ctxt == NULL)
10686 return(-1);
10687 if (sax == NULL)
10688 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
10689 }
10690
10691 /*
10692 * Set-up the SAX context
10693 */
10694 if (sax != NULL) {
10695 if (ctxt->sax != NULL)
10696 xmlFree(ctxt->sax);
10697 ctxt->sax = sax;
10698 ctxt->userData = user_data;
10699 }
10700
10701 /*
10702 * plug some encoding conversion routines here.
10703 */
10704 xmlPushInput(ctxt, input);
10705 if (enc != XML_CHAR_ENCODING_NONE)
10706 xmlSwitchEncoding(ctxt, enc);
10707
10708 /*
10709 * let's parse that entity knowing it's an external subset.
10710 */
10711 xmlParseContent(ctxt);
10712 ret = ctxt->errNo;
10713
10714 if (ctx == NULL) {
10715 if (sax != NULL)
10716 ctxt->sax = NULL;
10717 else
10718 xmlFreeDoc(ctxt->myDoc);
10719 xmlFreeParserCtxt(ctxt);
10720 }
10721 return(ret);
10722}
10723
10724/**
Daniel Veillard87b95392000-08-12 21:12:04 +000010725 * xmlParseCtxtExternalEntity:
10726 * @ctx: the existing parsing context
10727 * @URL: the URL for the entity to load
10728 * @ID: the System ID for the entity to load
10729 * @list: the return value for the set of parsed nodes
10730 *
10731 * Parse an external general entity within an existing parsing context
10732 * An external general parsed entity is well-formed if it matches the
10733 * production labeled extParsedEnt.
10734 *
10735 * [78] extParsedEnt ::= TextDecl? content
10736 *
10737 * Returns 0 if the entity is well formed, -1 in case of args problem and
10738 * the parser error code otherwise
10739 */
10740
10741int
10742xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
10743 const xmlChar *ID, xmlNodePtr *list) {
10744 xmlParserCtxtPtr ctxt;
10745 xmlDocPtr newDoc;
10746 xmlSAXHandlerPtr oldsax = NULL;
10747 int ret = 0;
10748
10749 if (ctx->depth > 40) {
10750 return(XML_ERR_ENTITY_LOOP);
10751 }
10752
10753 if (list != NULL)
10754 *list = NULL;
10755 if ((URL == NULL) && (ID == NULL))
10756 return(-1);
10757 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
10758 return(-1);
10759
10760
10761 ctxt = xmlCreateEntityParserCtxt(URL, ID, ctx->myDoc->URL);
10762 if (ctxt == NULL) return(-1);
10763 ctxt->userData = ctxt;
10764 oldsax = ctxt->sax;
10765 ctxt->sax = ctx->sax;
10766 newDoc = xmlNewDoc(BAD_CAST "1.0");
10767 if (newDoc == NULL) {
10768 xmlFreeParserCtxt(ctxt);
10769 return(-1);
10770 }
10771 if (ctx->myDoc != NULL) {
10772 newDoc->intSubset = ctx->myDoc->intSubset;
10773 newDoc->extSubset = ctx->myDoc->extSubset;
10774 }
10775 if (ctx->myDoc->URL != NULL) {
10776 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
10777 }
10778 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10779 if (newDoc->children == NULL) {
10780 ctxt->sax = oldsax;
10781 xmlFreeParserCtxt(ctxt);
10782 newDoc->intSubset = NULL;
10783 newDoc->extSubset = NULL;
10784 xmlFreeDoc(newDoc);
10785 return(-1);
10786 }
10787 nodePush(ctxt, newDoc->children);
10788 if (ctx->myDoc == NULL) {
10789 ctxt->myDoc = newDoc;
10790 } else {
10791 ctxt->myDoc = ctx->myDoc;
10792 newDoc->children->doc = ctx->myDoc;
10793 }
10794
10795 /*
10796 * Parse a possible text declaration first
10797 */
10798 GROW;
10799 if ((RAW == '<') && (NXT(1) == '?') &&
10800 (NXT(2) == 'x') && (NXT(3) == 'm') &&
10801 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10802 xmlParseTextDecl(ctxt);
10803 }
10804
10805 /*
10806 * Doing validity checking on chunk doesn't make sense
10807 */
10808 ctxt->instate = XML_PARSER_CONTENT;
10809 ctxt->validate = ctx->validate;
10810 ctxt->depth = ctx->depth + 1;
10811 ctxt->replaceEntities = ctx->replaceEntities;
10812 if (ctxt->validate) {
10813 ctxt->vctxt.error = ctx->vctxt.error;
10814 ctxt->vctxt.warning = ctx->vctxt.warning;
10815 /* Allocate the Node stack */
10816 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
10817 ctxt->vctxt.nodeNr = 0;
10818 ctxt->vctxt.nodeMax = 4;
10819 ctxt->vctxt.node = NULL;
10820 } else {
10821 ctxt->vctxt.error = NULL;
10822 ctxt->vctxt.warning = NULL;
10823 }
10824
10825 xmlParseContent(ctxt);
10826
10827 if ((RAW == '<') && (NXT(1) == '/')) {
10828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10829 ctxt->sax->error(ctxt->userData,
10830 "chunk is not well balanced\n");
10831 ctxt->wellFormed = 0;
10832 ctxt->disableSAX = 1;
10833 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10834 } else if (RAW != 0) {
10835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10836 ctxt->sax->error(ctxt->userData,
10837 "extra content at the end of well balanced chunk\n");
10838 ctxt->wellFormed = 0;
10839 ctxt->disableSAX = 1;
10840 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10841 }
10842 if (ctxt->node != newDoc->children) {
10843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10844 ctxt->sax->error(ctxt->userData,
10845 "chunk is not well balanced\n");
10846 ctxt->wellFormed = 0;
10847 ctxt->disableSAX = 1;
10848 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10849 }
10850
10851 if (!ctxt->wellFormed) {
10852 if (ctxt->errNo == 0)
10853 ret = 1;
10854 else
10855 ret = ctxt->errNo;
10856 } else {
10857 if (list != NULL) {
10858 xmlNodePtr cur;
10859
10860 /*
10861 * Return the newly created nodeset after unlinking it from
10862 * they pseudo parent.
10863 */
10864 cur = newDoc->children->children;
10865 *list = cur;
10866 while (cur != NULL) {
10867 cur->parent = NULL;
10868 cur = cur->next;
10869 }
10870 newDoc->children->children = NULL;
10871 }
10872 ret = 0;
10873 }
10874 ctxt->sax = oldsax;
10875 xmlFreeParserCtxt(ctxt);
10876 newDoc->intSubset = NULL;
10877 newDoc->extSubset = NULL;
10878 xmlFreeDoc(newDoc);
10879
10880 return(ret);
10881}
10882
10883/**
Daniel Veillardcf461992000-03-14 18:30:20 +000010884 * xmlParseExternalEntity:
Daniel Veillard0142b842000-01-14 14:45:24 +000010885 * @doc: the document the chunk pertains to
Daniel Veillardcf461992000-03-14 18:30:20 +000010886 * @sax: the SAX handler bloc (possibly NULL)
10887 * @user_data: The user data returned on SAX callbacks (possibly NULL)
10888 * @depth: Used for loop detection, use 0
10889 * @URL: the URL for the entity to load
10890 * @ID: the System ID for the entity to load
10891 * @list: the return value for the set of parsed nodes
Daniel Veillard0142b842000-01-14 14:45:24 +000010892 *
Daniel Veillardcf461992000-03-14 18:30:20 +000010893 * Parse an external general entity
10894 * An external general parsed entity is well-formed if it matches the
10895 * production labeled extParsedEnt.
10896 *
10897 * [78] extParsedEnt ::= TextDecl? content
10898 *
10899 * Returns 0 if the entity is well formed, -1 in case of args problem and
10900 * the parser error code otherwise
Daniel Veillard0142b842000-01-14 14:45:24 +000010901 */
10902
Daniel Veillardcf461992000-03-14 18:30:20 +000010903int
10904xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
10905 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
10906 xmlParserCtxtPtr ctxt;
10907 xmlDocPtr newDoc;
10908 xmlSAXHandlerPtr oldsax = NULL;
10909 int ret = 0;
10910
10911 if (depth > 40) {
10912 return(XML_ERR_ENTITY_LOOP);
10913 }
10914
10915
10916
10917 if (list != NULL)
10918 *list = NULL;
10919 if ((URL == NULL) && (ID == NULL))
10920 return(-1);
Daniel Veillard87b95392000-08-12 21:12:04 +000010921 if (doc == NULL) /* @@ relax but check for dereferences */
10922 return(-1);
Daniel Veillardcf461992000-03-14 18:30:20 +000010923
10924
10925 ctxt = xmlCreateEntityParserCtxt(URL, ID, doc->URL);
10926 if (ctxt == NULL) return(-1);
10927 ctxt->userData = ctxt;
10928 if (sax != NULL) {
10929 oldsax = ctxt->sax;
10930 ctxt->sax = sax;
10931 if (user_data != NULL)
10932 ctxt->userData = user_data;
10933 }
10934 newDoc = xmlNewDoc(BAD_CAST "1.0");
10935 if (newDoc == NULL) {
10936 xmlFreeParserCtxt(ctxt);
10937 return(-1);
10938 }
10939 if (doc != NULL) {
10940 newDoc->intSubset = doc->intSubset;
10941 newDoc->extSubset = doc->extSubset;
10942 }
10943 if (doc->URL != NULL) {
10944 newDoc->URL = xmlStrdup(doc->URL);
10945 }
10946 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
10947 if (newDoc->children == NULL) {
10948 if (sax != NULL)
10949 ctxt->sax = oldsax;
10950 xmlFreeParserCtxt(ctxt);
10951 newDoc->intSubset = NULL;
10952 newDoc->extSubset = NULL;
10953 xmlFreeDoc(newDoc);
10954 return(-1);
10955 }
10956 nodePush(ctxt, newDoc->children);
10957 if (doc == NULL) {
10958 ctxt->myDoc = newDoc;
10959 } else {
10960 ctxt->myDoc = doc;
10961 newDoc->children->doc = doc;
10962 }
10963
10964 /*
10965 * Parse a possible text declaration first
10966 */
10967 GROW;
10968 if ((RAW == '<') && (NXT(1) == '?') &&
10969 (NXT(2) == 'x') && (NXT(3) == 'm') &&
10970 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
10971 xmlParseTextDecl(ctxt);
10972 }
10973
10974 /*
10975 * Doing validity checking on chunk doesn't make sense
10976 */
10977 ctxt->instate = XML_PARSER_CONTENT;
10978 ctxt->validate = 0;
10979 ctxt->depth = depth;
10980
10981 xmlParseContent(ctxt);
10982
10983 if ((RAW == '<') && (NXT(1) == '/')) {
10984 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10985 ctxt->sax->error(ctxt->userData,
10986 "chunk is not well balanced\n");
10987 ctxt->wellFormed = 0;
10988 ctxt->disableSAX = 1;
10989 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
10990 } else if (RAW != 0) {
10991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
10992 ctxt->sax->error(ctxt->userData,
10993 "extra content at the end of well balanced chunk\n");
10994 ctxt->wellFormed = 0;
10995 ctxt->disableSAX = 1;
10996 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
10997 }
10998 if (ctxt->node != newDoc->children) {
10999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
11000 ctxt->sax->error(ctxt->userData,
11001 "chunk is not well balanced\n");
11002 ctxt->wellFormed = 0;
11003 ctxt->disableSAX = 1;
11004 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
11005 }
11006
11007 if (!ctxt->wellFormed) {
11008 if (ctxt->errNo == 0)
11009 ret = 1;
11010 else
11011 ret = ctxt->errNo;
11012 } else {
11013 if (list != NULL) {
11014 xmlNodePtr cur;
11015
11016 /*
11017 * Return the newly created nodeset after unlinking it from
11018 * they pseudo parent.
11019 */
11020 cur = newDoc->children->children;
11021 *list = cur;
11022 while (cur != NULL) {
11023 cur->parent = NULL;
11024 cur = cur->next;
11025 }
11026 newDoc->children->children = NULL;
11027 }
11028 ret = 0;
11029 }
11030 if (sax != NULL)
11031 ctxt->sax = oldsax;
11032 xmlFreeParserCtxt(ctxt);
11033 newDoc->intSubset = NULL;
11034 newDoc->extSubset = NULL;
11035 xmlFreeDoc(newDoc);
11036
11037 return(ret);
Daniel Veillard0142b842000-01-14 14:45:24 +000011038}
11039
11040/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011041 * xmlParseBalancedChunk:
11042 * @doc: the document the chunk pertains to
11043 * @sax: the SAX handler bloc (possibly NULL)
11044 * @user_data: The user data returned on SAX callbacks (possibly NULL)
11045 * @depth: Used for loop detection, use 0
11046 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
11047 * @list: the return value for the set of parsed nodes
11048 *
11049 * Parse a well-balanced chunk of an XML document
11050 * called by the parser
11051 * The allowed sequence for the Well Balanced Chunk is the one defined by
11052 * the content production in the XML grammar:
11053 *
11054 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
11055 *
11056 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
11057 * the parser error code otherwise
11058 */
11059
11060int
11061xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
11062 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
11063 xmlParserCtxtPtr ctxt;
11064 xmlDocPtr newDoc;
11065 xmlSAXHandlerPtr oldsax = NULL;
11066 int size;
11067 int ret = 0;
11068
11069 if (depth > 40) {
11070 return(XML_ERR_ENTITY_LOOP);
11071 }
11072
11073
11074 if (list != NULL)
11075 *list = NULL;
11076 if (string == NULL)
11077 return(-1);
11078
11079 size = xmlStrlen(string);
11080
11081 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
11082 if (ctxt == NULL) return(-1);
11083 ctxt->userData = ctxt;
11084 if (sax != NULL) {
11085 oldsax = ctxt->sax;
11086 ctxt->sax = sax;
11087 if (user_data != NULL)
11088 ctxt->userData = user_data;
11089 }
11090 newDoc = xmlNewDoc(BAD_CAST "1.0");
11091 if (newDoc == NULL) {
11092 xmlFreeParserCtxt(ctxt);
11093 return(-1);
11094 }
11095 if (doc != NULL) {
11096 newDoc->intSubset = doc->intSubset;
11097 newDoc->extSubset = doc->extSubset;
11098 }
11099 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
11100 if (newDoc->children == NULL) {
11101 if (sax != NULL)
11102 ctxt->sax = oldsax;
11103 xmlFreeParserCtxt(ctxt);
11104 newDoc->intSubset = NULL;
11105 newDoc->extSubset = NULL;
11106 xmlFreeDoc(newDoc);
11107 return(-1);
11108 }
11109 nodePush(ctxt, newDoc->children);
11110 if (doc == NULL) {
11111 ctxt->myDoc = newDoc;
11112 } else {
11113 ctxt->myDoc = doc;
11114 newDoc->children->doc = doc;
11115 }
11116 ctxt->instate = XML_PARSER_CONTENT;
11117 ctxt->depth = depth;
11118
11119 /*
11120 * Doing validity checking on chunk doesn't make sense
11121 */
11122 ctxt->validate = 0;
11123
11124 xmlParseContent(ctxt);
11125
11126 if ((RAW == '<') && (NXT(1) == '/')) {
11127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
11128 ctxt->sax->error(ctxt->userData,
11129 "chunk is not well balanced\n");
11130 ctxt->wellFormed = 0;
11131 ctxt->disableSAX = 1;
11132 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
11133 } else if (RAW != 0) {
11134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
11135 ctxt->sax->error(ctxt->userData,
11136 "extra content at the end of well balanced chunk\n");
11137 ctxt->wellFormed = 0;
11138 ctxt->disableSAX = 1;
11139 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
11140 }
11141 if (ctxt->node != newDoc->children) {
11142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
11143 ctxt->sax->error(ctxt->userData,
11144 "chunk is not well balanced\n");
11145 ctxt->wellFormed = 0;
11146 ctxt->disableSAX = 1;
11147 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
11148 }
11149
11150 if (!ctxt->wellFormed) {
11151 if (ctxt->errNo == 0)
11152 ret = 1;
11153 else
11154 ret = ctxt->errNo;
11155 } else {
11156 if (list != NULL) {
11157 xmlNodePtr cur;
11158
11159 /*
11160 * Return the newly created nodeset after unlinking it from
11161 * they pseudo parent.
11162 */
11163 cur = newDoc->children->children;
11164 *list = cur;
11165 while (cur != NULL) {
11166 cur->parent = NULL;
11167 cur = cur->next;
11168 }
11169 newDoc->children->children = NULL;
11170 }
11171 ret = 0;
11172 }
11173 if (sax != NULL)
11174 ctxt->sax = oldsax;
11175 xmlFreeParserCtxt(ctxt);
11176 newDoc->intSubset = NULL;
11177 newDoc->extSubset = NULL;
11178 xmlFreeDoc(newDoc);
11179
11180 return(ret);
11181}
11182
11183/**
11184 * xmlParseBalancedChunkFile:
Daniel Veillard0142b842000-01-14 14:45:24 +000011185 * @doc: the document the chunk pertains to
11186 *
11187 * Parse a well-balanced chunk of an XML document contained in a file
11188 *
11189 * Returns the resulting list of nodes resulting from the parsing,
11190 * they are not added to @node
11191 */
11192
11193xmlNodePtr
11194xmlParseBalancedChunkFile(xmlDocPtr doc, xmlNodePtr node) {
Daniel Veillardcf461992000-03-14 18:30:20 +000011195 /* TODO !!! */
11196 return(NULL);
Daniel Veillard0142b842000-01-14 14:45:24 +000011197}
11198
11199/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011200 * xmlRecoverDoc:
Daniel Veillarddd6b3671999-09-23 22:19:22 +000011201 * @cur: a pointer to an array of xmlChar
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011202 *
11203 * parse an XML in-memory document and build a tree.
11204 * In the case the document is not Well Formed, a tree is built anyway
11205 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000011206 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011207 */
11208
Daniel Veillard1e346af1999-02-22 10:33:01 +000011209xmlDocPtr
Daniel Veillarddd6b3671999-09-23 22:19:22 +000011210xmlRecoverDoc(xmlChar *cur) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011211 return(xmlSAXParseDoc(NULL, cur, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011212}
11213
11214/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011215 * xmlCreateEntityParserCtxt:
11216 * @URL: the entity URL
11217 * @ID: the entity PUBLIC ID
11218 * @base: a posible base for the target URI
11219 *
11220 * Create a parser context for an external entity
11221 * Automatic support for ZLIB/Compress compressed document is provided
11222 * by default if found at compile-time.
11223 *
11224 * Returns the new parser context or NULL
11225 */
11226xmlParserCtxtPtr
11227xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
11228 const xmlChar *base) {
11229 xmlParserCtxtPtr ctxt;
11230 xmlParserInputPtr inputStream;
11231 char *directory = NULL;
Daniel Veillard87b95392000-08-12 21:12:04 +000011232 xmlChar *uri;
11233
Daniel Veillardcf461992000-03-14 18:30:20 +000011234 ctxt = xmlNewParserCtxt();
11235 if (ctxt == NULL) {
11236 return(NULL);
11237 }
11238
Daniel Veillard87b95392000-08-12 21:12:04 +000011239 uri = xmlBuildURI(URL, base);
11240
11241 if (uri == NULL) {
11242 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
11243 if (inputStream == NULL) {
11244 xmlFreeParserCtxt(ctxt);
11245 return(NULL);
11246 }
11247
11248 inputPush(ctxt, inputStream);
11249
11250 if ((ctxt->directory == NULL) && (directory == NULL))
11251 directory = xmlParserGetDirectory((char *)URL);
11252 if ((ctxt->directory == NULL) && (directory != NULL))
11253 ctxt->directory = directory;
11254 } else {
11255 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
11256 if (inputStream == NULL) {
11257 xmlFreeParserCtxt(ctxt);
11258 return(NULL);
11259 }
11260
11261 inputPush(ctxt, inputStream);
11262
11263 if ((ctxt->directory == NULL) && (directory == NULL))
11264 directory = xmlParserGetDirectory((char *)uri);
11265 if ((ctxt->directory == NULL) && (directory != NULL))
11266 ctxt->directory = directory;
11267 xmlFree(uri);
Daniel Veillardcf461992000-03-14 18:30:20 +000011268 }
11269
Daniel Veillardcf461992000-03-14 18:30:20 +000011270 return(ctxt);
11271}
11272
11273/**
11274 * xmlCreateFileParserCtxt:
Daniel Veillard11e00581998-10-24 18:27:49 +000011275 * @filename: the filename
11276 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000011277 * Create a parser context for a file content.
11278 * Automatic support for ZLIB/Compress compressed document is provided
11279 * by default if found at compile-time.
Daniel Veillard11e00581998-10-24 18:27:49 +000011280 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000011281 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +000011282 */
Daniel Veillardd692aa41999-02-28 21:54:31 +000011283xmlParserCtxtPtr
11284xmlCreateFileParserCtxt(const char *filename)
11285{
11286 xmlParserCtxtPtr ctxt;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011287 xmlParserInputPtr inputStream;
Daniel Veillarde2d034d1999-07-27 19:52:06 +000011288 xmlParserInputBufferPtr buf;
Daniel Veillardb05deb71999-08-10 19:04:08 +000011289 char *directory = NULL;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011290
Daniel Veillarde2d034d1999-07-27 19:52:06 +000011291 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
11292 if (buf == NULL) return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011293
Daniel Veillardb05deb71999-08-10 19:04:08 +000011294 ctxt = xmlNewParserCtxt();
Daniel Veillard260a68f1998-08-13 03:39:55 +000011295 if (ctxt == NULL) {
Daniel Veillard260a68f1998-08-13 03:39:55 +000011296 return(NULL);
11297 }
Daniel Veillardb05deb71999-08-10 19:04:08 +000011298
11299 inputStream = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011300 if (inputStream == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +000011301 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011302 return(NULL);
11303 }
11304
Daniel Veillard6454aec1999-09-02 22:04:43 +000011305 inputStream->filename = xmlMemStrdup(filename);
Daniel Veillarde2d034d1999-07-27 19:52:06 +000011306 inputStream->buf = buf;
Daniel Veillarde2d034d1999-07-27 19:52:06 +000011307 inputStream->base = inputStream->buf->buffer->content;
11308 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011309
11310 inputPush(ctxt, inputStream);
Daniel Veillardb05deb71999-08-10 19:04:08 +000011311 if ((ctxt->directory == NULL) && (directory == NULL))
11312 directory = xmlParserGetDirectory(filename);
11313 if ((ctxt->directory == NULL) && (directory != NULL))
11314 ctxt->directory = directory;
11315
Daniel Veillardd692aa41999-02-28 21:54:31 +000011316 return(ctxt);
11317}
11318
11319/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011320 * xmlSAXParseFile:
Daniel Veillardd692aa41999-02-28 21:54:31 +000011321 * @sax: the SAX handler block
11322 * @filename: the filename
11323 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
11324 * documents
11325 *
11326 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11327 * compressed document is provided by default if found at compile-time.
11328 * It use the given SAX function block to handle the parsing callback.
11329 * If sax is NULL, fallback to the default DOM tree building routines.
11330 *
11331 * Returns the resulting document tree
11332 */
11333
Daniel Veillard011b63c1999-06-02 17:44:04 +000011334xmlDocPtr
11335xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
Daniel Veillardd692aa41999-02-28 21:54:31 +000011336 int recovery) {
11337 xmlDocPtr ret;
11338 xmlParserCtxtPtr ctxt;
Daniel Veillardb05deb71999-08-10 19:04:08 +000011339 char *directory = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +000011340
11341 ctxt = xmlCreateFileParserCtxt(filename);
11342 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +000011343 if (sax != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +000011344 if (ctxt->sax != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +000011345 xmlFree(ctxt->sax);
Daniel Veillard27d88741999-05-29 11:51:49 +000011346 ctxt->sax = sax;
11347 ctxt->userData = NULL;
11348 }
Daniel Veillard260a68f1998-08-13 03:39:55 +000011349
Daniel Veillardb05deb71999-08-10 19:04:08 +000011350 if ((ctxt->directory == NULL) && (directory == NULL))
11351 directory = xmlParserGetDirectory(filename);
11352 if ((ctxt->directory == NULL) && (directory != NULL))
Daniel Veillardcf461992000-03-14 18:30:20 +000011353 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
Daniel Veillardb05deb71999-08-10 19:04:08 +000011354
Daniel Veillard260a68f1998-08-13 03:39:55 +000011355 xmlParseDocument(ctxt);
11356
Daniel Veillard517752b1999-04-05 12:20:10 +000011357 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011358 else {
11359 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +000011360 xmlFreeDoc(ctxt->myDoc);
11361 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011362 }
Daniel Veillard97fea181999-06-26 23:07:37 +000011363 if (sax != NULL)
11364 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +000011365 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011366
11367 return(ret);
11368}
11369
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011370/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011371 * xmlParseFile:
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011372 * @filename: the filename
11373 *
11374 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11375 * compressed document is provided by default if found at compile-time.
11376 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000011377 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011378 */
11379
Daniel Veillard011b63c1999-06-02 17:44:04 +000011380xmlDocPtr
11381xmlParseFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011382 return(xmlSAXParseFile(NULL, filename, 0));
11383}
11384
11385/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011386 * xmlRecoverFile:
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011387 * @filename: the filename
11388 *
11389 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
11390 * compressed document is provided by default if found at compile-time.
11391 * In the case the document is not Well Formed, a tree is built anyway
11392 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000011393 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011394 */
11395
Daniel Veillard011b63c1999-06-02 17:44:04 +000011396xmlDocPtr
11397xmlRecoverFile(const char *filename) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011398 return(xmlSAXParseFile(NULL, filename, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011399}
Daniel Veillard260a68f1998-08-13 03:39:55 +000011400
Daniel Veillard11e00581998-10-24 18:27:49 +000011401/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011402 * xmlCreateMemoryParserCtxt:
11403 * @buffer: a pointer to a zero terminated char array
Daniel Veillardb566ce12000-03-04 11:39:42 +000011404 * @size: the size of the array (without the trailing 0)
Daniel Veillard11e00581998-10-24 18:27:49 +000011405 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000011406 * Create a parser context for an XML in-memory document.
Daniel Veillard11e00581998-10-24 18:27:49 +000011407 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000011408 * Returns the new parser context or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +000011409 */
Daniel Veillardd692aa41999-02-28 21:54:31 +000011410xmlParserCtxtPtr
11411xmlCreateMemoryParserCtxt(char *buffer, int size) {
Daniel Veillard260a68f1998-08-13 03:39:55 +000011412 xmlParserCtxtPtr ctxt;
11413 xmlParserInputPtr input;
Daniel Veillard46e370e2000-07-21 20:32:03 +000011414 xmlParserInputBufferPtr buf;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011415
Daniel Veillardcf461992000-03-14 18:30:20 +000011416 if (buffer[size] != 0)
Daniel Veillardb566ce12000-03-04 11:39:42 +000011417 return(NULL);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011418
Daniel Veillardb05deb71999-08-10 19:04:08 +000011419 ctxt = xmlNewParserCtxt();
Daniel Veillardcf461992000-03-14 18:30:20 +000011420 if (ctxt == NULL)
Daniel Veillard260a68f1998-08-13 03:39:55 +000011421 return(NULL);
Daniel Veillardb05deb71999-08-10 19:04:08 +000011422
Daniel Veillard46e370e2000-07-21 20:32:03 +000011423 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
11424 if (buf == NULL) return(NULL);
11425
Daniel Veillardb05deb71999-08-10 19:04:08 +000011426 input = xmlNewInputStream(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011427 if (input == NULL) {
Daniel Veillardb05deb71999-08-10 19:04:08 +000011428 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011429 return(NULL);
11430 }
11431
11432 input->filename = NULL;
Daniel Veillard46e370e2000-07-21 20:32:03 +000011433 input->buf = buf;
11434 input->base = input->buf->buffer->content;
11435 input->cur = input->buf->buffer->content;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011436
11437 inputPush(ctxt, input);
Daniel Veillardd692aa41999-02-28 21:54:31 +000011438 return(ctxt);
11439}
11440
11441/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011442 * xmlSAXParseMemory:
Daniel Veillardd692aa41999-02-28 21:54:31 +000011443 * @sax: the SAX handler block
11444 * @buffer: an pointer to a char array
Daniel Veillard51e3b151999-11-12 17:02:31 +000011445 * @size: the size of the array
11446 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
Daniel Veillardd692aa41999-02-28 21:54:31 +000011447 * documents
11448 *
11449 * parse an XML in-memory block and use the given SAX function block
11450 * to handle the parsing callback. If sax is NULL, fallback to the default
11451 * DOM tree building routines.
11452 *
Daniel Veillardd692aa41999-02-28 21:54:31 +000011453 * Returns the resulting document tree
11454 */
11455xmlDocPtr
11456xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
11457 xmlDocPtr ret;
11458 xmlParserCtxtPtr ctxt;
11459
11460 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11461 if (ctxt == NULL) return(NULL);
Daniel Veillard27d88741999-05-29 11:51:49 +000011462 if (sax != NULL) {
11463 ctxt->sax = sax;
11464 ctxt->userData = NULL;
11465 }
Daniel Veillard260a68f1998-08-13 03:39:55 +000011466
11467 xmlParseDocument(ctxt);
11468
Daniel Veillard517752b1999-04-05 12:20:10 +000011469 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011470 else {
11471 ret = NULL;
Daniel Veillard517752b1999-04-05 12:20:10 +000011472 xmlFreeDoc(ctxt->myDoc);
11473 ctxt->myDoc = NULL;
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011474 }
Daniel Veillard97fea181999-06-26 23:07:37 +000011475 if (sax != NULL)
11476 ctxt->sax = NULL;
Daniel Veillardd692aa41999-02-28 21:54:31 +000011477 xmlFreeParserCtxt(ctxt);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011478
11479 return(ret);
11480}
11481
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011482/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011483 * xmlParseMemory:
Daniel Veillard1e346af1999-02-22 10:33:01 +000011484 * @buffer: an pointer to a char array
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011485 * @size: the size of the array
11486 *
11487 * parse an XML in-memory block and build a tree.
11488 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000011489 * Returns the resulting document tree
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011490 */
11491
11492xmlDocPtr xmlParseMemory(char *buffer, int size) {
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011493 return(xmlSAXParseMemory(NULL, buffer, size, 0));
11494}
11495
11496/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011497 * xmlRecoverMemory:
Daniel Veillard1e346af1999-02-22 10:33:01 +000011498 * @buffer: an pointer to a char array
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011499 * @size: the size of the array
11500 *
11501 * parse an XML in-memory block and build a tree.
11502 * In the case the document is not Well Formed, a tree is built anyway
11503 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000011504 * Returns the resulting document tree
Daniel Veillard39a1f9a1999-01-17 19:11:59 +000011505 */
11506
11507xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
11508 return(xmlSAXParseMemory(NULL, buffer, size, 1));
Daniel Veillard42dc9b31998-11-09 01:17:21 +000011509}
Daniel Veillard260a68f1998-08-13 03:39:55 +000011510
Daniel Veillard260a68f1998-08-13 03:39:55 +000011511
Daniel Veillard11e00581998-10-24 18:27:49 +000011512/**
11513 * xmlSetupParserForBuffer:
11514 * @ctxt: an XML parser context
Daniel Veillarddd6b3671999-09-23 22:19:22 +000011515 * @buffer: a xmlChar * buffer
Daniel Veillard11e00581998-10-24 18:27:49 +000011516 * @filename: a file name
11517 *
Daniel Veillard260a68f1998-08-13 03:39:55 +000011518 * Setup the parser context to parse a new buffer; Clears any prior
11519 * contents from the parser context. The buffer parameter must not be
11520 * NULL, but the filename parameter can be
11521 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +000011522void
Daniel Veillarddd6b3671999-09-23 22:19:22 +000011523xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
Daniel Veillard260a68f1998-08-13 03:39:55 +000011524 const char* filename)
11525{
Daniel Veillardb05deb71999-08-10 19:04:08 +000011526 xmlParserInputPtr input;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011527
Daniel Veillardb05deb71999-08-10 19:04:08 +000011528 input = xmlNewInputStream(ctxt);
11529 if (input == NULL) {
11530 perror("malloc");
Daniel Veillard6454aec1999-09-02 22:04:43 +000011531 xmlFree(ctxt);
Daniel Veillard0142b842000-01-14 14:45:24 +000011532 return;
Daniel Veillardb05deb71999-08-10 19:04:08 +000011533 }
11534
11535 xmlClearParserCtxt(ctxt);
11536 if (filename != NULL)
Daniel Veillard6454aec1999-09-02 22:04:43 +000011537 input->filename = xmlMemStrdup(filename);
Daniel Veillardb05deb71999-08-10 19:04:08 +000011538 input->base = buffer;
11539 input->cur = buffer;
11540 inputPush(ctxt, input);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011541}
11542
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011543/**
11544 * xmlSAXUserParseFile:
11545 * @sax: a SAX handler
11546 * @user_data: The user data returned on SAX callbacks
11547 * @filename: a file name
11548 *
11549 * parse an XML file and call the given SAX handler routines.
11550 * Automatic support for ZLIB/Compress compressed document is provided
11551 *
11552 * Returns 0 in case of success or a error number otherwise
11553 */
Daniel Veillard11a48ec1999-11-23 10:40:46 +000011554int
11555xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
11556 const char *filename) {
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011557 int ret = 0;
11558 xmlParserCtxtPtr ctxt;
11559
11560 ctxt = xmlCreateFileParserCtxt(filename);
11561 if (ctxt == NULL) return -1;
Daniel Veillard294cbca1999-12-03 13:19:09 +000011562 if (ctxt->sax != &xmlDefaultSAXHandler)
11563 xmlFree(ctxt->sax);
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011564 ctxt->sax = sax;
Daniel Veillarddbfd6411999-12-28 16:35:14 +000011565 if (user_data != NULL)
11566 ctxt->userData = user_data;
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011567
11568 xmlParseDocument(ctxt);
11569
11570 if (ctxt->wellFormed)
11571 ret = 0;
11572 else {
11573 if (ctxt->errNo != 0)
11574 ret = ctxt->errNo;
11575 else
11576 ret = -1;
11577 }
11578 if (sax != NULL)
11579 ctxt->sax = NULL;
11580 xmlFreeParserCtxt(ctxt);
11581
11582 return ret;
11583}
11584
11585/**
11586 * xmlSAXUserParseMemory:
11587 * @sax: a SAX handler
11588 * @user_data: The user data returned on SAX callbacks
11589 * @buffer: an in-memory XML document input
Daniel Veillard51e3b151999-11-12 17:02:31 +000011590 * @size: the length of the XML document in bytes
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011591 *
11592 * A better SAX parsing routine.
11593 * parse an XML in-memory buffer and call the given SAX handler routines.
11594 *
11595 * Returns 0 in case of success or a error number otherwise
11596 */
11597int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
11598 char *buffer, int size) {
11599 int ret = 0;
11600 xmlParserCtxtPtr ctxt;
Daniel Veillard87b95392000-08-12 21:12:04 +000011601 xmlSAXHandlerPtr oldsax = NULL;
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011602
11603 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
11604 if (ctxt == NULL) return -1;
Daniel Veillard87b95392000-08-12 21:12:04 +000011605 if (sax != NULL) {
11606 oldsax = ctxt->sax;
11607 ctxt->sax = sax;
11608 }
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011609 ctxt->userData = user_data;
11610
11611 xmlParseDocument(ctxt);
11612
11613 if (ctxt->wellFormed)
11614 ret = 0;
11615 else {
11616 if (ctxt->errNo != 0)
11617 ret = ctxt->errNo;
11618 else
11619 ret = -1;
11620 }
Daniel Veillard87b95392000-08-12 21:12:04 +000011621 if (sax != NULL) {
11622 ctxt->sax = oldsax;
11623 }
Daniel Veillard7a66ee61999-09-26 11:31:02 +000011624 xmlFreeParserCtxt(ctxt);
11625
11626 return ret;
11627}
11628
Daniel Veillard260a68f1998-08-13 03:39:55 +000011629
Daniel Veillardb05deb71999-08-10 19:04:08 +000011630/************************************************************************
11631 * *
Daniel Veillard51e3b151999-11-12 17:02:31 +000011632 * Miscellaneous *
Daniel Veillardb05deb71999-08-10 19:04:08 +000011633 * *
11634 ************************************************************************/
11635
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing:
 * the character encoding handlers first, then the predefined entities.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 */

void
xmlCleanupParser(void)
{
    /* Release the registered character encoding handlers. */
    xmlCleanupCharEncodingHandlers();

    /* Release the predefined entities. */
    xmlCleanupPredefinedEntities();
}
Daniel Veillardb05deb71999-08-10 19:04:08 +000011650
Daniel Veillard11e00581998-10-24 18:27:49 +000011651/**
11652 * xmlParserFindNodeInfo:
11653 * @ctxt: an XML parser context
11654 * @node: an XML node within the tree
11655 *
11656 * Find the parser node info struct for a given node
11657 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000011658 * Returns an xmlParserNodeInfo block pointer or NULL
Daniel Veillard260a68f1998-08-13 03:39:55 +000011659 */
11660const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
11661 const xmlNode* node)
11662{
11663 unsigned long pos;
11664
11665 /* Find position where node should be at */
11666 pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
11667 if ( ctx->node_seq.buffer[pos].node == node )
11668 return &ctx->node_seq.buffer[pos];
11669 else
11670 return NULL;
11671}
11672
11673
Daniel Veillard11e00581998-10-24 18:27:49 +000011674/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011675 * xmlInitNodeInfoSeq:
Daniel Veillard11e00581998-10-24 18:27:49 +000011676 * @seq: a node info sequence pointer
11677 *
11678 * -- Initialize (set to initial state) node info sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +000011679 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +000011680void
11681xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +000011682{
11683 seq->length = 0;
11684 seq->maximum = 0;
11685 seq->buffer = NULL;
11686}
11687
Daniel Veillard11e00581998-10-24 18:27:49 +000011688/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011689 * xmlClearNodeInfoSeq:
Daniel Veillard11e00581998-10-24 18:27:49 +000011690 * @seq: a node info sequence pointer
11691 *
11692 * -- Clear (release memory and reinitialize) node
Daniel Veillard260a68f1998-08-13 03:39:55 +000011693 * info sequence
11694 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +000011695void
11696xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
Daniel Veillard260a68f1998-08-13 03:39:55 +000011697{
11698 if ( seq->buffer != NULL )
Daniel Veillard6454aec1999-09-02 22:04:43 +000011699 xmlFree(seq->buffer);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011700 xmlInitNodeInfoSeq(seq);
11701}
11702
11703
Daniel Veillard11e00581998-10-24 18:27:49 +000011704/**
11705 * xmlParserFindNodeInfoIndex:
11706 * @seq: a node info sequence pointer
11707 * @node: an XML node pointer
11708 *
11709 *
Daniel Veillard260a68f1998-08-13 03:39:55 +000011710 * xmlParserFindNodeInfoIndex : Find the index that the info record for
11711 * the given node is or should be at in a sorted sequence
Daniel Veillard1164e751999-02-16 16:29:17 +000011712 *
Daniel Veillard1e346af1999-02-22 10:33:01 +000011713 * Returns a long indicating the position of the record
Daniel Veillard260a68f1998-08-13 03:39:55 +000011714 */
11715unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
11716 const xmlNode* node)
11717{
11718 unsigned long upper, lower, middle;
11719 int found = 0;
11720
11721 /* Do a binary search for the key */
11722 lower = 1;
11723 upper = seq->length;
11724 middle = 0;
11725 while ( lower <= upper && !found) {
11726 middle = lower + (upper - lower) / 2;
11727 if ( node == seq->buffer[middle - 1].node )
11728 found = 1;
11729 else if ( node < seq->buffer[middle - 1].node )
11730 upper = middle - 1;
11731 else
11732 lower = middle + 1;
11733 }
11734
11735 /* Return position */
11736 if ( middle == 0 || seq->buffer[middle - 1].node < node )
11737 return middle;
11738 else
11739 return middle - 1;
11740}
11741
11742
Daniel Veillard11e00581998-10-24 18:27:49 +000011743/**
11744 * xmlParserAddNodeInfo:
11745 * @ctxt: an XML parser context
Daniel Veillard1e346af1999-02-22 10:33:01 +000011746 * @info: a node info sequence pointer
Daniel Veillard11e00581998-10-24 18:27:49 +000011747 *
11748 * Insert node info record into the sorted sequence
Daniel Veillard260a68f1998-08-13 03:39:55 +000011749 */
Daniel Veillard0ba4d531998-11-01 19:34:31 +000011750void
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011751xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
Daniel Veillard1e346af1999-02-22 10:33:01 +000011752 const xmlParserNodeInfo* info)
Daniel Veillard260a68f1998-08-13 03:39:55 +000011753{
11754 unsigned long pos;
11755 static unsigned int block_size = 5;
11756
11757 /* Find pos and check to see if node is already in the sequence */
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011758 pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, info->node);
11759 if ( pos < ctxt->node_seq.length
11760 && ctxt->node_seq.buffer[pos].node == info->node ) {
11761 ctxt->node_seq.buffer[pos] = *info;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011762 }
11763
11764 /* Otherwise, we need to add new node to buffer */
11765 else {
11766 /* Expand buffer by 5 if needed */
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011767 if ( ctxt->node_seq.length + 1 > ctxt->node_seq.maximum ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +000011768 xmlParserNodeInfo* tmp_buffer;
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011769 unsigned int byte_size = (sizeof(*ctxt->node_seq.buffer)
11770 *(ctxt->node_seq.maximum + block_size));
Daniel Veillard260a68f1998-08-13 03:39:55 +000011771
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011772 if ( ctxt->node_seq.buffer == NULL )
Daniel Veillard6454aec1999-09-02 22:04:43 +000011773 tmp_buffer = (xmlParserNodeInfo*) xmlMalloc(byte_size);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011774 else
Daniel Veillard6454aec1999-09-02 22:04:43 +000011775 tmp_buffer = (xmlParserNodeInfo*) xmlRealloc(ctxt->node_seq.buffer, byte_size);
Daniel Veillard260a68f1998-08-13 03:39:55 +000011776
11777 if ( tmp_buffer == NULL ) {
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011778 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard27d88741999-05-29 11:51:49 +000011779 ctxt->sax->error(ctxt->userData, "Out of memory\n");
Daniel Veillarddd6b3671999-09-23 22:19:22 +000011780 ctxt->errNo = XML_ERR_NO_MEMORY;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011781 return;
11782 }
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011783 ctxt->node_seq.buffer = tmp_buffer;
11784 ctxt->node_seq.maximum += block_size;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011785 }
11786
11787 /* If position is not at end, move elements out of the way */
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011788 if ( pos != ctxt->node_seq.length ) {
Daniel Veillard260a68f1998-08-13 03:39:55 +000011789 unsigned long i;
11790
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011791 for ( i = ctxt->node_seq.length; i > pos; i-- )
11792 ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
Daniel Veillard260a68f1998-08-13 03:39:55 +000011793 }
11794
11795 /* Copy element and increase length */
Daniel Veillarde3bffb91998-11-08 14:40:56 +000011796 ctxt->node_seq.buffer[pos] = *info;
11797 ctxt->node_seq.length++;
Daniel Veillard260a68f1998-08-13 03:39:55 +000011798 }
11799}
Daniel Veillard011b63c1999-06-02 17:44:04 +000011800
11801
Daniel Veillardb05deb71999-08-10 19:04:08 +000011802/**
Daniel Veillardf0cc7cc2000-08-26 21:40:43 +000011803 * xmlPedanticParserDefault:
11804 * @val: int 0 or 1
11805 *
11806 * Set and return the previous value for enabling pedantic warnings.
11807 *
11808 * Returns the last value for 0 for no substitution, 1 for substitution.
11809 */
11810
11811int
11812xmlPedanticParserDefault(int val) {
11813 int old = xmlPedanticParserDefaultValue;
11814
11815 xmlPedanticParserDefaultValue = val;
11816 return(old);
11817}
11818
11819/**
Daniel Veillardcf461992000-03-14 18:30:20 +000011820 * xmlSubstituteEntitiesDefault:
Daniel Veillardb05deb71999-08-10 19:04:08 +000011821 * @val: int 0 or 1
11822 *
11823 * Set and return the previous value for default entity support.
11824 * Initially the parser always keep entity references instead of substituting
11825 * entity values in the output. This function has to be used to change the
11826 * default parser behaviour
11827 * SAX::subtituteEntities() has to be used for changing that on a file by
11828 * file basis.
11829 *
11830 * Returns the last value for 0 for no substitution, 1 for substitution.
11831 */
11832
11833int
11834xmlSubstituteEntitiesDefault(int val) {
11835 int old = xmlSubstituteEntitiesDefaultValue;
11836
11837 xmlSubstituteEntitiesDefaultValue = val;
11838 return(old);
11839}
11840
Daniel Veillardfb76c402000-03-04 11:39:42 +000011841/**
11842 * xmlKeepBlanksDefault:
11843 * @val: int 0 or 1
11844 *
11845 * Set and return the previous value for default blanks text nodes support.
11846 * The 1.x version of the parser used an heuristic to try to detect
11847 * ignorable white spaces. As a result the SAX callback was generating
11848 * ignorableWhitespace() callbacks instead of characters() one, and when
11849 * using the DOM output text nodes containing those blanks were not generated.
11850 * The 2.x and later version will switch to the XML standard way and
11851 * ignorableWhitespace() are only generated when running the parser in
11852 * validating mode and when the current element doesn't allow CDATA or
11853 * mixed content.
11854 * This function is provided as a way to force the standard behaviour
11855 * on 1.X libs and to switch back to the old mode for compatibility when
11856 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
11857 * by using xmlIsBlankNode() commodity function to detect the "empty"
11858 * nodes generated.
11859 * This value also affect autogeneration of indentation when saving code
11860 * if blanks sections are kept, indentation is not generated.
11861 *
11862 * Returns the last value for 0 for no substitution, 1 for substitution.
11863 */
11864
11865int
11866xmlKeepBlanksDefault(int val) {
11867 int old = xmlKeepBlanksDefaultValue;
11868
11869 xmlKeepBlanksDefaultValue = val;
11870 xmlIndentTreeOutput = !val;
11871 return(old);
11872}
11873